src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33 #include "tgsi/tgsi_from_mesa.h"
  34
  35 #if __cplusplus >= 201103L
  36 #include <unordered_map>
  37 #else
  38 #include <tr1/unordered_map>
  39 #endif
  40 #include <cstring>
  41 #include <list>
  42 #include <vector>
  43
  44 namespace {
  45
  46 #if __cplusplus >= 201103L
  47 using std::hash;
  48 using std::unordered_map;
  49 #else
  50 using std::tr1::hash;
  51 using std::tr1::unordered_map;
  52 #endif
  53
  54 using namespace nv50_ir;
  55
  56 int
  57 type_size(const struct glsl_type *type, bool bindless)
  58 {
  59    return glsl_count_attribute_slots(type, false);
  60 }
  61
  62 class Converter : public ConverterCommon
  63 {
  64 public:
  65    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  66
  67    bool run();
  68 private:
  69    typedef std::vector<LValue*> LValues;
  70    typedef unordered_map<unsigned, LValues> NirDefMap;
  71    typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
  72    typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
  73    typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
  74
  75    CacheMode convert(enum gl_access_qualifier);
  76    TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
  77    LValues& convert(nir_alu_dest *);
  78    BasicBlock* convert(nir_block *);
  79    LValues& convert(nir_dest *);
  80    SVSemantic convert(nir_intrinsic_op);
  81    Value* convert(nir_load_const_instr*, uint8_t);
  82    LValues& convert(nir_register *);
  83    LValues& convert(nir_ssa_def *);
  84
  85    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  86    Value* getSrc(nir_register *, uint8_t);
  87    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  88    Value* getSrc(nir_ssa_def *, uint8_t);
  89
  90    // returned value is the constant part of the given source (either the
  91    // nir_src or the selected source component of an intrinsic). Even though
  92    // this is mostly an optimization to be able to skip indirects in a few
  93    // cases, sometimes we require immediate values or set some fileds on
  94    // instructions (e.g. tex) in order for codegen to consume those.
  95    // If the found value has not a constant part, the Value gets returned
  96    // through the Value parameter.
  97    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  98    // isScalar indicates that the addressing is scalar, vec4 addressing is
  99    // assumed otherwise
 100    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
 101                         bool isScalar = false);
 102
 103    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
 104
 105    void setInterpolate(nv50_ir_varying *,
 106                        uint8_t,
 107                        bool centroid,
 108                        unsigned semantics);
 109
 110    Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
 111                          uint8_t c, Value *indirect0 = NULL,
 112                          Value *indirect1 = NULL, bool patch = false);
 113    void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
 114                 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
 115                 Value *indirect1 = NULL);
 116
 117    bool isFloatType(nir_alu_type);
 118    bool isSignedType(nir_alu_type);
 119    bool isResultFloat(nir_op);
 120    bool isResultSigned(nir_op);
 121
 122    DataType getDType(nir_alu_instr *);
 123    DataType getDType(nir_intrinsic_instr *);
 124    DataType getDType(nir_intrinsic_instr *, bool isSigned);
 125    DataType getDType(nir_op, uint8_t);
 126
 127    std::vector<DataType> getSTypes(nir_alu_instr *);
 128    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 129
 130    operation getOperation(nir_intrinsic_op);
 131    operation getOperation(nir_op);
 132    operation getOperation(nir_texop);
 133    operation preOperationNeeded(nir_op);
 134
 135    int getSubOp(nir_intrinsic_op);
 136    int getSubOp(nir_op);
 137
 138    CondCode getCondCode(nir_op);
 139
 140    bool assignSlots();
 141    bool parseNIR();
 142
 143    bool visit(nir_alu_instr *);
 144    bool visit(nir_block *);
 145    bool visit(nir_cf_node *);
 146    bool visit(nir_function *);
 147    bool visit(nir_if *);
 148    bool visit(nir_instr *);
 149    bool visit(nir_intrinsic_instr *);
 150    bool visit(nir_jump_instr *);
 151    bool visit(nir_load_const_instr*);
 152    bool visit(nir_loop *);
 153    bool visit(nir_ssa_undef_instr *);
 154    bool visit(nir_tex_instr *);
 155
 156    // tex stuff
 157    Value* applyProjection(Value *src, Value *proj);
 158    unsigned int getNIRArgCount(TexInstruction::Target&);
 159
 160    nir_shader *nir;
 161
 162    NirDefMap ssaDefs;
 163    NirDefMap regDefs;
 164    ImmediateMap immediates;
 165    NirArrayLMemOffsets regToLmemOffset;
 166    NirBlockMap blocks;
 167    unsigned int curLoopDepth;
 168
 169    BasicBlock *exit;
 170    Value *zero;
 171    Instruction *immInsertPos;
 172
 173    int clipVertexOutput;
 174
 175    union {
 176       struct {
 177          Value *position;
 178       } fp;
 179    };
 180 };
 181
 182 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 183    : ConverterCommon(prog, info),
 184      nir(nir),
 185      curLoopDepth(0),
 186      clipVertexOutput(-1)
 187 {
 188    zero = mkImm((uint32_t)0);
 189 }
 190
 191 BasicBlock *
 192 Converter::convert(nir_block *block)
 193 {
 194    NirBlockMap::iterator it = blocks.find(block->index);
 195    if (it != blocks.end())
 196       return it->second;
 197
 198    BasicBlock *bb = new BasicBlock(func);
 199    blocks[block->index] = bb;
 200    return bb;
 201 }
 202
 203 bool
 204 Converter::isFloatType(nir_alu_type type)
 205 {
 206    return nir_alu_type_get_base_type(type) == nir_type_float;
 207 }
 208
 209 bool
 210 Converter::isSignedType(nir_alu_type type)
 211 {
 212    return nir_alu_type_get_base_type(type) == nir_type_int;
 213 }
 214
 215 bool
 216 Converter::isResultFloat(nir_op op)
 217 {
 218    const nir_op_info &info = nir_op_infos[op];
 219    if (info.output_type != nir_type_invalid)
 220       return isFloatType(info.output_type);
 221
 222    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 223    assert(false);
 224    return true;
 225 }
 226
 227 bool
 228 Converter::isResultSigned(nir_op op)
 229 {
 230    switch (op) {
 231    // there is no umul and we get wrong results if we treat all muls as signed
 232    case nir_op_imul:
 233    case nir_op_inot:
 234       return false;
 235    default:
 236       const nir_op_info &info = nir_op_infos[op];
 237       if (info.output_type != nir_type_invalid)
 238          return isSignedType(info.output_type);
 239       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 240       assert(false);
 241       return true;
 242    }
 243 }
 244
 245 DataType
 246 Converter::getDType(nir_alu_instr *insn)
 247 {
 248    if (insn->dest.dest.is_ssa)
 249       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 250    else
 251       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 252 }
 253
 254 DataType
 255 Converter::getDType(nir_intrinsic_instr *insn)
 256 {
 257    bool isSigned;
 258    switch (insn->intrinsic) {
 259    case nir_intrinsic_shared_atomic_imax:
 260    case nir_intrinsic_shared_atomic_imin:
 261    case nir_intrinsic_ssbo_atomic_imax:
 262    case nir_intrinsic_ssbo_atomic_imin:
 263       isSigned = true;
 264       break;
 265    default:
 266       isSigned = false;
 267       break;
 268    }
 269
 270    return getDType(insn, isSigned);
 271 }
 272
 273 DataType
 274 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 275 {
 276    if (insn->dest.is_ssa)
 277       return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
 278    else
 279       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 280 }
 281
 282 DataType
 283 Converter::getDType(nir_op op, uint8_t bitSize)
 284 {
 285    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 286    if (ty == TYPE_NONE) {
 287       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 288       assert(false);
 289    }
 290    return ty;
 291 }
 292
 293 std::vector<DataType>
 294 Converter::getSTypes(nir_alu_instr *insn)
 295 {
 296    const nir_op_info &info = nir_op_infos[insn->op];
 297    std::vector<DataType> res(info.num_inputs);
 298
 299    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 300       if (info.input_types[i] != nir_type_invalid) {
 301          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 302       } else {
 303          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 304          assert(false);
 305          res[i] = TYPE_NONE;
 306          break;
 307       }
 308    }
 309
 310    return res;
 311 }
 312
 313 DataType
 314 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 315 {
 316    uint8_t bitSize;
 317    if (src.is_ssa)
 318       bitSize = src.ssa->bit_size;
 319    else
 320       bitSize = src.reg.reg->bit_size;
 321
 322    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 323    if (ty == TYPE_NONE) {
 324       const char *str;
 325       if (isFloat)
 326          str = "float";
 327       else if (isSigned)
 328          str = "int";
 329       else
 330          str = "uint";
 331       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 332       assert(false);
 333    }
 334    return ty;
 335 }
 336
 337 operation
 338 Converter::getOperation(nir_op op)
 339 {
 340    switch (op) {
 341    // basic ops with float and int variants
 342    case nir_op_fabs:
 343    case nir_op_iabs:
 344       return OP_ABS;
 345    case nir_op_fadd:
 346    case nir_op_iadd:
 347       return OP_ADD;
 348    case nir_op_iand:
 349       return OP_AND;
 350    case nir_op_ifind_msb:
 351    case nir_op_ufind_msb:
 352       return OP_BFIND;
 353    case nir_op_fceil:
 354       return OP_CEIL;
 355    case nir_op_fcos:
 356       return OP_COS;
 357    case nir_op_f2f32:
 358    case nir_op_f2f64:
 359    case nir_op_f2i32:
 360    case nir_op_f2i64:
 361    case nir_op_f2u32:
 362    case nir_op_f2u64:
 363    case nir_op_i2f32:
 364    case nir_op_i2f64:
 365    case nir_op_i2i32:
 366    case nir_op_i2i64:
 367    case nir_op_u2f32:
 368    case nir_op_u2f64:
 369    case nir_op_u2u32:
 370    case nir_op_u2u64:
 371       return OP_CVT;
 372    case nir_op_fddx:
 373    case nir_op_fddx_coarse:
 374    case nir_op_fddx_fine:
 375       return OP_DFDX;
 376    case nir_op_fddy:
 377    case nir_op_fddy_coarse:
 378    case nir_op_fddy_fine:
 379       return OP_DFDY;
 380    case nir_op_fdiv:
 381    case nir_op_idiv:
 382    case nir_op_udiv:
 383       return OP_DIV;
 384    case nir_op_fexp2:
 385       return OP_EX2;
 386    case nir_op_ffloor:
 387       return OP_FLOOR;
 388    case nir_op_ffma:
 389       return OP_FMA;
 390    case nir_op_flog2:
 391       return OP_LG2;
 392    case nir_op_fmax:
 393    case nir_op_imax:
 394    case nir_op_umax:
 395       return OP_MAX;
 396    case nir_op_pack_64_2x32_split:
 397       return OP_MERGE;
 398    case nir_op_fmin:
 399    case nir_op_imin:
 400    case nir_op_umin:
 401       return OP_MIN;
 402    case nir_op_fmod:
 403    case nir_op_imod:
 404    case nir_op_umod:
 405    case nir_op_frem:
 406    case nir_op_irem:
 407       return OP_MOD;
 408    case nir_op_fmul:
 409    case nir_op_imul:
 410    case nir_op_imul_high:
 411    case nir_op_umul_high:
 412       return OP_MUL;
 413    case nir_op_fneg:
 414    case nir_op_ineg:
 415       return OP_NEG;
 416    case nir_op_inot:
 417       return OP_NOT;
 418    case nir_op_ior:
 419       return OP_OR;
 420    case nir_op_fpow:
 421       return OP_POW;
 422    case nir_op_frcp:
 423       return OP_RCP;
 424    case nir_op_frsq:
 425       return OP_RSQ;
 426    case nir_op_fsat:
 427       return OP_SAT;
 428    case nir_op_feq32:
 429    case nir_op_ieq32:
 430    case nir_op_fge32:
 431    case nir_op_ige32:
 432    case nir_op_uge32:
 433    case nir_op_flt32:
 434    case nir_op_ilt32:
 435    case nir_op_ult32:
 436    case nir_op_fne32:
 437    case nir_op_ine32:
 438       return OP_SET;
 439    case nir_op_ishl:
 440       return OP_SHL;
 441    case nir_op_ishr:
 442    case nir_op_ushr:
 443       return OP_SHR;
 444    case nir_op_fsin:
 445       return OP_SIN;
 446    case nir_op_fsqrt:
 447       return OP_SQRT;
 448    case nir_op_ftrunc:
 449       return OP_TRUNC;
 450    case nir_op_ixor:
 451       return OP_XOR;
 452    default:
 453       ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
 454       assert(false);
 455       return OP_NOP;
 456    }
 457 }
 458
 459 operation
 460 Converter::getOperation(nir_texop op)
 461 {
 462    switch (op) {
 463    case nir_texop_tex:
 464       return OP_TEX;
 465    case nir_texop_lod:
 466       return OP_TXLQ;
 467    case nir_texop_txb:
 468       return OP_TXB;
 469    case nir_texop_txd:
 470       return OP_TXD;
 471    case nir_texop_txf:
 472    case nir_texop_txf_ms:
 473       return OP_TXF;
 474    case nir_texop_tg4:
 475       return OP_TXG;
 476    case nir_texop_txl:
 477       return OP_TXL;
 478    case nir_texop_query_levels:
 479    case nir_texop_texture_samples:
 480    case nir_texop_txs:
 481       return OP_TXQ;
 482    default:
 483       ERROR("couldn't get operation for nir_texop %u\n", op);
 484       assert(false);
 485       return OP_NOP;
 486    }
 487 }
 488
 489 operation
 490 Converter::getOperation(nir_intrinsic_op op)
 491 {
 492    switch (op) {
 493    case nir_intrinsic_emit_vertex:
 494       return OP_EMIT;
 495    case nir_intrinsic_end_primitive:
 496       return OP_RESTART;
 497    case nir_intrinsic_bindless_image_atomic_add:
 498    case nir_intrinsic_image_atomic_add:
 499    case nir_intrinsic_bindless_image_atomic_and:
 500    case nir_intrinsic_image_atomic_and:
 501    case nir_intrinsic_bindless_image_atomic_comp_swap:
 502    case nir_intrinsic_image_atomic_comp_swap:
 503    case nir_intrinsic_bindless_image_atomic_exchange:
 504    case nir_intrinsic_image_atomic_exchange:
 505    case nir_intrinsic_bindless_image_atomic_imax:
 506    case nir_intrinsic_image_atomic_imax:
 507    case nir_intrinsic_bindless_image_atomic_umax:
 508    case nir_intrinsic_image_atomic_umax:
 509    case nir_intrinsic_bindless_image_atomic_imin:
 510    case nir_intrinsic_image_atomic_imin:
 511    case nir_intrinsic_bindless_image_atomic_umin:
 512    case nir_intrinsic_image_atomic_umin:
 513    case nir_intrinsic_bindless_image_atomic_or:
 514    case nir_intrinsic_image_atomic_or:
 515    case nir_intrinsic_bindless_image_atomic_xor:
 516    case nir_intrinsic_image_atomic_xor:
 517    case nir_intrinsic_bindless_image_atomic_inc_wrap:
 518    case nir_intrinsic_image_atomic_inc_wrap:
 519    case nir_intrinsic_bindless_image_atomic_dec_wrap:
 520    case nir_intrinsic_image_atomic_dec_wrap:
 521       return OP_SUREDP;
 522    case nir_intrinsic_bindless_image_load:
 523    case nir_intrinsic_image_load:
 524       return OP_SULDP;
 525    case nir_intrinsic_bindless_image_samples:
 526    case nir_intrinsic_image_samples:
 527    case nir_intrinsic_bindless_image_size:
 528    case nir_intrinsic_image_size:
 529       return OP_SUQ;
 530    case nir_intrinsic_bindless_image_store:
 531    case nir_intrinsic_image_store:
 532       return OP_SUSTP;
 533    default:
 534       ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
 535       assert(false);
 536       return OP_NOP;
 537    }
 538 }
 539
 540 operation
 541 Converter::preOperationNeeded(nir_op op)
 542 {
 543    switch (op) {
 544    case nir_op_fcos:
 545    case nir_op_fsin:
 546       return OP_PRESIN;
 547    default:
 548       return OP_NOP;
 549    }
 550 }
 551
 552 int
 553 Converter::getSubOp(nir_op op)
 554 {
 555    switch (op) {
 556    case nir_op_imul_high:
 557    case nir_op_umul_high:
 558       return NV50_IR_SUBOP_MUL_HIGH;
 559    case nir_op_ishl:
 560    case nir_op_ishr:
 561    case nir_op_ushr:
 562       return NV50_IR_SUBOP_SHIFT_WRAP;
 563    default:
 564       return 0;
 565    }
 566 }
 567
 568 int
 569 Converter::getSubOp(nir_intrinsic_op op)
 570 {
 571    switch (op) {
 572    case nir_intrinsic_bindless_image_atomic_add:
 573    case nir_intrinsic_global_atomic_add:
 574    case nir_intrinsic_image_atomic_add:
 575    case nir_intrinsic_shared_atomic_add:
 576    case nir_intrinsic_ssbo_atomic_add:
 577       return  NV50_IR_SUBOP_ATOM_ADD;
 578    case nir_intrinsic_bindless_image_atomic_and:
 579    case nir_intrinsic_global_atomic_and:
 580    case nir_intrinsic_image_atomic_and:
 581    case nir_intrinsic_shared_atomic_and:
 582    case nir_intrinsic_ssbo_atomic_and:
 583       return  NV50_IR_SUBOP_ATOM_AND;
 584    case nir_intrinsic_bindless_image_atomic_comp_swap:
 585    case nir_intrinsic_global_atomic_comp_swap:
 586    case nir_intrinsic_image_atomic_comp_swap:
 587    case nir_intrinsic_shared_atomic_comp_swap:
 588    case nir_intrinsic_ssbo_atomic_comp_swap:
 589       return  NV50_IR_SUBOP_ATOM_CAS;
 590    case nir_intrinsic_bindless_image_atomic_exchange:
 591    case nir_intrinsic_global_atomic_exchange:
 592    case nir_intrinsic_image_atomic_exchange:
 593    case nir_intrinsic_shared_atomic_exchange:
 594    case nir_intrinsic_ssbo_atomic_exchange:
 595       return  NV50_IR_SUBOP_ATOM_EXCH;
 596    case nir_intrinsic_bindless_image_atomic_or:
 597    case nir_intrinsic_global_atomic_or:
 598    case nir_intrinsic_image_atomic_or:
 599    case nir_intrinsic_shared_atomic_or:
 600    case nir_intrinsic_ssbo_atomic_or:
 601       return  NV50_IR_SUBOP_ATOM_OR;
 602    case nir_intrinsic_bindless_image_atomic_imax:
 603    case nir_intrinsic_bindless_image_atomic_umax:
 604    case nir_intrinsic_global_atomic_imax:
 605    case nir_intrinsic_global_atomic_umax:
 606    case nir_intrinsic_image_atomic_imax:
 607    case nir_intrinsic_image_atomic_umax:
 608    case nir_intrinsic_shared_atomic_imax:
 609    case nir_intrinsic_shared_atomic_umax:
 610    case nir_intrinsic_ssbo_atomic_imax:
 611    case nir_intrinsic_ssbo_atomic_umax:
 612       return  NV50_IR_SUBOP_ATOM_MAX;
 613    case nir_intrinsic_bindless_image_atomic_imin:
 614    case nir_intrinsic_bindless_image_atomic_umin:
 615    case nir_intrinsic_global_atomic_imin:
 616    case nir_intrinsic_global_atomic_umin:
 617    case nir_intrinsic_image_atomic_imin:
 618    case nir_intrinsic_image_atomic_umin:
 619    case nir_intrinsic_shared_atomic_imin:
 620    case nir_intrinsic_shared_atomic_umin:
 621    case nir_intrinsic_ssbo_atomic_imin:
 622    case nir_intrinsic_ssbo_atomic_umin:
 623       return  NV50_IR_SUBOP_ATOM_MIN;
 624    case nir_intrinsic_bindless_image_atomic_xor:
 625    case nir_intrinsic_global_atomic_xor:
 626    case nir_intrinsic_image_atomic_xor:
 627    case nir_intrinsic_shared_atomic_xor:
 628    case nir_intrinsic_ssbo_atomic_xor:
 629       return  NV50_IR_SUBOP_ATOM_XOR;
 630    case nir_intrinsic_bindless_image_atomic_inc_wrap:
 631    case nir_intrinsic_image_atomic_inc_wrap:
 632       return NV50_IR_SUBOP_ATOM_INC;
 633    case nir_intrinsic_bindless_image_atomic_dec_wrap:
 634    case nir_intrinsic_image_atomic_dec_wrap:
 635       return NV50_IR_SUBOP_ATOM_DEC;
 636
 637    case nir_intrinsic_group_memory_barrier:
 638    case nir_intrinsic_memory_barrier:
 639    case nir_intrinsic_memory_barrier_buffer:
 640    case nir_intrinsic_memory_barrier_image:
 641       return NV50_IR_SUBOP_MEMBAR(M, GL);
 642    case nir_intrinsic_memory_barrier_shared:
 643       return NV50_IR_SUBOP_MEMBAR(M, CTA);
 644
 645    case nir_intrinsic_vote_all:
 646       return NV50_IR_SUBOP_VOTE_ALL;
 647    case nir_intrinsic_vote_any:
 648       return NV50_IR_SUBOP_VOTE_ANY;
 649    case nir_intrinsic_vote_ieq:
 650       return NV50_IR_SUBOP_VOTE_UNI;
 651    default:
 652       return 0;
 653    }
 654 }
 655
 656 CondCode
 657 Converter::getCondCode(nir_op op)
 658 {
 659    switch (op) {
 660    case nir_op_feq32:
 661    case nir_op_ieq32:
 662       return CC_EQ;
 663    case nir_op_fge32:
 664    case nir_op_ige32:
 665    case nir_op_uge32:
 666       return CC_GE;
 667    case nir_op_flt32:
 668    case nir_op_ilt32:
 669    case nir_op_ult32:
 670       return CC_LT;
 671    case nir_op_fne32:
 672       return CC_NEU;
 673    case nir_op_ine32:
 674       return CC_NE;
 675    default:
 676       ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
 677       assert(false);
 678       return CC_FL;
 679    }
 680 }
 681
 682 Converter::LValues&
 683 Converter::convert(nir_alu_dest *dest)
 684 {
 685    return convert(&dest->dest);
 686 }
 687
 688 Converter::LValues&
 689 Converter::convert(nir_dest *dest)
 690 {
 691    if (dest->is_ssa)
 692       return convert(&dest->ssa);
 693    if (dest->reg.indirect) {
 694       ERROR("no support for indirects.");
 695       assert(false);
 696    }
 697    return convert(dest->reg.reg);
 698 }
 699
 700 Converter::LValues&
 701 Converter::convert(nir_register *reg)
 702 {
 703    NirDefMap::iterator it = regDefs.find(reg->index);
 704    if (it != regDefs.end())
 705       return it->second;
 706
 707    LValues newDef(reg->num_components);
 708    for (uint8_t i = 0; i < reg->num_components; i++)
 709       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 710    return regDefs[reg->index] = newDef;
 711 }
 712
 713 Converter::LValues&
 714 Converter::convert(nir_ssa_def *def)
 715 {
 716    NirDefMap::iterator it = ssaDefs.find(def->index);
 717    if (it != ssaDefs.end())
 718       return it->second;
 719
 720    LValues newDef(def->num_components);
 721    for (uint8_t i = 0; i < def->num_components; i++)
 722       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 723    return ssaDefs[def->index] = newDef;
 724 }
 725
 726 Value*
 727 Converter::getSrc(nir_alu_src *src, uint8_t component)
 728 {
 729    if (src->abs || src->negate) {
 730       ERROR("modifiers currently not supported on nir_alu_src\n");
 731       assert(false);
 732    }
 733    return getSrc(&src->src, src->swizzle[component]);
 734 }
 735
 736 Value*
 737 Converter::getSrc(nir_register *reg, uint8_t idx)
 738 {
 739    NirDefMap::iterator it = regDefs.find(reg->index);
 740    if (it == regDefs.end())
 741       return convert(reg)[idx];
 742    return it->second[idx];
 743 }
 744
 745 Value*
 746 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 747 {
 748    if (src->is_ssa)
 749       return getSrc(src->ssa, idx);
 750
 751    if (src->reg.indirect) {
 752       if (indirect)
 753          return getSrc(src->reg.indirect, idx);
 754       ERROR("no support for indirects.");
 755       assert(false);
 756       return NULL;
 757    }
 758
 759    return getSrc(src->reg.reg, idx);
 760 }
 761
 762 Value*
 763 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 764 {
 765    ImmediateMap::iterator iit = immediates.find(src->index);
 766    if (iit != immediates.end())
 767       return convert((*iit).second, idx);
 768
 769    NirDefMap::iterator it = ssaDefs.find(src->index);
 770    if (it == ssaDefs.end()) {
 771       ERROR("SSA value %u not found\n", src->index);
 772       assert(false);
 773       return NULL;
 774    }
 775    return it->second[idx];
 776 }
 777
 778 uint32_t
 779 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 780 {
 781    nir_const_value *offset = nir_src_as_const_value(*src);
 782
 783    if (offset) {
 784       indirect = NULL;
 785       return offset[0].u32;
 786    }
 787
 788    indirect = getSrc(src, idx, true);
 789    return 0;
 790 }
 791
 792 uint32_t
 793 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
 794 {
 795    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 796    if (indirect && !isScalar)
 797       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 798    return idx;
 799 }
 800
 801 static void
 802 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 803 {
 804    assert(name && index);
 805
 806    if (slot >= VERT_ATTRIB_MAX) {
 807       ERROR("invalid varying slot %u\n", slot);
 808       assert(false);
 809       return;
 810    }
 811
 812    if (slot >= VERT_ATTRIB_GENERIC0 &&
 813        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 814       *name = TGSI_SEMANTIC_GENERIC;
 815       *index = slot - VERT_ATTRIB_GENERIC0;
 816       return;
 817    }
 818
 819    if (slot >= VERT_ATTRIB_TEX0 &&
 820        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 821       *name = TGSI_SEMANTIC_TEXCOORD;
 822       *index = slot - VERT_ATTRIB_TEX0;
 823       return;
 824    }
 825
 826    switch (slot) {
 827    case VERT_ATTRIB_COLOR0:
 828       *name = TGSI_SEMANTIC_COLOR;
 829       *index = 0;
 830       break;
 831    case VERT_ATTRIB_COLOR1:
 832       *name = TGSI_SEMANTIC_COLOR;
 833       *index = 1;
 834       break;
 835    case VERT_ATTRIB_EDGEFLAG:
 836       *name = TGSI_SEMANTIC_EDGEFLAG;
 837       *index = 0;
 838       break;
 839    case VERT_ATTRIB_FOG:
 840       *name = TGSI_SEMANTIC_FOG;
 841       *index = 0;
 842       break;
 843    case VERT_ATTRIB_NORMAL:
 844       *name = TGSI_SEMANTIC_NORMAL;
 845       *index = 0;
 846       break;
 847    case VERT_ATTRIB_POS:
 848       *name = TGSI_SEMANTIC_POSITION;
 849       *index = 0;
 850       break;
 851    case VERT_ATTRIB_POINT_SIZE:
 852       *name = TGSI_SEMANTIC_PSIZE;
 853       *index = 0;
 854       break;
 855    default:
 856       ERROR("unknown vert attrib slot %u\n", slot);
 857       assert(false);
 858       break;
 859    }
 860 }
 861
 862 void
 863 Converter::setInterpolate(nv50_ir_varying *var,
 864                           uint8_t mode,
 865                           bool centroid,
 866                           unsigned semantic)
 867 {
 868    switch (mode) {
 869    case INTERP_MODE_FLAT:
 870       var->flat = 1;
 871       break;
 872    case INTERP_MODE_NONE:
 873       if (semantic == TGSI_SEMANTIC_COLOR)
 874          var->sc = 1;
 875       else if (semantic == TGSI_SEMANTIC_POSITION)
 876          var->linear = 1;
 877       break;
 878    case INTERP_MODE_NOPERSPECTIVE:
 879       var->linear = 1;
 880       break;
 881    case INTERP_MODE_SMOOTH:
 882       break;
 883    }
 884    var->centroid = centroid;
 885 }
 886
 887 static uint16_t
 888 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
 889           bool input, const nir_variable *var)
 890 {
 891    if (!type->is_array())
 892       return type->count_attribute_slots(false);
 893
 894    uint16_t slots;
 895    switch (stage) {
 896    case Program::TYPE_GEOMETRY:
 897       slots = type->uniform_locations();
 898       if (input)
 899          slots /= info.gs.vertices_in;
 900       break;
 901    case Program::TYPE_TESSELLATION_CONTROL:
 902    case Program::TYPE_TESSELLATION_EVAL:
 903       // remove first dimension
 904       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
 905          slots = type->uniform_locations();
 906       else
 907          slots = type->fields.array->uniform_locations();
 908       break;
 909    default:
 910       slots = type->count_attribute_slots(false);
 911       break;
 912    }
 913
 914    return slots;
 915 }
 916
 917 bool Converter::assignSlots() {
 918    unsigned name;
 919    unsigned index;
 920
 921    info->io.viewportId = -1;
 922    info->numInputs = 0;
 923    info->numOutputs = 0;
 924    info->numSysVals = 0;
 925
 926    for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
 927       if (!(nir->info.system_values_read & 1ull << i))
 928          continue;
 929
 930       info->sv[info->numSysVals].sn = tgsi_get_sysval_semantic(i);
 931       info->sv[info->numSysVals].si = 0;
 932       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
 933
 934       switch (i) {
 935       case SYSTEM_VALUE_INSTANCE_ID:
 936          info->io.instanceId = info->numSysVals;
 937          break;
 938       case SYSTEM_VALUE_TESS_LEVEL_INNER:
 939       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 940          info->sv[info->numSysVals].patch = 1;
 941          break;
 942       case SYSTEM_VALUE_VERTEX_ID:
 943          info->io.vertexId = info->numSysVals;
 944          break;
 945       default:
 946          break;
 947       }
 948
 949       info->numSysVals += 1;
 950    }
 951
 952    if (prog->getType() == Program::TYPE_COMPUTE)
 953       return true;
 954
 955    nir_foreach_variable(var, &nir->inputs) {
 956       const glsl_type *type = var->type;
 957       int slot = var->data.location;
 958       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
 959       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
 960                                        : type->component_slots();
 961       uint32_t frac = var->data.location_frac;
 962       uint32_t vary = var->data.driver_location;
 963
 964       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
 965          if (comp > 2)
 966             slots *= 2;
 967       }
 968
 969       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
 970
 971       switch(prog->getType()) {
 972       case Program::TYPE_FRAGMENT:
 973          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
 974                                       &name, &index);
 975          for (uint16_t i = 0; i < slots; ++i) {
 976             setInterpolate(&info->in[vary + i], var->data.interpolation,
 977                            var->data.centroid | var->data.sample, name);
 978          }
 979          break;
 980       case Program::TYPE_GEOMETRY:
 981          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
 982                                       &name, &index);
 983          break;
 984       case Program::TYPE_TESSELLATION_CONTROL:
 985       case Program::TYPE_TESSELLATION_EVAL:
 986          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
 987                                       &name, &index);
 988          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
 989             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
 990          break;
 991       case Program::TYPE_VERTEX:
 992          if (slot >= VERT_ATTRIB_GENERIC0)
 993             slot = VERT_ATTRIB_GENERIC0 + vary;
 994          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
 995          switch (name) {
 996          case TGSI_SEMANTIC_EDGEFLAG:
 997             info->io.edgeFlagIn = vary;
 998             break;
 999          default:
1000             break;
1001          }
1002          break;
1003       default:
1004          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1005          return false;
1006       }
1007
1008       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1009          info->in[vary].id = vary;
1010          info->in[vary].patch = var->data.patch;
1011          info->in[vary].sn = name;
1012          info->in[vary].si = index + i;
1013          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1014             if (i & 0x1)
1015                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1016             else
1017                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1018          else
1019             info->in[vary].mask |= ((1 << comp) - 1) << frac;
1020       }
1021       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1022    }
1023
1024    nir_foreach_variable(var, &nir->outputs) {
1025       const glsl_type *type = var->type;
1026       int slot = var->data.location;
1027       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1028       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1029                                        : type->component_slots();
1030       uint32_t frac = var->data.location_frac;
1031       uint32_t vary = var->data.driver_location;
1032
1033       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1034          if (comp > 2)
1035             slots *= 2;
1036       }
1037
1038       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1039
1040       switch(prog->getType()) {
1041       case Program::TYPE_FRAGMENT:
1042          tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
1043          switch (name) {
1044          case TGSI_SEMANTIC_COLOR:
1045             if (!var->data.fb_fetch_output)
1046                info->prop.fp.numColourResults++;
1047
1048             if (var->data.location == FRAG_RESULT_COLOR &&
1049                 nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
1050                info->prop.fp.separateFragData = true;
1051
1052             // sometimes we get FRAG_RESULT_DATAX with data.index 0
1053             // sometimes we get FRAG_RESULT_DATA0 with data.index X
1054             index = index == 0 ? var->data.index : index;
1055             break;
1056          case TGSI_SEMANTIC_POSITION:
1057             info->io.fragDepth = vary;
1058             info->prop.fp.writesDepth = true;
1059             break;
1060          case TGSI_SEMANTIC_SAMPLEMASK:
1061             info->io.sampleMask = vary;
1062             break;
1063          default:
1064             break;
1065          }
1066          break;
1067       case Program::TYPE_GEOMETRY:
1068       case Program::TYPE_TESSELLATION_CONTROL:
1069       case Program::TYPE_TESSELLATION_EVAL:
1070       case Program::TYPE_VERTEX:
1071          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1072                                       &name, &index);
1073
1074          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1075              name != TGSI_SEMANTIC_TESSOUTER)
1076             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1077
1078          switch (name) {
1079          case TGSI_SEMANTIC_CLIPDIST:
1080             info->io.genUserClip = -1;
1081             break;
1082          case TGSI_SEMANTIC_CLIPVERTEX:
1083             clipVertexOutput = vary;
1084             break;
1085          case TGSI_SEMANTIC_EDGEFLAG:
1086             info->io.edgeFlagOut = vary;
1087             break;
1088          case TGSI_SEMANTIC_POSITION:
1089             if (clipVertexOutput < 0)
1090                clipVertexOutput = vary;
1091             break;
1092          default:
1093             break;
1094          }
1095          break;
1096       default:
1097          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1098          return false;
1099       }
1100
1101       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1102          info->out[vary].id = vary;
1103          info->out[vary].patch = var->data.patch;
1104          info->out[vary].sn = name;
1105          info->out[vary].si = index + i;
1106          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1107             if (i & 0x1)
1108                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1109             else
1110                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1111          else
1112             info->out[vary].mask |= ((1 << comp) - 1) << frac;
1113
1114          if (nir->info.outputs_read & 1ull << slot)
1115             info->out[vary].oread = 1;
1116       }
1117       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1118    }
1119
1120    if (info->io.genUserClip > 0) {
1121       info->io.clipDistances = info->io.genUserClip;
1122
1123       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1124
1125       for (unsigned int n = 0; n < nOut; ++n) {
1126          unsigned int i = info->numOutputs++;
1127          info->out[i].id = i;
1128          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1129          info->out[i].si = n;
1130          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1131       }
1132    }
1133
1134    return info->assignSlots(info) == 0;
1135 }
1136
1137 uint32_t
1138 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1139 {
1140    DataType ty;
1141    int offset = nir_intrinsic_component(insn);
1142    bool input;
1143
1144    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1145       ty = getDType(insn);
1146    else
1147       ty = getSType(insn->src[0], false, false);
1148
1149    switch (insn->intrinsic) {
1150    case nir_intrinsic_load_input:
1151    case nir_intrinsic_load_interpolated_input:
1152    case nir_intrinsic_load_per_vertex_input:
1153       input = true;
1154       break;
1155    case nir_intrinsic_load_output:
1156    case nir_intrinsic_load_per_vertex_output:
1157    case nir_intrinsic_store_output:
1158    case nir_intrinsic_store_per_vertex_output:
1159       input = false;
1160       break;
1161    default:
1162       ERROR("unknown intrinsic in getSlotAddress %s",
1163             nir_intrinsic_infos[insn->intrinsic].name);
1164       input = false;
1165       assert(false);
1166       break;
1167    }
1168
1169    if (typeSizeof(ty) == 8) {
1170       slot *= 2;
1171       slot += offset;
1172       if (slot >= 4) {
1173          idx += 1;
1174          slot -= 4;
1175       }
1176    } else {
1177       slot += offset;
1178    }
1179
1180    assert(slot < 4);
1181    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1182    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1183
1184    const nv50_ir_varying *vary = input ? info->in : info->out;
1185    return vary[idx].slot[slot] * 4;
1186 }
1187
1188 Instruction *
1189 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1190                     uint32_t base, uint8_t c, Value *indirect0,
1191                     Value *indirect1, bool patch)
1192 {
1193    unsigned int tySize = typeSizeof(ty);
1194
1195    if (tySize == 8 &&
1196        (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1197       Value *lo = getSSA();
1198       Value *hi = getSSA();
1199
1200       Instruction *loi =
1201          mkLoad(TYPE_U32, lo,
1202                 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1203                 indirect0);
1204       loi->setIndirect(0, 1, indirect1);
1205       loi->perPatch = patch;
1206
1207       Instruction *hii =
1208          mkLoad(TYPE_U32, hi,
1209                 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1210                 indirect0);
1211       hii->setIndirect(0, 1, indirect1);
1212       hii->perPatch = patch;
1213
1214       return mkOp2(OP_MERGE, ty, def, lo, hi);
1215    } else {
1216       Instruction *ld =
1217          mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1218       ld->setIndirect(0, 1, indirect1);
1219       ld->perPatch = patch;
1220       return ld;
1221    }
1222 }
1223
1224 void
1225 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1226                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1227                    Value *indirect0, Value *indirect1)
1228 {
1229    uint8_t size = typeSizeof(ty);
1230    uint32_t address = getSlotAddress(insn, idx, c);
1231
1232    if (size == 8 && indirect0) {
1233       Value *split[2];
1234       mkSplit(split, 4, src);
1235
1236       if (op == OP_EXPORT) {
1237          split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1238          split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1239       }
1240
1241       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1242               split[0])->perPatch = info->out[idx].patch;
1243       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1244               split[1])->perPatch = info->out[idx].patch;
1245    } else {
1246       if (op == OP_EXPORT)
1247          src = mkMov(getSSA(size), src, ty)->getDef(0);
1248       mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1249               src)->perPatch = info->out[idx].patch;
1250    }
1251 }
1252
1253 bool
1254 Converter::parseNIR()
1255 {
1256    info->bin.tlsSpace = 0;
1257    info->io.clipDistances = nir->info.clip_distance_array_size;
1258    info->io.cullDistances = nir->info.cull_distance_array_size;
1259    info->io.layer_viewport_relative = nir->info.layer_viewport_relative;
1260
1261    switch(prog->getType()) {
1262    case Program::TYPE_COMPUTE:
1263       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1264       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1265       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1266       info->bin.smemSize = nir->info.cs.shared_size;
1267       break;
1268    case Program::TYPE_FRAGMENT:
1269       info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1270       info->prop.fp.persampleInvocation =
1271          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1272          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1273       info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1274       info->prop.fp.readsSampleLocations =
1275          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1276       info->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote;
1277       info->prop.fp.usesSampleMaskIn =
1278          !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1279       break;
1280    case Program::TYPE_GEOMETRY:
1281       info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1282       info->prop.gp.instanceCount = nir->info.gs.invocations;
1283       info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1284       info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1285       break;
1286    case Program::TYPE_TESSELLATION_CONTROL:
1287    case Program::TYPE_TESSELLATION_EVAL:
1288       if (nir->info.tess.primitive_mode == GL_ISOLINES)
1289          info->prop.tp.domain = GL_LINES;
1290       else
1291          info->prop.tp.domain = nir->info.tess.primitive_mode;
1292       info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1293       info->prop.tp.outputPrim =
1294          nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1295       info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1296       info->prop.tp.winding = !nir->info.tess.ccw;
1297       break;
1298    case Program::TYPE_VERTEX:
1299       info->prop.vp.usesDrawParameters =
1300          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1301          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1302          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1303       break;
1304    default:
1305       break;
1306    }
1307
1308    return true;
1309 }
1310
1311 bool
1312 Converter::visit(nir_function *function)
1313 {
1314    assert(function->impl);
1315
1316    // usually the blocks will set everything up, but main is special
1317    BasicBlock *entry = new BasicBlock(prog->main);
1318    exit = new BasicBlock(prog->main);
1319    blocks[nir_start_block(function->impl)->index] = entry;
1320    prog->main->setEntry(entry);
1321    prog->main->setExit(exit);
1322
1323    setPosition(entry, true);
1324
1325    if (info->io.genUserClip > 0) {
1326       for (int c = 0; c < 4; ++c)
1327          clipVtx[c] = getScratch();
1328    }
1329
1330    switch (prog->getType()) {
1331    case Program::TYPE_TESSELLATION_CONTROL:
1332       outBase = mkOp2v(
1333          OP_SUB, TYPE_U32, getSSA(),
1334          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1335          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1336       break;
1337    case Program::TYPE_FRAGMENT: {
1338       Symbol *sv = mkSysVal(SV_POSITION, 3);
1339       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1340       fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1341       break;
1342    }
1343    default:
1344       break;
1345    }
1346
1347    nir_foreach_register(reg, &function->impl->registers) {
1348       if (reg->num_array_elems) {
1349          // TODO: packed variables would be nice, but MemoryOpt fails
1350          // replace 4 with reg->num_components
1351          uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1352          regToLmemOffset[reg->index] = info->bin.tlsSpace;
1353          info->bin.tlsSpace += size;
1354       }
1355    }
1356
1357    nir_index_ssa_defs(function->impl);
1358    foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1359       if (!visit(node))
1360          return false;
1361    }
1362
1363    bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1364    setPosition(exit, true);
1365
1366    if ((prog->getType() == Program::TYPE_VERTEX ||
1367         prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1368        && info->io.genUserClip > 0)
1369       handleUserClipPlanes();
1370
1371    // TODO: for non main function this needs to be a OP_RETURN
1372    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1373    return true;
1374 }
1375
1376 bool
1377 Converter::visit(nir_cf_node *node)
1378 {
1379    switch (node->type) {
1380    case nir_cf_node_block:
1381       return visit(nir_cf_node_as_block(node));
1382    case nir_cf_node_if:
1383       return visit(nir_cf_node_as_if(node));
1384    case nir_cf_node_loop:
1385       return visit(nir_cf_node_as_loop(node));
1386    default:
1387       ERROR("unknown nir_cf_node type %u\n", node->type);
1388       return false;
1389    }
1390 }
1391
1392 bool
1393 Converter::visit(nir_block *block)
1394 {
1395    if (!block->predecessors->entries && block->instr_list.is_empty())
1396       return true;
1397
1398    BasicBlock *bb = convert(block);
1399
1400    setPosition(bb, true);
1401    nir_foreach_instr(insn, block) {
1402       if (!visit(insn))
1403          return false;
1404    }
1405    return true;
1406 }
1407
1408 bool
1409 Converter::visit(nir_if *nif)
1410 {
1411    DataType sType = getSType(nif->condition, false, false);
1412    Value *src = getSrc(&nif->condition, 0);
1413
1414    nir_block *lastThen = nir_if_last_then_block(nif);
1415    nir_block *lastElse = nir_if_last_else_block(nif);
1416
1417    assert(!lastThen->successors[1]);
1418    assert(!lastElse->successors[1]);
1419
1420    BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1421    BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1422
1423    bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1424    bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1425
1426    // we only insert joinats, if both nodes end up at the end of the if again.
1427    // the reason for this to not happens are breaks/continues/ret/... which
1428    // have their own handling
1429    if (lastThen->successors[0] == lastElse->successors[0])
1430       bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1431                           CC_ALWAYS, NULL);
1432
1433    mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1434
1435    foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1436       if (!visit(node))
1437          return false;
1438    }
1439    setPosition(convert(lastThen), true);
1440    if (!bb->getExit() ||
1441        !bb->getExit()->asFlow() ||
1442         bb->getExit()->asFlow()->op == OP_JOIN) {
1443       BasicBlock *tailBB = convert(lastThen->successors[0]);
1444       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1445       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1446    }
1447
1448    foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1449       if (!visit(node))
1450          return false;
1451    }
1452    setPosition(convert(lastElse), true);
1453    if (!bb->getExit() ||
1454        !bb->getExit()->asFlow() ||
1455         bb->getExit()->asFlow()->op == OP_JOIN) {
1456       BasicBlock *tailBB = convert(lastElse->successors[0]);
1457       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1458       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1459    }
1460
1461    if (lastThen->successors[0] == lastElse->successors[0]) {
1462       setPosition(convert(lastThen->successors[0]), true);
1463       mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1464    }
1465
1466    return true;
1467 }
1468
1469 bool
1470 Converter::visit(nir_loop *loop)
1471 {
1472    curLoopDepth += 1;
1473    func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1474
1475    BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1476    BasicBlock *tailBB =
1477       convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1478    bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1479
1480    mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1481    setPosition(loopBB, false);
1482    mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1483
1484    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1485       if (!visit(node))
1486          return false;
1487    }
1488    Instruction *insn = bb->getExit();
1489    if (bb->cfg.incidentCount() != 0) {
1490       if (!insn || !insn->asFlow()) {
1491          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1492          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1493       } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1494                  tailBB->cfg.incidentCount() == 0) {
1495          // RA doesn't like having blocks around with no incident edge,
1496          // so we create a fake one to make it happy
1497          bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1498       }
1499    }
1500
1501    curLoopDepth -= 1;
1502
1503    return true;
1504 }
1505
1506 bool
1507 Converter::visit(nir_instr *insn)
1508 {
1509    // we need an insertion point for on the fly generated immediate loads
1510    immInsertPos = bb->getExit();
1511    switch (insn->type) {
1512    case nir_instr_type_alu:
1513       return visit(nir_instr_as_alu(insn));
1514    case nir_instr_type_intrinsic:
1515       return visit(nir_instr_as_intrinsic(insn));
1516    case nir_instr_type_jump:
1517       return visit(nir_instr_as_jump(insn));
1518    case nir_instr_type_load_const:
1519       return visit(nir_instr_as_load_const(insn));
1520    case nir_instr_type_ssa_undef:
1521       return visit(nir_instr_as_ssa_undef(insn));
1522    case nir_instr_type_tex:
1523       return visit(nir_instr_as_tex(insn));
1524    default:
1525       ERROR("unknown nir_instr type %u\n", insn->type);
1526       return false;
1527    }
1528    return true;
1529 }
1530
1531 SVSemantic
1532 Converter::convert(nir_intrinsic_op intr)
1533 {
1534    switch (intr) {
1535    case nir_intrinsic_load_base_vertex:
1536       return SV_BASEVERTEX;
1537    case nir_intrinsic_load_base_instance:
1538       return SV_BASEINSTANCE;
1539    case nir_intrinsic_load_draw_id:
1540       return SV_DRAWID;
1541    case nir_intrinsic_load_front_face:
1542       return SV_FACE;
1543    case nir_intrinsic_is_helper_invocation:
1544    case nir_intrinsic_load_helper_invocation:
1545       return SV_THREAD_KILL;
1546    case nir_intrinsic_load_instance_id:
1547       return SV_INSTANCE_ID;
1548    case nir_intrinsic_load_invocation_id:
1549       return SV_INVOCATION_ID;
1550    case nir_intrinsic_load_local_group_size:
1551       return SV_NTID;
1552    case nir_intrinsic_load_local_invocation_id:
1553       return SV_TID;
1554    case nir_intrinsic_load_num_work_groups:
1555       return SV_NCTAID;
1556    case nir_intrinsic_load_patch_vertices_in:
1557       return SV_VERTEX_COUNT;
1558    case nir_intrinsic_load_primitive_id:
1559       return SV_PRIMITIVE_ID;
1560    case nir_intrinsic_load_sample_id:
1561       return SV_SAMPLE_INDEX;
1562    case nir_intrinsic_load_sample_mask_in:
1563       return SV_SAMPLE_MASK;
1564    case nir_intrinsic_load_sample_pos:
1565       return SV_SAMPLE_POS;
1566    case nir_intrinsic_load_subgroup_eq_mask:
1567       return SV_LANEMASK_EQ;
1568    case nir_intrinsic_load_subgroup_ge_mask:
1569       return SV_LANEMASK_GE;
1570    case nir_intrinsic_load_subgroup_gt_mask:
1571       return SV_LANEMASK_GT;
1572    case nir_intrinsic_load_subgroup_le_mask:
1573       return SV_LANEMASK_LE;
1574    case nir_intrinsic_load_subgroup_lt_mask:
1575       return SV_LANEMASK_LT;
1576    case nir_intrinsic_load_subgroup_invocation:
1577       return SV_LANEID;
1578    case nir_intrinsic_load_tess_coord:
1579       return SV_TESS_COORD;
1580    case nir_intrinsic_load_tess_level_inner:
1581       return SV_TESS_INNER;
1582    case nir_intrinsic_load_tess_level_outer:
1583       return SV_TESS_OUTER;
1584    case nir_intrinsic_load_vertex_id:
1585       return SV_VERTEX_ID;
1586    case nir_intrinsic_load_work_group_id:
1587       return SV_CTAID;
1588    default:
1589       ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1590             nir_intrinsic_infos[intr].name);
1591       assert(false);
1592       return SV_LAST;
1593    }
1594 }
1595
1596 bool
1597 Converter::visit(nir_intrinsic_instr *insn)
1598 {
1599    nir_intrinsic_op op = insn->intrinsic;
1600    const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1601    unsigned dest_components = nir_intrinsic_dest_components(insn);
1602
1603    switch (op) {
1604    case nir_intrinsic_load_uniform: {
1605       LValues &newDefs = convert(&insn->dest);
1606       const DataType dType = getDType(insn);
1607       Value *indirect;
1608       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1609       for (uint8_t i = 0; i < dest_components; ++i) {
1610          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1611       }
1612       break;
1613    }
1614    case nir_intrinsic_store_output:
1615    case nir_intrinsic_store_per_vertex_output: {
1616       Value *indirect;
1617       DataType dType = getSType(insn->src[0], false, false);
1618       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1619
1620       for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
1621          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1622             continue;
1623
1624          uint8_t offset = 0;
1625          Value *src = getSrc(&insn->src[0], i);
1626          switch (prog->getType()) {
1627          case Program::TYPE_FRAGMENT: {
1628             if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1629                // TGSI uses a different interface than NIR, TGSI stores that
1630                // value in the z component, NIR in X
1631                offset += 2;
1632                src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1633             }
1634             break;
1635          }
1636          case Program::TYPE_GEOMETRY:
1637          case Program::TYPE_VERTEX: {
1638             if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1639                mkMov(clipVtx[i], src);
1640                src = clipVtx[i];
1641             }
1642             break;
1643          }
1644          default:
1645             break;
1646          }
1647
1648          storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1649       }
1650       break;
1651    }
1652    case nir_intrinsic_load_input:
1653    case nir_intrinsic_load_interpolated_input:
1654    case nir_intrinsic_load_output: {
1655       LValues &newDefs = convert(&insn->dest);
1656
1657       // FBFetch
1658       if (prog->getType() == Program::TYPE_FRAGMENT &&
1659           op == nir_intrinsic_load_output) {
1660          std::vector<Value*> defs, srcs;
1661          uint8_t mask = 0;
1662
1663          srcs.push_back(getSSA());
1664          srcs.push_back(getSSA());
1665          Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1666          Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1667          mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1668          mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1669
1670          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1671          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1672
1673          for (uint8_t i = 0u; i < dest_components; ++i) {
1674             defs.push_back(newDefs[i]);
1675             mask |= 1 << i;
1676          }
1677
1678          TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1679          texi->tex.levelZero = 1;
1680          texi->tex.mask = mask;
1681          texi->tex.useOffsets = 0;
1682          texi->tex.r = 0xffff;
1683          texi->tex.s = 0xffff;
1684
1685          info->prop.fp.readsFramebuffer = true;
1686          break;
1687       }
1688
1689       const DataType dType = getDType(insn);
1690       Value *indirect;
1691       bool input = op != nir_intrinsic_load_output;
1692       operation nvirOp;
1693       uint32_t mode = 0;
1694
1695       uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1696       nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1697
1698       // see load_barycentric_* handling
1699       if (prog->getType() == Program::TYPE_FRAGMENT) {
1700          mode = translateInterpMode(&vary, nvirOp);
1701          if (op == nir_intrinsic_load_interpolated_input) {
1702             ImmediateValue immMode;
1703             if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1704                mode |= immMode.reg.data.u32;
1705          }
1706       }
1707
1708       for (uint8_t i = 0u; i < dest_components; ++i) {
1709          uint32_t address = getSlotAddress(insn, idx, i);
1710          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1711          if (prog->getType() == Program::TYPE_FRAGMENT) {
1712             int s = 1;
1713             if (typeSizeof(dType) == 8) {
1714                Value *lo = getSSA();
1715                Value *hi = getSSA();
1716                Instruction *interp;
1717
1718                interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1719                if (nvirOp == OP_PINTERP)
1720                   interp->setSrc(s++, fp.position);
1721                if (mode & NV50_IR_INTERP_OFFSET)
1722                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1723                interp->setInterpolate(mode);
1724                interp->setIndirect(0, 0, indirect);
1725
1726                Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1727                interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1728                if (nvirOp == OP_PINTERP)
1729                   interp->setSrc(s++, fp.position);
1730                if (mode & NV50_IR_INTERP_OFFSET)
1731                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1732                interp->setInterpolate(mode);
1733                interp->setIndirect(0, 0, indirect);
1734
1735                mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1736             } else {
1737                Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1738                if (nvirOp == OP_PINTERP)
1739                   interp->setSrc(s++, fp.position);
1740                if (mode & NV50_IR_INTERP_OFFSET)
1741                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1742                interp->setInterpolate(mode);
1743                interp->setIndirect(0, 0, indirect);
1744             }
1745          } else {
1746             mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1747          }
1748       }
1749       break;
1750    }
1751    case nir_intrinsic_load_kernel_input: {
1752       assert(prog->getType() == Program::TYPE_COMPUTE);
1753       assert(insn->num_components == 1);
1754
1755       LValues &newDefs = convert(&insn->dest);
1756       const DataType dType = getDType(insn);
1757       Value *indirect;
1758       uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
1759
1760       mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
1761       break;
1762    }
1763    case nir_intrinsic_load_barycentric_at_offset:
1764    case nir_intrinsic_load_barycentric_at_sample:
1765    case nir_intrinsic_load_barycentric_centroid:
1766    case nir_intrinsic_load_barycentric_pixel:
1767    case nir_intrinsic_load_barycentric_sample: {
1768       LValues &newDefs = convert(&insn->dest);
1769       uint32_t mode;
1770
1771       if (op == nir_intrinsic_load_barycentric_centroid ||
1772           op == nir_intrinsic_load_barycentric_sample) {
1773          mode = NV50_IR_INTERP_CENTROID;
1774       } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1775          Value *offs[2];
1776          for (uint8_t c = 0; c < 2; c++) {
1777             offs[c] = getScratch();
1778             mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1779             mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1780             mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1781             mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1782          }
1783          mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1784
1785          mode = NV50_IR_INTERP_OFFSET;
1786       } else if (op == nir_intrinsic_load_barycentric_pixel) {
1787          mode = NV50_IR_INTERP_DEFAULT;
1788       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1789          info->prop.fp.readsSampleLocations = true;
1790          mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1791          mode = NV50_IR_INTERP_OFFSET;
1792       } else {
1793          unreachable("all intrinsics already handled above");
1794       }
1795
1796       loadImm(newDefs[1], mode);
1797       break;
1798    }
1799    case nir_intrinsic_demote:
1800    case nir_intrinsic_discard:
1801       mkOp(OP_DISCARD, TYPE_NONE, NULL);
1802       break;
1803    case nir_intrinsic_demote_if:
1804    case nir_intrinsic_discard_if: {
1805       Value *pred = getSSA(1, FILE_PREDICATE);
1806       if (insn->num_components > 1) {
1807          ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1808          assert(false);
1809          return false;
1810       }
1811       mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1812       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1813       break;
1814    }
1815    case nir_intrinsic_load_base_vertex:
1816    case nir_intrinsic_load_base_instance:
1817    case nir_intrinsic_load_draw_id:
1818    case nir_intrinsic_load_front_face:
1819    case nir_intrinsic_is_helper_invocation:
1820    case nir_intrinsic_load_helper_invocation:
1821    case nir_intrinsic_load_instance_id:
1822    case nir_intrinsic_load_invocation_id:
1823    case nir_intrinsic_load_local_group_size:
1824    case nir_intrinsic_load_local_invocation_id:
1825    case nir_intrinsic_load_num_work_groups:
1826    case nir_intrinsic_load_patch_vertices_in:
1827    case nir_intrinsic_load_primitive_id:
1828    case nir_intrinsic_load_sample_id:
1829    case nir_intrinsic_load_sample_mask_in:
1830    case nir_intrinsic_load_sample_pos:
1831    case nir_intrinsic_load_subgroup_eq_mask:
1832    case nir_intrinsic_load_subgroup_ge_mask:
1833    case nir_intrinsic_load_subgroup_gt_mask:
1834    case nir_intrinsic_load_subgroup_le_mask:
1835    case nir_intrinsic_load_subgroup_lt_mask:
1836    case nir_intrinsic_load_subgroup_invocation:
1837    case nir_intrinsic_load_tess_coord:
1838    case nir_intrinsic_load_tess_level_inner:
1839    case nir_intrinsic_load_tess_level_outer:
1840    case nir_intrinsic_load_vertex_id:
1841    case nir_intrinsic_load_work_group_id: {
1842       const DataType dType = getDType(insn);
1843       SVSemantic sv = convert(op);
1844       LValues &newDefs = convert(&insn->dest);
1845
1846       for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
1847          Value *def;
1848          if (typeSizeof(dType) == 8)
1849             def = getSSA();
1850          else
1851             def = newDefs[i];
1852
1853          if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1854             loadImm(def, 0u);
1855          } else {
1856             Symbol *sym = mkSysVal(sv, i);
1857             Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1858             if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1859                rdsv->perPatch = 1;
1860          }
1861
1862          if (typeSizeof(dType) == 8)
1863             mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1864       }
1865       break;
1866    }
1867    // constants
1868    case nir_intrinsic_load_subgroup_size: {
1869       LValues &newDefs = convert(&insn->dest);
1870       loadImm(newDefs[0], 32u);
1871       break;
1872    }
1873    case nir_intrinsic_vote_all:
1874    case nir_intrinsic_vote_any:
1875    case nir_intrinsic_vote_ieq: {
1876       LValues &newDefs = convert(&insn->dest);
1877       Value *pred = getScratch(1, FILE_PREDICATE);
1878       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1879       mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1880       mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1881       break;
1882    }
1883    case nir_intrinsic_ballot: {
1884       LValues &newDefs = convert(&insn->dest);
1885       Value *pred = getSSA(1, FILE_PREDICATE);
1886       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1887       mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1888       break;
1889    }
1890    case nir_intrinsic_read_first_invocation:
1891    case nir_intrinsic_read_invocation: {
1892       LValues &newDefs = convert(&insn->dest);
1893       const DataType dType = getDType(insn);
1894       Value *tmp = getScratch();
1895
1896       if (op == nir_intrinsic_read_first_invocation) {
1897          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1898          mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
1899          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1900       } else
1901          tmp = getSrc(&insn->src[1], 0);
1902
1903       for (uint8_t i = 0; i < dest_components; ++i) {
1904          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1905             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1906       }
1907       break;
1908    }
1909    case nir_intrinsic_load_per_vertex_input: {
1910       const DataType dType = getDType(insn);
1911       LValues &newDefs = convert(&insn->dest);
1912       Value *indirectVertex;
1913       Value *indirectOffset;
1914       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1915       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1916
1917       Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1918                               mkImm(baseVertex), indirectVertex);
1919       for (uint8_t i = 0u; i < dest_components; ++i) {
1920          uint32_t address = getSlotAddress(insn, idx, i);
1921          loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
1922                   indirectOffset, vtxBase, info->in[idx].patch);
1923       }
1924       break;
1925    }
1926    case nir_intrinsic_load_per_vertex_output: {
1927       const DataType dType = getDType(insn);
1928       LValues &newDefs = convert(&insn->dest);
1929       Value *indirectVertex;
1930       Value *indirectOffset;
1931       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1932       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1933       Value *vtxBase = NULL;
1934
1935       if (indirectVertex)
1936          vtxBase = indirectVertex;
1937       else
1938          vtxBase = loadImm(NULL, baseVertex);
1939
1940       vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
1941
1942       for (uint8_t i = 0u; i < dest_components; ++i) {
1943          uint32_t address = getSlotAddress(insn, idx, i);
1944          loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
1945                   indirectOffset, vtxBase, info->in[idx].patch);
1946       }
1947       break;
1948    }
1949    case nir_intrinsic_emit_vertex: {
1950       if (info->io.genUserClip > 0)
1951          handleUserClipPlanes();
1952       uint32_t idx = nir_intrinsic_stream_id(insn);
1953       mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
1954       break;
1955    }
1956    case nir_intrinsic_end_primitive: {
1957       uint32_t idx = nir_intrinsic_stream_id(insn);
1958       if (idx)
1959          break;
1960       mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
1961       break;
1962    }
1963    case nir_intrinsic_load_ubo: {
1964       const DataType dType = getDType(insn);
1965       LValues &newDefs = convert(&insn->dest);
1966       Value *indirectIndex;
1967       Value *indirectOffset;
1968       uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
1969       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
1970
1971       for (uint8_t i = 0u; i < dest_components; ++i) {
1972          loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
1973                   indirectOffset, indirectIndex);
1974       }
1975       break;
1976    }
1977    case nir_intrinsic_get_buffer_size: {
1978       LValues &newDefs = convert(&insn->dest);
1979       const DataType dType = getDType(insn);
1980       Value *indirectBuffer;
1981       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
1982
1983       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
1984       mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
1985       break;
1986    }
1987    case nir_intrinsic_store_ssbo: {
1988       DataType sType = getSType(insn->src[0], false, false);
1989       Value *indirectBuffer;
1990       Value *indirectOffset;
1991       uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
1992       uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
1993
1994       for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
1995          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1996             continue;
1997          Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
1998                                 offset + i * typeSizeof(sType));
1999          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2000             ->setIndirect(0, 1, indirectBuffer);
2001       }
2002       info->io.globalAccess |= 0x2;
2003       break;
2004    }
2005    case nir_intrinsic_load_ssbo: {
2006       const DataType dType = getDType(insn);
2007       LValues &newDefs = convert(&insn->dest);
2008       Value *indirectBuffer;
2009       Value *indirectOffset;
2010       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2011       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2012
2013       for (uint8_t i = 0u; i < dest_components; ++i)
2014          loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2015                   indirectOffset, indirectBuffer);
2016
2017       info->io.globalAccess |= 0x1;
2018       break;
2019    }
2020    case nir_intrinsic_shared_atomic_add:
2021    case nir_intrinsic_shared_atomic_and:
2022    case nir_intrinsic_shared_atomic_comp_swap:
2023    case nir_intrinsic_shared_atomic_exchange:
2024    case nir_intrinsic_shared_atomic_or:
2025    case nir_intrinsic_shared_atomic_imax:
2026    case nir_intrinsic_shared_atomic_imin:
2027    case nir_intrinsic_shared_atomic_umax:
2028    case nir_intrinsic_shared_atomic_umin:
2029    case nir_intrinsic_shared_atomic_xor: {
2030       const DataType dType = getDType(insn);
2031       LValues &newDefs = convert(&insn->dest);
2032       Value *indirectOffset;
2033       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2034       Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2035       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2036       if (op == nir_intrinsic_shared_atomic_comp_swap)
2037          atom->setSrc(2, getSrc(&insn->src[2], 0));
2038       atom->setIndirect(0, 0, indirectOffset);
2039       atom->subOp = getSubOp(op);
2040       break;
2041    }
2042    case nir_intrinsic_ssbo_atomic_add:
2043    case nir_intrinsic_ssbo_atomic_and:
2044    case nir_intrinsic_ssbo_atomic_comp_swap:
2045    case nir_intrinsic_ssbo_atomic_exchange:
2046    case nir_intrinsic_ssbo_atomic_or:
2047    case nir_intrinsic_ssbo_atomic_imax:
2048    case nir_intrinsic_ssbo_atomic_imin:
2049    case nir_intrinsic_ssbo_atomic_umax:
2050    case nir_intrinsic_ssbo_atomic_umin:
2051    case nir_intrinsic_ssbo_atomic_xor: {
2052       const DataType dType = getDType(insn);
2053       LValues &newDefs = convert(&insn->dest);
2054       Value *indirectBuffer;
2055       Value *indirectOffset;
2056       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2057       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2058
2059       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2060       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2061                                 getSrc(&insn->src[2], 0));
2062       if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2063          atom->setSrc(2, getSrc(&insn->src[3], 0));
2064       atom->setIndirect(0, 0, indirectOffset);
2065       atom->setIndirect(0, 1, indirectBuffer);
2066       atom->subOp = getSubOp(op);
2067
2068       info->io.globalAccess |= 0x2;
2069       break;
2070    }
2071    case nir_intrinsic_global_atomic_add:
2072    case nir_intrinsic_global_atomic_and:
2073    case nir_intrinsic_global_atomic_comp_swap:
2074    case nir_intrinsic_global_atomic_exchange:
2075    case nir_intrinsic_global_atomic_or:
2076    case nir_intrinsic_global_atomic_imax:
2077    case nir_intrinsic_global_atomic_imin:
2078    case nir_intrinsic_global_atomic_umax:
2079    case nir_intrinsic_global_atomic_umin:
2080    case nir_intrinsic_global_atomic_xor: {
2081       const DataType dType = getDType(insn);
2082       LValues &newDefs = convert(&insn->dest);
2083       Value *address;
2084       uint32_t offset = getIndirect(&insn->src[0], 0, address);
2085
2086       Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2087       Instruction *atom =
2088          mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2089       atom->setIndirect(0, 0, address);
2090       atom->subOp = getSubOp(op);
2091
2092       info->io.globalAccess |= 0x2;
2093       break;
2094    }
2095    case nir_intrinsic_bindless_image_atomic_add:
2096    case nir_intrinsic_bindless_image_atomic_and:
2097    case nir_intrinsic_bindless_image_atomic_comp_swap:
2098    case nir_intrinsic_bindless_image_atomic_exchange:
2099    case nir_intrinsic_bindless_image_atomic_imax:
2100    case nir_intrinsic_bindless_image_atomic_umax:
2101    case nir_intrinsic_bindless_image_atomic_imin:
2102    case nir_intrinsic_bindless_image_atomic_umin:
2103    case nir_intrinsic_bindless_image_atomic_or:
2104    case nir_intrinsic_bindless_image_atomic_xor:
2105    case nir_intrinsic_bindless_image_atomic_inc_wrap:
2106    case nir_intrinsic_bindless_image_atomic_dec_wrap:
2107    case nir_intrinsic_bindless_image_load:
2108    case nir_intrinsic_bindless_image_samples:
2109    case nir_intrinsic_bindless_image_size:
2110    case nir_intrinsic_bindless_image_store:
2111    case nir_intrinsic_image_atomic_add:
2112    case nir_intrinsic_image_atomic_and:
2113    case nir_intrinsic_image_atomic_comp_swap:
2114    case nir_intrinsic_image_atomic_exchange:
2115    case nir_intrinsic_image_atomic_imax:
2116    case nir_intrinsic_image_atomic_umax:
2117    case nir_intrinsic_image_atomic_imin:
2118    case nir_intrinsic_image_atomic_umin:
2119    case nir_intrinsic_image_atomic_or:
2120    case nir_intrinsic_image_atomic_xor:
2121    case nir_intrinsic_image_atomic_inc_wrap:
2122    case nir_intrinsic_image_atomic_dec_wrap:
2123    case nir_intrinsic_image_load:
2124    case nir_intrinsic_image_samples:
2125    case nir_intrinsic_image_size:
2126    case nir_intrinsic_image_store: {
2127       std::vector<Value*> srcs, defs;
2128       Value *indirect;
2129       DataType ty;
2130
2131       uint32_t mask = 0;
2132       TexInstruction::Target target =
2133          convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2134       unsigned int argCount = getNIRArgCount(target);
2135       uint16_t location = 0;
2136
2137       if (opInfo.has_dest) {
2138          LValues &newDefs = convert(&insn->dest);
2139          for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2140             defs.push_back(newDefs[i]);
2141             mask |= 1 << i;
2142          }
2143       }
2144
2145       int lod_src = -1;
2146       bool bindless = false;
2147       switch (op) {
2148       case nir_intrinsic_bindless_image_atomic_add:
2149       case nir_intrinsic_bindless_image_atomic_and:
2150       case nir_intrinsic_bindless_image_atomic_comp_swap:
2151       case nir_intrinsic_bindless_image_atomic_exchange:
2152       case nir_intrinsic_bindless_image_atomic_imax:
2153       case nir_intrinsic_bindless_image_atomic_umax:
2154       case nir_intrinsic_bindless_image_atomic_imin:
2155       case nir_intrinsic_bindless_image_atomic_umin:
2156       case nir_intrinsic_bindless_image_atomic_or:
2157       case nir_intrinsic_bindless_image_atomic_xor:
2158       case nir_intrinsic_bindless_image_atomic_inc_wrap:
2159       case nir_intrinsic_bindless_image_atomic_dec_wrap:
2160          ty = getDType(insn);
2161          bindless = true;
2162          info->io.globalAccess |= 0x2;
2163          mask = 0x1;
2164          break;
2165       case nir_intrinsic_image_atomic_add:
2166       case nir_intrinsic_image_atomic_and:
2167       case nir_intrinsic_image_atomic_comp_swap:
2168       case nir_intrinsic_image_atomic_exchange:
2169       case nir_intrinsic_image_atomic_imax:
2170       case nir_intrinsic_image_atomic_umax:
2171       case nir_intrinsic_image_atomic_imin:
2172       case nir_intrinsic_image_atomic_umin:
2173       case nir_intrinsic_image_atomic_or:
2174       case nir_intrinsic_image_atomic_xor:
2175       case nir_intrinsic_image_atomic_inc_wrap:
2176       case nir_intrinsic_image_atomic_dec_wrap:
2177          ty = getDType(insn);
2178          bindless = false;
2179          info->io.globalAccess |= 0x2;
2180          mask = 0x1;
2181          break;
2182       case nir_intrinsic_bindless_image_load:
2183       case nir_intrinsic_image_load:
2184          ty = TYPE_U32;
2185          bindless = op == nir_intrinsic_bindless_image_load;
2186          info->io.globalAccess |= 0x1;
2187          lod_src = 4;
2188          break;
2189       case nir_intrinsic_bindless_image_store:
2190       case nir_intrinsic_image_store:
2191          ty = TYPE_U32;
2192          bindless = op == nir_intrinsic_bindless_image_store;
2193          info->io.globalAccess |= 0x2;
2194          lod_src = 5;
2195          mask = 0xf;
2196          break;
2197       case nir_intrinsic_bindless_image_samples:
2198       case nir_intrinsic_image_samples:
2199          ty = TYPE_U32;
2200          bindless = op == nir_intrinsic_bindless_image_samples;
2201          mask = 0x8;
2202          break;
2203       case nir_intrinsic_bindless_image_size:
2204       case nir_intrinsic_image_size:
2205          ty = TYPE_U32;
2206          bindless = op == nir_intrinsic_bindless_image_size;
2207          break;
2208       default:
2209          unreachable("unhandled image opcode");
2210          break;
2211       }
2212
2213       if (bindless)
2214          indirect = getSrc(&insn->src[0], 0);
2215       else
2216          location = getIndirect(&insn->src[0], 0, indirect);
2217
2218       // coords
2219       if (opInfo.num_srcs >= 2)
2220          for (unsigned int i = 0u; i < argCount; ++i)
2221             srcs.push_back(getSrc(&insn->src[1], i));
2222
2223       // the sampler is just another src added after coords
2224       if (opInfo.num_srcs >= 3 && target.isMS())
2225          srcs.push_back(getSrc(&insn->src[2], 0));
2226
2227       if (opInfo.num_srcs >= 4 && lod_src != 4) {
2228          unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2229          for (uint8_t i = 0u; i < components; ++i)
2230             srcs.push_back(getSrc(&insn->src[3], i));
2231       }
2232
2233       if (opInfo.num_srcs >= 5 && lod_src != 5)
2234          // 1 for aotmic swap
2235          for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2236             srcs.push_back(getSrc(&insn->src[4], i));
2237
2238       TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2239       texi->tex.bindless = bindless;
2240       texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2241       texi->tex.mask = mask;
2242       texi->cache = convert(nir_intrinsic_access(insn));
2243       texi->setType(ty);
2244       texi->subOp = getSubOp(op);
2245
2246       if (indirect)
2247          texi->setIndirectR(indirect);
2248
2249       break;
2250    }
2251    case nir_intrinsic_store_shared: {
2252       DataType sType = getSType(insn->src[0], false, false);
2253       Value *indirectOffset;
2254       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2255
2256       for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2257          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2258             continue;
2259          Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2260          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2261       }
2262       break;
2263    }
2264    case nir_intrinsic_load_shared: {
2265       const DataType dType = getDType(insn);
2266       LValues &newDefs = convert(&insn->dest);
2267       Value *indirectOffset;
2268       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2269
2270       for (uint8_t i = 0u; i < dest_components; ++i)
2271          loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2272
2273       break;
2274    }
2275    case nir_intrinsic_control_barrier: {
2276       // TODO: add flag to shader_info
2277       info->numBarriers = 1;
2278       Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2279       bar->fixed = 1;
2280       bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2281       break;
2282    }
2283    case nir_intrinsic_group_memory_barrier:
2284    case nir_intrinsic_memory_barrier:
2285    case nir_intrinsic_memory_barrier_buffer:
2286    case nir_intrinsic_memory_barrier_image:
2287    case nir_intrinsic_memory_barrier_shared: {
2288       Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2289       bar->fixed = 1;
2290       bar->subOp = getSubOp(op);
2291       break;
2292    }
2293    case nir_intrinsic_memory_barrier_tcs_patch:
2294       break;
2295    case nir_intrinsic_shader_clock: {
2296       const DataType dType = getDType(insn);
2297       LValues &newDefs = convert(&insn->dest);
2298
2299       loadImm(newDefs[0], 0u);
2300       mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2301       break;
2302    }
2303    case nir_intrinsic_load_global: {
2304       const DataType dType = getDType(insn);
2305       LValues &newDefs = convert(&insn->dest);
2306       Value *indirectOffset;
2307       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2308
2309       for (auto i = 0u; i < dest_components; ++i)
2310          loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2311
2312       info->io.globalAccess |= 0x1;
2313       break;
2314    }
2315    case nir_intrinsic_store_global: {
2316       DataType sType = getSType(insn->src[0], false, false);
2317
2318       for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2319          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2320             continue;
2321          if (typeSizeof(sType) == 8) {
2322             Value *split[2];
2323             mkSplit(split, 4, getSrc(&insn->src[0], i));
2324
2325             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2326             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2327
2328             sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2329             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2330          } else {
2331             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2332             mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2333          }
2334       }
2335
2336       info->io.globalAccess |= 0x2;
2337       break;
2338    }
2339    default:
2340       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2341       return false;
2342    }
2343
2344    return true;
2345 }
2346
2347 bool
2348 Converter::visit(nir_jump_instr *insn)
2349 {
2350    switch (insn->type) {
2351    case nir_jump_return:
2352       // TODO: this only works in the main function
2353       mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2354       bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2355       break;
2356    case nir_jump_break:
2357    case nir_jump_continue: {
2358       bool isBreak = insn->type == nir_jump_break;
2359       nir_block *block = insn->instr.block;
2360       assert(!block->successors[1]);
2361       BasicBlock *target = convert(block->successors[0]);
2362       mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2363       bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2364       break;
2365    }
2366    default:
2367       ERROR("unknown nir_jump_type %u\n", insn->type);
2368       return false;
2369    }
2370
2371    return true;
2372 }
2373
2374 Value*
2375 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2376 {
2377    Value *val;
2378
2379    if (immInsertPos)
2380       setPosition(immInsertPos, true);
2381    else
2382       setPosition(bb, false);
2383
2384    switch (insn->def.bit_size) {
2385    case 64:
2386       val = loadImm(getSSA(8), insn->value[idx].u64);
2387       break;
2388    case 32:
2389       val = loadImm(getSSA(4), insn->value[idx].u32);
2390       break;
2391    case 16:
2392       val = loadImm(getSSA(2), insn->value[idx].u16);
2393       break;
2394    case 8:
2395       val = loadImm(getSSA(1), insn->value[idx].u8);
2396       break;
2397    default:
2398       unreachable("unhandled bit size!\n");
2399    }
2400    setPosition(bb, true);
2401    return val;
2402 }
2403
2404 bool
2405 Converter::visit(nir_load_const_instr *insn)
2406 {
2407    assert(insn->def.bit_size <= 64);
2408    immediates[insn->def.index] = insn;
2409    return true;
2410 }
2411
2412 #define DEFAULT_CHECKS \
2413       if (insn->dest.dest.ssa.num_components > 1) { \
2414          ERROR("nir_alu_instr only supported with 1 component!\n"); \
2415          return false; \
2416       } \
2417       if (insn->dest.write_mask != 1) { \
2418          ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2419          return false; \
2420       }
2421 bool
2422 Converter::visit(nir_alu_instr *insn)
2423 {
2424    const nir_op op = insn->op;
2425    const nir_op_info &info = nir_op_infos[op];
2426    DataType dType = getDType(insn);
2427    const std::vector<DataType> sTypes = getSTypes(insn);
2428
2429    Instruction *oldPos = this->bb->getExit();
2430
2431    switch (op) {
2432    case nir_op_fabs:
2433    case nir_op_iabs:
2434    case nir_op_fadd:
2435    case nir_op_iadd:
2436    case nir_op_iand:
2437    case nir_op_fceil:
2438    case nir_op_fcos:
2439    case nir_op_fddx:
2440    case nir_op_fddx_coarse:
2441    case nir_op_fddx_fine:
2442    case nir_op_fddy:
2443    case nir_op_fddy_coarse:
2444    case nir_op_fddy_fine:
2445    case nir_op_fdiv:
2446    case nir_op_idiv:
2447    case nir_op_udiv:
2448    case nir_op_fexp2:
2449    case nir_op_ffloor:
2450    case nir_op_ffma:
2451    case nir_op_flog2:
2452    case nir_op_fmax:
2453    case nir_op_imax:
2454    case nir_op_umax:
2455    case nir_op_fmin:
2456    case nir_op_imin:
2457    case nir_op_umin:
2458    case nir_op_fmod:
2459    case nir_op_imod:
2460    case nir_op_umod:
2461    case nir_op_fmul:
2462    case nir_op_imul:
2463    case nir_op_imul_high:
2464    case nir_op_umul_high:
2465    case nir_op_fneg:
2466    case nir_op_ineg:
2467    case nir_op_inot:
2468    case nir_op_ior:
2469    case nir_op_pack_64_2x32_split:
2470    case nir_op_fpow:
2471    case nir_op_frcp:
2472    case nir_op_frem:
2473    case nir_op_irem:
2474    case nir_op_frsq:
2475    case nir_op_fsat:
2476    case nir_op_ishr:
2477    case nir_op_ushr:
2478    case nir_op_fsin:
2479    case nir_op_fsqrt:
2480    case nir_op_ftrunc:
2481    case nir_op_ishl:
2482    case nir_op_ixor: {
2483       DEFAULT_CHECKS;
2484       LValues &newDefs = convert(&insn->dest);
2485       operation preOp = preOperationNeeded(op);
2486       if (preOp != OP_NOP) {
2487          assert(info.num_inputs < 2);
2488          Value *tmp = getSSA(typeSizeof(dType));
2489          Instruction *i0 = mkOp(preOp, dType, tmp);
2490          Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2491          if (info.num_inputs) {
2492             i0->setSrc(0, getSrc(&insn->src[0]));
2493             i1->setSrc(0, tmp);
2494          }
2495          i1->subOp = getSubOp(op);
2496       } else {
2497          Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2498          for (unsigned s = 0u; s < info.num_inputs; ++s) {
2499             i->setSrc(s, getSrc(&insn->src[s]));
2500          }
2501          i->subOp = getSubOp(op);
2502       }
2503       break;
2504    }
2505    case nir_op_ifind_msb:
2506    case nir_op_ufind_msb: {
2507       DEFAULT_CHECKS;
2508       LValues &newDefs = convert(&insn->dest);
2509       dType = sTypes[0];
2510       mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2511       break;
2512    }
2513    case nir_op_fround_even: {
2514       DEFAULT_CHECKS;
2515       LValues &newDefs = convert(&insn->dest);
2516       mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2517       break;
2518    }
2519    // convert instructions
2520    case nir_op_f2f32:
2521    case nir_op_f2i32:
2522    case nir_op_f2u32:
2523    case nir_op_i2f32:
2524    case nir_op_i2i32:
2525    case nir_op_u2f32:
2526    case nir_op_u2u32:
2527    case nir_op_f2f64:
2528    case nir_op_f2i64:
2529    case nir_op_f2u64:
2530    case nir_op_i2f64:
2531    case nir_op_i2i64:
2532    case nir_op_u2f64:
2533    case nir_op_u2u64: {
2534       DEFAULT_CHECKS;
2535       LValues &newDefs = convert(&insn->dest);
2536       Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2537       if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2538          i->rnd = ROUND_Z;
2539       i->sType = sTypes[0];
2540       break;
2541    }
2542    // compare instructions
2543    case nir_op_feq32:
2544    case nir_op_ieq32:
2545    case nir_op_fge32:
2546    case nir_op_ige32:
2547    case nir_op_uge32:
2548    case nir_op_flt32:
2549    case nir_op_ilt32:
2550    case nir_op_ult32:
2551    case nir_op_fne32:
2552    case nir_op_ine32: {
2553       DEFAULT_CHECKS;
2554       LValues &newDefs = convert(&insn->dest);
2555       Instruction *i = mkCmp(getOperation(op),
2556                              getCondCode(op),
2557                              dType,
2558                              newDefs[0],
2559                              dType,
2560                              getSrc(&insn->src[0]),
2561                              getSrc(&insn->src[1]));
2562       if (info.num_inputs == 3)
2563          i->setSrc(2, getSrc(&insn->src[2]));
2564       i->sType = sTypes[0];
2565       break;
2566    }
2567    // those are weird ALU ops and need special handling, because
2568    //   1. they are always componend based
2569    //   2. they basically just merge multiple values into one data type
2570    case nir_op_mov:
2571       if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2572          nir_reg_dest& reg = insn->dest.dest.reg;
2573          uint32_t goffset = regToLmemOffset[reg.reg->index];
2574          uint8_t comps = reg.reg->num_components;
2575          uint8_t size = reg.reg->bit_size / 8;
2576          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2577          uint32_t aoffset = csize * reg.base_offset;
2578          Value *indirect = NULL;
2579
2580          if (reg.indirect)
2581             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2582                               getSrc(reg.indirect, 0), mkImm(csize));
2583
2584          for (uint8_t i = 0u; i < comps; ++i) {
2585             if (!((1u << i) & insn->dest.write_mask))
2586                continue;
2587
2588             Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2589             mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2590          }
2591          break;
2592       } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2593          LValues &newDefs = convert(&insn->dest);
2594          nir_reg_src& reg = insn->src[0].src.reg;
2595          uint32_t goffset = regToLmemOffset[reg.reg->index];
2596          // uint8_t comps = reg.reg->num_components;
2597          uint8_t size = reg.reg->bit_size / 8;
2598          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2599          uint32_t aoffset = csize * reg.base_offset;
2600          Value *indirect = NULL;
2601
2602          if (reg.indirect)
2603             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2604
2605          for (uint8_t i = 0u; i < newDefs.size(); ++i)
2606             loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2607
2608          break;
2609       } else {
2610          LValues &newDefs = convert(&insn->dest);
2611          for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2612             mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2613          }
2614       }
2615       break;
2616    case nir_op_vec2:
2617    case nir_op_vec3:
2618    case nir_op_vec4:
2619    case nir_op_vec8:
2620    case nir_op_vec16: {
2621       LValues &newDefs = convert(&insn->dest);
2622       for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2623          mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2624       }
2625       break;
2626    }
2627    // (un)pack
2628    case nir_op_pack_64_2x32: {
2629       LValues &newDefs = convert(&insn->dest);
2630       Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2631       merge->setSrc(0, getSrc(&insn->src[0], 0));
2632       merge->setSrc(1, getSrc(&insn->src[0], 1));
2633       break;
2634    }
2635    case nir_op_pack_half_2x16_split: {
2636       LValues &newDefs = convert(&insn->dest);
2637       Value *tmpH = getSSA();
2638       Value *tmpL = getSSA();
2639
2640       mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2641       mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2642       mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2643       break;
2644    }
2645    case nir_op_unpack_half_2x16_split_x:
2646    case nir_op_unpack_half_2x16_split_y: {
2647       LValues &newDefs = convert(&insn->dest);
2648       Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2649       if (op == nir_op_unpack_half_2x16_split_y)
2650          cvt->subOp = 1;
2651       break;
2652    }
2653    case nir_op_unpack_64_2x32: {
2654       LValues &newDefs = convert(&insn->dest);
2655       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2656       break;
2657    }
2658    case nir_op_unpack_64_2x32_split_x: {
2659       LValues &newDefs = convert(&insn->dest);
2660       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2661       break;
2662    }
2663    case nir_op_unpack_64_2x32_split_y: {
2664       LValues &newDefs = convert(&insn->dest);
2665       mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2666       break;
2667    }
2668    // special instructions
2669    case nir_op_fsign:
2670    case nir_op_isign: {
2671       DEFAULT_CHECKS;
2672       DataType iType;
2673       if (::isFloatType(dType))
2674          iType = TYPE_F32;
2675       else
2676          iType = TYPE_S32;
2677
2678       LValues &newDefs = convert(&insn->dest);
2679       LValue *val0 = getScratch();
2680       LValue *val1 = getScratch();
2681       mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2682       mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2683
2684       if (dType == TYPE_F64) {
2685          mkOp2(OP_SUB, iType, val0, val0, val1);
2686          mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2687       } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2688          mkOp2(OP_SUB, iType, val0, val1, val0);
2689          mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2690          mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2691       } else if (::isFloatType(dType))
2692          mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2693       else
2694          mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2695       break;
2696    }
2697    case nir_op_fcsel:
2698    case nir_op_b32csel: {
2699       DEFAULT_CHECKS;
2700       LValues &newDefs = convert(&insn->dest);
2701       mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2702       break;
2703    }
2704    case nir_op_ibitfield_extract:
2705    case nir_op_ubitfield_extract: {
2706       DEFAULT_CHECKS;
2707       Value *tmp = getSSA();
2708       LValues &newDefs = convert(&insn->dest);
2709       mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2710       mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2711       break;
2712    }
2713    case nir_op_bfm: {
2714       DEFAULT_CHECKS;
2715       LValues &newDefs = convert(&insn->dest);
2716       mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
2717       break;
2718    }
2719    case nir_op_bitfield_insert: {
2720       DEFAULT_CHECKS;
2721       LValues &newDefs = convert(&insn->dest);
2722       LValue *temp = getSSA();
2723       mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2724       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2725       break;
2726    }
2727    case nir_op_bit_count: {
2728       DEFAULT_CHECKS;
2729       LValues &newDefs = convert(&insn->dest);
2730       mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2731       break;
2732    }
2733    case nir_op_bitfield_reverse: {
2734       DEFAULT_CHECKS;
2735       LValues &newDefs = convert(&insn->dest);
2736       mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
2737       break;
2738    }
2739    case nir_op_find_lsb: {
2740       DEFAULT_CHECKS;
2741       LValues &newDefs = convert(&insn->dest);
2742       Value *tmp = getSSA();
2743       mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
2744       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2745       break;
2746    }
2747    case nir_op_extract_u8: {
2748       DEFAULT_CHECKS;
2749       LValues &newDefs = convert(&insn->dest);
2750       Value *prmt = getSSA();
2751       mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
2752       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2753       break;
2754    }
2755    case nir_op_extract_i8: {
2756       DEFAULT_CHECKS;
2757       LValues &newDefs = convert(&insn->dest);
2758       Value *prmt = getSSA();
2759       mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
2760       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2761       break;
2762    }
2763    case nir_op_extract_u16: {
2764       DEFAULT_CHECKS;
2765       LValues &newDefs = convert(&insn->dest);
2766       Value *prmt = getSSA();
2767       mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
2768       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2769       break;
2770    }
2771    case nir_op_extract_i16: {
2772       DEFAULT_CHECKS;
2773       LValues &newDefs = convert(&insn->dest);
2774       Value *prmt = getSSA();
2775       mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
2776       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2777       break;
2778    }
2779    case nir_op_urol: {
2780       DEFAULT_CHECKS;
2781       LValues &newDefs = convert(&insn->dest);
2782       mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2783             getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2784          ->subOp = NV50_IR_SUBOP_SHF_L |
2785                    NV50_IR_SUBOP_SHF_W |
2786                    NV50_IR_SUBOP_SHF_HI;
2787       break;
2788    }
2789    case nir_op_uror: {
2790       DEFAULT_CHECKS;
2791       LValues &newDefs = convert(&insn->dest);
2792       mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2793             getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2794          ->subOp = NV50_IR_SUBOP_SHF_R |
2795                    NV50_IR_SUBOP_SHF_W |
2796                    NV50_IR_SUBOP_SHF_LO;
2797       break;
2798    }
2799    // boolean conversions
2800    case nir_op_b2f32: {
2801       DEFAULT_CHECKS;
2802       LValues &newDefs = convert(&insn->dest);
2803       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2804       break;
2805    }
2806    case nir_op_b2f64: {
2807       DEFAULT_CHECKS;
2808       LValues &newDefs = convert(&insn->dest);
2809       Value *tmp = getSSA(4);
2810       mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2811       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2812       break;
2813    }
2814    case nir_op_f2b32:
2815    case nir_op_i2b32: {
2816       DEFAULT_CHECKS;
2817       LValues &newDefs = convert(&insn->dest);
2818       Value *src1;
2819       if (typeSizeof(sTypes[0]) == 8) {
2820          src1 = loadImm(getSSA(8), 0.0);
2821       } else {
2822          src1 = zero;
2823       }
2824       CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2825       mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2826       break;
2827    }
2828    case nir_op_b2i32: {
2829       DEFAULT_CHECKS;
2830       LValues &newDefs = convert(&insn->dest);
2831       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2832       break;
2833    }
2834    case nir_op_b2i64: {
2835       DEFAULT_CHECKS;
2836       LValues &newDefs = convert(&insn->dest);
2837       LValue *def = getScratch();
2838       mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2839       mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2840       break;
2841    }
2842    default:
2843       ERROR("unknown nir_op %s\n", info.name);
2844       return false;
2845    }
2846
2847    if (!oldPos) {
2848       oldPos = this->bb->getEntry();
2849       oldPos->precise = insn->exact;
2850    }
2851
2852    if (unlikely(!oldPos))
2853       return true;
2854
2855    while (oldPos->next) {
2856       oldPos = oldPos->next;
2857       oldPos->precise = insn->exact;
2858    }
2859    oldPos->saturate = insn->dest.saturate;
2860
2861    return true;
2862 }
2863 #undef DEFAULT_CHECKS
2864
2865 bool
2866 Converter::visit(nir_ssa_undef_instr *insn)
2867 {
2868    LValues &newDefs = convert(&insn->def);
2869    for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2870       mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2871    }
2872    return true;
2873 }
2874
2875 #define CASE_SAMPLER(ty) \
2876    case GLSL_SAMPLER_DIM_ ## ty : \
2877       if (isArray && !isShadow) \
2878          return TEX_TARGET_ ## ty ## _ARRAY; \
2879       else if (!isArray && isShadow) \
2880          return TEX_TARGET_## ty ## _SHADOW; \
2881       else if (isArray && isShadow) \
2882          return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2883       else \
2884          return TEX_TARGET_ ## ty
2885
2886 TexTarget
2887 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2888 {
2889    switch (dim) {
2890    CASE_SAMPLER(1D);
2891    CASE_SAMPLER(2D);
2892    CASE_SAMPLER(CUBE);
2893    case GLSL_SAMPLER_DIM_3D:
2894       return TEX_TARGET_3D;
2895    case GLSL_SAMPLER_DIM_MS:
2896       if (isArray)
2897          return TEX_TARGET_2D_MS_ARRAY;
2898       return TEX_TARGET_2D_MS;
2899    case GLSL_SAMPLER_DIM_RECT:
2900       if (isShadow)
2901          return TEX_TARGET_RECT_SHADOW;
2902       return TEX_TARGET_RECT;
2903    case GLSL_SAMPLER_DIM_BUF:
2904       return TEX_TARGET_BUFFER;
2905    case GLSL_SAMPLER_DIM_EXTERNAL:
2906       return TEX_TARGET_2D;
2907    default:
2908       ERROR("unknown glsl_sampler_dim %u\n", dim);
2909       assert(false);
2910       return TEX_TARGET_COUNT;
2911    }
2912 }
2913 #undef CASE_SAMPLER
2914
2915 Value*
2916 Converter::applyProjection(Value *src, Value *proj)
2917 {
2918    if (!proj)
2919       return src;
2920    return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2921 }
2922
2923 unsigned int
2924 Converter::getNIRArgCount(TexInstruction::Target& target)
2925 {
2926    unsigned int result = target.getArgCount();
2927    if (target.isCube() && target.isArray())
2928       result--;
2929    if (target.isMS())
2930       result--;
2931    return result;
2932 }
2933
2934 CacheMode
2935 Converter::convert(enum gl_access_qualifier access)
2936 {
2937    switch (access) {
2938    case ACCESS_VOLATILE:
2939       return CACHE_CV;
2940    case ACCESS_COHERENT:
2941       return CACHE_CG;
2942    default:
2943       return CACHE_CA;
2944    }
2945 }
2946
2947 bool
2948 Converter::visit(nir_tex_instr *insn)
2949 {
2950    switch (insn->op) {
2951    case nir_texop_lod:
2952    case nir_texop_query_levels:
2953    case nir_texop_tex:
2954    case nir_texop_texture_samples:
2955    case nir_texop_tg4:
2956    case nir_texop_txb:
2957    case nir_texop_txd:
2958    case nir_texop_txf:
2959    case nir_texop_txf_ms:
2960    case nir_texop_txl:
2961    case nir_texop_txs: {
2962       LValues &newDefs = convert(&insn->dest);
2963       std::vector<Value*> srcs;
2964       std::vector<Value*> defs;
2965       std::vector<nir_src*> offsets;
2966       uint8_t mask = 0;
2967       bool lz = false;
2968       Value *proj = NULL;
2969       TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2970       operation op = getOperation(insn->op);
2971
2972       int r, s;
2973       int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
2974       int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
2975       int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
2976       int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
2977       int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
2978       int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
2979       int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
2980       int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
2981       int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
2982       int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
2983       int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
2984       int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
2985       int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
2986
2987       bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
2988       assert((sampHandleIdx != -1) == (texHandleIdx != -1));
2989
2990       if (projIdx != -1)
2991          proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
2992
2993       srcs.resize(insn->coord_components);
2994       for (uint8_t i = 0u; i < insn->coord_components; ++i)
2995          srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
2996
2997       // sometimes we get less args than target.getArgCount, but codegen expects the latter
2998       if (insn->coord_components) {
2999          uint32_t argCount = target.getArgCount();
3000
3001          if (target.isMS())
3002             argCount -= 1;
3003
3004          for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3005             srcs.push_back(getSSA());
3006       }
3007
3008       if (insn->op == nir_texop_texture_samples)
3009          srcs.push_back(zero);
3010       else if (!insn->num_srcs)
3011          srcs.push_back(loadImm(NULL, 0));
3012       if (biasIdx != -1)
3013          srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3014       if (lodIdx != -1)
3015          srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3016       else if (op == OP_TXF)
3017          lz = true;
3018       if (msIdx != -1)
3019          srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3020       if (offsetIdx != -1)
3021          offsets.push_back(&insn->src[offsetIdx].src);
3022       if (compIdx != -1)
3023          srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3024       if (texOffIdx != -1) {
3025          srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3026          texOffIdx = srcs.size() - 1;
3027       }
3028       if (sampOffIdx != -1) {
3029          srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3030          sampOffIdx = srcs.size() - 1;
3031       }
3032       if (bindless) {
3033          // currently we use the lower bits
3034          Value *split[2];
3035          Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3036
3037          mkSplit(split, 4, handle);
3038
3039          srcs.push_back(split[0]);
3040          texOffIdx = srcs.size() - 1;
3041       }
3042
3043       r = bindless ? 0xff : insn->texture_index;
3044       s = bindless ? 0x1f : insn->sampler_index;
3045
3046       defs.resize(newDefs.size());
3047       for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3048          defs[d] = newDefs[d];
3049          mask |= 1 << d;
3050       }
3051       if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3052          lz = true;
3053
3054       TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3055       texi->tex.levelZero = lz;
3056       texi->tex.mask = mask;
3057       texi->tex.bindless = bindless;
3058
3059       if (texOffIdx != -1)
3060          texi->tex.rIndirectSrc = texOffIdx;
3061       if (sampOffIdx != -1)
3062          texi->tex.sIndirectSrc = sampOffIdx;
3063
3064       switch (insn->op) {
3065       case nir_texop_tg4:
3066          if (!target.isShadow())
3067             texi->tex.gatherComp = insn->component;
3068          break;
3069       case nir_texop_txs:
3070          texi->tex.query = TXQ_DIMS;
3071          break;
3072       case nir_texop_texture_samples:
3073          texi->tex.mask = 0x4;
3074          texi->tex.query = TXQ_TYPE;
3075          break;
3076       case nir_texop_query_levels:
3077          texi->tex.mask = 0x8;
3078          texi->tex.query = TXQ_DIMS;
3079          break;
3080       default:
3081          break;
3082       }
3083
3084       texi->tex.useOffsets = offsets.size();
3085       if (texi->tex.useOffsets) {
3086          for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3087             for (uint32_t c = 0u; c < 3; ++c) {
3088                uint8_t s2 = std::min(c, target.getDim() - 1);
3089                texi->offset[s][c].set(getSrc(offsets[s], s2));
3090                texi->offset[s][c].setInsn(texi);
3091             }
3092          }
3093       }
3094
3095       if (op == OP_TXG && offsetIdx == -1) {
3096          if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3097             texi->tex.useOffsets = 4;
3098             setPosition(texi, false);
3099             for (uint8_t i = 0; i < 4; ++i) {
3100                for (uint8_t j = 0; j < 2; ++j) {
3101                   texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3102                   texi->offset[i][j].setInsn(texi);
3103                }
3104             }
3105             setPosition(texi, true);
3106          }
3107       }
3108
3109       if (ddxIdx != -1 && ddyIdx != -1) {
3110          for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3111             texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3112             texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3113          }
3114       }
3115
3116       break;
3117    }
3118    default:
3119       ERROR("unknown nir_texop %u\n", insn->op);
3120       return false;
3121    }
3122    return true;
3123 }
3124
3125 bool
3126 Converter::run()
3127 {
3128    bool progress;
3129
3130    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3131       nir_print_shader(nir, stderr);
3132
3133    struct nir_lower_subgroups_options subgroup_options = {
3134       .subgroup_size = 32,
3135       .ballot_bit_size = 32,
3136    };
3137
3138    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3139    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3140    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3141    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3142    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3143    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3144    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3145
3146    /*TODO: improve this lowering/optimisation loop so that we can use
3147     *      nir_opt_idiv_const effectively before this.
3148     */
3149    NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise);
3150
3151    do {
3152       progress = false;
3153       NIR_PASS(progress, nir, nir_copy_prop);
3154       NIR_PASS(progress, nir, nir_opt_remove_phis);
3155       NIR_PASS(progress, nir, nir_opt_trivial_continues);
3156       NIR_PASS(progress, nir, nir_opt_cse);
3157       NIR_PASS(progress, nir, nir_opt_algebraic);
3158       NIR_PASS(progress, nir, nir_opt_constant_folding);
3159       NIR_PASS(progress, nir, nir_copy_prop);
3160       NIR_PASS(progress, nir, nir_opt_dce);
3161       NIR_PASS(progress, nir, nir_opt_dead_cf);
3162    } while (progress);
3163
3164    NIR_PASS_V(nir, nir_lower_bool_to_int32);
3165    NIR_PASS_V(nir, nir_lower_locals_to_regs);
3166    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3167    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3168
3169    // Garbage collect dead instructions
3170    nir_sweep(nir);
3171
3172    if (!parseNIR()) {
3173       ERROR("Couldn't prase NIR!\n");
3174       return false;
3175    }
3176
3177    if (!assignSlots()) {
3178       ERROR("Couldn't assign slots!\n");
3179       return false;
3180    }
3181
3182    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3183       nir_print_shader(nir, stderr);
3184
3185    nir_foreach_function(function, nir) {
3186       if (!visit(function))
3187          return false;
3188    }
3189
3190    return true;
3191 }
3192
3193 } // unnamed namespace
3194
3195 namespace nv50_ir {
3196
3197 bool
3198 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3199 {
3200    nir_shader *nir = (nir_shader*)info->bin.source;
3201    Converter converter(this, nir, info);
3202    bool result = converter.run();
3203    if (!result)
3204       return result;
3205    LoweringHelper lowering;
3206    lowering.run(this);
3207    tlsSize = info->bin.tlsSpace;
3208    return result;
3209 }
3210
3211 } // namespace nv50_ir
3212
3213 static nir_shader_compiler_options
3214 nvir_nir_shader_compiler_options(int chipset)
3215 {
3216    nir_shader_compiler_options op = {};
3217    op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
3218    op.lower_ffma = false;
3219    op.fuse_ffma = false; /* nir doesn't track mad vs fma */
3220    op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
3221    op.lower_flrp32 = true;
3222    op.lower_flrp64 = true;
3223    op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it
3224    op.lower_fsat = false;
3225    op.lower_fsqrt = false; // TODO: only before gm200
3226    op.lower_sincos = false;
3227    op.lower_fmod = true;
3228    op.lower_bitfield_extract = false;
3229    op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET);
3230    op.lower_bitfield_insert = false;
3231    op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET);
3232    op.lower_bitfield_insert_to_bitfield_select = false;
3233    op.lower_bitfield_reverse = false;
3234    op.lower_bit_count = false;
3235    op.lower_ifind_msb = false;
3236    op.lower_find_lsb = false;
3237    op.lower_uadd_carry = true; // TODO
3238    op.lower_usub_borrow = true; // TODO
3239    op.lower_mul_high = false;
3240    op.lower_negate = false;
3241    op.lower_sub = true;
3242    op.lower_scmp = true; // TODO: not implemented yet
3243    op.lower_vector_cmp = false;
3244    op.lower_idiv = true;
3245    op.lower_bitops = false;
3246    op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
3247    op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
3248    op.lower_fdph = false;
3249    op.lower_fdot = false;
3250    op.fdot_replicates = false; // TODO
3251    op.lower_ffloor = false; // TODO
3252    op.lower_ffract = true;
3253    op.lower_fceil = false; // TODO
3254    op.lower_ftrunc = false;
3255    op.lower_ldexp = true;
3256    op.lower_pack_half_2x16 = true;
3257    op.lower_pack_unorm_2x16 = true;
3258    op.lower_pack_snorm_2x16 = true;
3259    op.lower_pack_unorm_4x8 = true;
3260    op.lower_pack_snorm_4x8 = true;
3261    op.lower_unpack_half_2x16 = true;
3262    op.lower_unpack_unorm_2x16 = true;
3263    op.lower_unpack_snorm_2x16 = true;
3264    op.lower_unpack_unorm_4x8 = true;
3265    op.lower_unpack_snorm_4x8 = true;
3266    op.lower_pack_split = false;
3267    op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
3268    op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
3269    op.lower_all_io_to_temps = false;
3270    op.lower_all_io_to_elements = false;
3271    op.vertex_id_zero_based = false;
3272    op.lower_base_vertex = false;
3273    op.lower_helper_invocation = false;
3274    op.optimize_sample_mask_in = false;
3275    op.lower_cs_local_index_from_id = true;
3276    op.lower_cs_local_id_from_index = false;
3277    op.lower_device_index_to_zero = false; // TODO
3278    op.lower_wpos_pntc = false; // TODO
3279    op.lower_hadd = true; // TODO
3280    op.lower_add_sat = true; // TODO
3281    op.vectorize_io = false;
3282    op.lower_to_scalar = true;
3283    op.unify_interfaces = false;
3284    op.use_interpolated_input_intrinsics = true;
3285    op.lower_mul_2x32_64 = true; // TODO
3286    op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
3287    op.has_imul24 = false;
3288    op.intel_vec4 = false;
3289    op.max_unroll_iterations = 32;
3290    op.lower_int64_options = (nir_lower_int64_options) (
3291       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
3292       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
3293       nir_lower_divmod64 |
3294       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
3295       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
3296       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
3297       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
3298       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
3299       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
3300       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
3301       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
3302       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
3303       ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
3304       nir_lower_ufind_msb64
3305    );
3306    op.lower_doubles_options = (nir_lower_doubles_options) (
3307       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
3308       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
3309       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
3310       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
3311       nir_lower_dmod |
3312       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
3313       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
3314    );
3315    return op;
3316 }
3317
3318 static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
3319 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET);
3320 static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
3321 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET);
3322 static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
3323 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET);
3324
3325 const nir_shader_compiler_options *
3326 nv50_ir_nir_shader_compiler_options(int chipset)
3327 {
3328    if (chipset >= NVISA_GV100_CHIPSET)
3329       return &gv100_nir_shader_compiler_options;
3330    if (chipset >= NVISA_GM107_CHIPSET)
3331       return &gm107_nir_shader_compiler_options;
3332    return &gf100_nir_shader_compiler_options;
3333 }