src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33 #include "tgsi/tgsi_from_mesa.h"
  34
  35 #if __cplusplus >= 201103L
  36 #include <unordered_map>
  37 #else
  38 #include <tr1/unordered_map>
  39 #endif
  40 #include <cstring>
  41 #include <list>
  42 #include <vector>
  43
  44 namespace {
  45
  46 #if __cplusplus >= 201103L
  47 using std::hash;
  48 using std::unordered_map;
  49 #else
  50 using std::tr1::hash;
  51 using std::tr1::unordered_map;
  52 #endif
  53
  54 using namespace nv50_ir;
  55
  56 int
  57 type_size(const struct glsl_type *type, bool bindless)
  58 {
  59    return glsl_count_attribute_slots(type, false);
  60 }
  61
// Translates a nir_shader into the nv50_ir representation. Construction only
// records the inputs; the conversion itself is driven through run().
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   // one LValue per component of a NIR value
   typedef std::vector<LValue*> LValues;
   // NIR ssa/register index -> per-component LValues
   typedef unordered_map<unsigned, LValues> NirDefMap;
   // NIR ssa index -> defining load_const instruction
   typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
   typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
   // NIR block index -> nv50_ir basic block
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   // convert() overloads map a NIR entity onto its nv50_ir counterpart,
   // creating (and caching) it on first use where a map is involved.
   CacheMode convert(enum gl_access_qualifier);
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   Value* convert(nir_load_const_instr*, uint8_t);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   // getSrc() overloads fetch one component of an already known NIR value.
   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   // isScalar indicates that the addressing is scalar, vec4 addressing is
   // assumed otherwise
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
                        bool isScalar = false);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   // type classification helpers based on the nir_op_infos table
   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   // destination data type of an instruction
   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_intrinsic_instr *, bool isSigned);
   DataType getDType(nir_op, uint8_t);

   // source data types of an instruction
   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   // NIR opcode -> nv50_ir operation
   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   // operation to apply to the source before the main one, OP_NOP if none
   operation preOperationNeeded(nir_op);

   // NIR opcode -> nv50_ir sub-operation, 0 if none
   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);
   unsigned int getNIRArgCount(TexInstruction::Target&);

   // shader being converted
   nir_shader *nir;

   NirDefMap ssaDefs;       // keyed by nir_ssa_def::index
   NirDefMap regDefs;       // keyed by nir_register::index
   ImmediateMap immediates; // keyed by nir_ssa_def::index
   // NOTE(review): presumably keyed by register index; not used in this part
   // of the file - confirm against the users.
   NirArrayLMemOffsets regToLmemOffset;
   NirBlockMap blocks;      // keyed by nir_block::index
   unsigned int curLoopDepth; // starts at 0

   // the members below are not set by the constructor, except for zero
   BasicBlock *exit;
   Value *zero;             // immediate 0 created in the constructor
   Instruction *immInsertPos;

   int clipVertexOutput;    // starts at -1

   union {
      struct {
         Value *position;
      } fp;
   };
};
 181
 182 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 183    : ConverterCommon(prog, info),
 184      nir(nir),
 185      curLoopDepth(0),
 186      clipVertexOutput(-1)
 187 {
 188    zero = mkImm((uint32_t)0);
 189 }
 190
 191 BasicBlock *
 192 Converter::convert(nir_block *block)
 193 {
 194    NirBlockMap::iterator it = blocks.find(block->index);
 195    if (it != blocks.end())
 196       return it->second;
 197
 198    BasicBlock *bb = new BasicBlock(func);
 199    blocks[block->index] = bb;
 200    return bb;
 201 }
 202
 203 bool
 204 Converter::isFloatType(nir_alu_type type)
 205 {
 206    return nir_alu_type_get_base_type(type) == nir_type_float;
 207 }
 208
 209 bool
 210 Converter::isSignedType(nir_alu_type type)
 211 {
 212    return nir_alu_type_get_base_type(type) == nir_type_int;
 213 }
 214
 215 bool
 216 Converter::isResultFloat(nir_op op)
 217 {
 218    const nir_op_info &info = nir_op_infos[op];
 219    if (info.output_type != nir_type_invalid)
 220       return isFloatType(info.output_type);
 221
 222    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 223    assert(false);
 224    return true;
 225 }
 226
 227 bool
 228 Converter::isResultSigned(nir_op op)
 229 {
 230    switch (op) {
 231    // there is no umul and we get wrong results if we treat all muls as signed
 232    case nir_op_imul:
 233    case nir_op_inot:
 234       return false;
 235    default:
 236       const nir_op_info &info = nir_op_infos[op];
 237       if (info.output_type != nir_type_invalid)
 238          return isSignedType(info.output_type);
 239       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 240       assert(false);
 241       return true;
 242    }
 243 }
 244
 245 DataType
 246 Converter::getDType(nir_alu_instr *insn)
 247 {
 248    if (insn->dest.dest.is_ssa)
 249       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 250    else
 251       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 252 }
 253
 254 DataType
 255 Converter::getDType(nir_intrinsic_instr *insn)
 256 {
 257    bool isSigned;
 258    switch (insn->intrinsic) {
 259    case nir_intrinsic_shared_atomic_imax:
 260    case nir_intrinsic_shared_atomic_imin:
 261    case nir_intrinsic_ssbo_atomic_imax:
 262    case nir_intrinsic_ssbo_atomic_imin:
 263       isSigned = true;
 264       break;
 265    default:
 266       isSigned = false;
 267       break;
 268    }
 269
 270    return getDType(insn, isSigned);
 271 }
 272
 273 DataType
 274 Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 275 {
 276    if (insn->dest.is_ssa)
 277       return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
 278    else
 279       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 280 }
 281
 282 DataType
 283 Converter::getDType(nir_op op, uint8_t bitSize)
 284 {
 285    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 286    if (ty == TYPE_NONE) {
 287       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 288       assert(false);
 289    }
 290    return ty;
 291 }
 292
 293 std::vector<DataType>
 294 Converter::getSTypes(nir_alu_instr *insn)
 295 {
 296    const nir_op_info &info = nir_op_infos[insn->op];
 297    std::vector<DataType> res(info.num_inputs);
 298
 299    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 300       if (info.input_types[i] != nir_type_invalid) {
 301          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 302       } else {
 303          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 304          assert(false);
 305          res[i] = TYPE_NONE;
 306          break;
 307       }
 308    }
 309
 310    return res;
 311 }
 312
 313 DataType
 314 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 315 {
 316    uint8_t bitSize;
 317    if (src.is_ssa)
 318       bitSize = src.ssa->bit_size;
 319    else
 320       bitSize = src.reg.reg->bit_size;
 321
 322    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 323    if (ty == TYPE_NONE) {
 324       const char *str;
 325       if (isFloat)
 326          str = "float";
 327       else if (isSigned)
 328          str = "int";
 329       else
 330          str = "uint";
 331       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 332       assert(false);
 333    }
 334    return ty;
 335 }
 336
 337 operation
 338 Converter::getOperation(nir_op op)
 339 {
 340    switch (op) {
 341    // basic ops with float and int variants
 342    case nir_op_fabs:
 343    case nir_op_iabs:
 344       return OP_ABS;
 345    case nir_op_fadd:
 346    case nir_op_iadd:
 347       return OP_ADD;
 348    case nir_op_iand:
 349       return OP_AND;
 350    case nir_op_ifind_msb:
 351    case nir_op_ufind_msb:
 352       return OP_BFIND;
 353    case nir_op_fceil:
 354       return OP_CEIL;
 355    case nir_op_fcos:
 356       return OP_COS;
 357    case nir_op_f2f32:
 358    case nir_op_f2f64:
 359    case nir_op_f2i32:
 360    case nir_op_f2i64:
 361    case nir_op_f2u32:
 362    case nir_op_f2u64:
 363    case nir_op_i2f32:
 364    case nir_op_i2f64:
 365    case nir_op_i2i32:
 366    case nir_op_i2i64:
 367    case nir_op_u2f32:
 368    case nir_op_u2f64:
 369    case nir_op_u2u32:
 370    case nir_op_u2u64:
 371       return OP_CVT;
 372    case nir_op_fddx:
 373    case nir_op_fddx_coarse:
 374    case nir_op_fddx_fine:
 375       return OP_DFDX;
 376    case nir_op_fddy:
 377    case nir_op_fddy_coarse:
 378    case nir_op_fddy_fine:
 379       return OP_DFDY;
 380    case nir_op_fdiv:
 381    case nir_op_idiv:
 382    case nir_op_udiv:
 383       return OP_DIV;
 384    case nir_op_fexp2:
 385       return OP_EX2;
 386    case nir_op_ffloor:
 387       return OP_FLOOR;
 388    case nir_op_ffma:
 389       return OP_FMA;
 390    case nir_op_flog2:
 391       return OP_LG2;
 392    case nir_op_fmax:
 393    case nir_op_imax:
 394    case nir_op_umax:
 395       return OP_MAX;
 396    case nir_op_pack_64_2x32_split:
 397       return OP_MERGE;
 398    case nir_op_fmin:
 399    case nir_op_imin:
 400    case nir_op_umin:
 401       return OP_MIN;
 402    case nir_op_fmod:
 403    case nir_op_imod:
 404    case nir_op_umod:
 405    case nir_op_frem:
 406    case nir_op_irem:
 407       return OP_MOD;
 408    case nir_op_fmul:
 409    case nir_op_imul:
 410    case nir_op_imul_high:
 411    case nir_op_umul_high:
 412       return OP_MUL;
 413    case nir_op_fneg:
 414    case nir_op_ineg:
 415       return OP_NEG;
 416    case nir_op_inot:
 417       return OP_NOT;
 418    case nir_op_ior:
 419       return OP_OR;
 420    case nir_op_fpow:
 421       return OP_POW;
 422    case nir_op_frcp:
 423       return OP_RCP;
 424    case nir_op_frsq:
 425       return OP_RSQ;
 426    case nir_op_fsat:
 427       return OP_SAT;
 428    case nir_op_feq32:
 429    case nir_op_ieq32:
 430    case nir_op_fge32:
 431    case nir_op_ige32:
 432    case nir_op_uge32:
 433    case nir_op_flt32:
 434    case nir_op_ilt32:
 435    case nir_op_ult32:
 436    case nir_op_fne32:
 437    case nir_op_ine32:
 438       return OP_SET;
 439    case nir_op_ishl:
 440       return OP_SHL;
 441    case nir_op_ishr:
 442    case nir_op_ushr:
 443       return OP_SHR;
 444    case nir_op_fsin:
 445       return OP_SIN;
 446    case nir_op_fsqrt:
 447       return OP_SQRT;
 448    case nir_op_ftrunc:
 449       return OP_TRUNC;
 450    case nir_op_ixor:
 451       return OP_XOR;
 452    default:
 453       ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
 454       assert(false);
 455       return OP_NOP;
 456    }
 457 }
 458
 459 operation
 460 Converter::getOperation(nir_texop op)
 461 {
 462    switch (op) {
 463    case nir_texop_tex:
 464       return OP_TEX;
 465    case nir_texop_lod:
 466       return OP_TXLQ;
 467    case nir_texop_txb:
 468       return OP_TXB;
 469    case nir_texop_txd:
 470       return OP_TXD;
 471    case nir_texop_txf:
 472    case nir_texop_txf_ms:
 473       return OP_TXF;
 474    case nir_texop_tg4:
 475       return OP_TXG;
 476    case nir_texop_txl:
 477       return OP_TXL;
 478    case nir_texop_query_levels:
 479    case nir_texop_texture_samples:
 480    case nir_texop_txs:
 481       return OP_TXQ;
 482    default:
 483       ERROR("couldn't get operation for nir_texop %u\n", op);
 484       assert(false);
 485       return OP_NOP;
 486    }
 487 }
 488
 489 operation
 490 Converter::getOperation(nir_intrinsic_op op)
 491 {
 492    switch (op) {
 493    case nir_intrinsic_emit_vertex:
 494       return OP_EMIT;
 495    case nir_intrinsic_end_primitive:
 496       return OP_RESTART;
 497    case nir_intrinsic_bindless_image_atomic_add:
 498    case nir_intrinsic_image_atomic_add:
 499    case nir_intrinsic_bindless_image_atomic_and:
 500    case nir_intrinsic_image_atomic_and:
 501    case nir_intrinsic_bindless_image_atomic_comp_swap:
 502    case nir_intrinsic_image_atomic_comp_swap:
 503    case nir_intrinsic_bindless_image_atomic_exchange:
 504    case nir_intrinsic_image_atomic_exchange:
 505    case nir_intrinsic_bindless_image_atomic_imax:
 506    case nir_intrinsic_image_atomic_imax:
 507    case nir_intrinsic_bindless_image_atomic_umax:
 508    case nir_intrinsic_image_atomic_umax:
 509    case nir_intrinsic_bindless_image_atomic_imin:
 510    case nir_intrinsic_image_atomic_imin:
 511    case nir_intrinsic_bindless_image_atomic_umin:
 512    case nir_intrinsic_image_atomic_umin:
 513    case nir_intrinsic_bindless_image_atomic_or:
 514    case nir_intrinsic_image_atomic_or:
 515    case nir_intrinsic_bindless_image_atomic_xor:
 516    case nir_intrinsic_image_atomic_xor:
 517    case nir_intrinsic_bindless_image_atomic_inc_wrap:
 518    case nir_intrinsic_image_atomic_inc_wrap:
 519    case nir_intrinsic_bindless_image_atomic_dec_wrap:
 520    case nir_intrinsic_image_atomic_dec_wrap:
 521       return OP_SUREDP;
 522    case nir_intrinsic_bindless_image_load:
 523    case nir_intrinsic_image_load:
 524       return OP_SULDP;
 525    case nir_intrinsic_bindless_image_samples:
 526    case nir_intrinsic_image_samples:
 527    case nir_intrinsic_bindless_image_size:
 528    case nir_intrinsic_image_size:
 529       return OP_SUQ;
 530    case nir_intrinsic_bindless_image_store:
 531    case nir_intrinsic_image_store:
 532       return OP_SUSTP;
 533    default:
 534       ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
 535       assert(false);
 536       return OP_NOP;
 537    }
 538 }
 539
 540 operation
 541 Converter::preOperationNeeded(nir_op op)
 542 {
 543    switch (op) {
 544    case nir_op_fcos:
 545    case nir_op_fsin:
 546       return OP_PRESIN;
 547    default:
 548       return OP_NOP;
 549    }
 550 }
 551
 552 int
 553 Converter::getSubOp(nir_op op)
 554 {
 555    switch (op) {
 556    case nir_op_imul_high:
 557    case nir_op_umul_high:
 558       return NV50_IR_SUBOP_MUL_HIGH;
 559    case nir_op_ishl:
 560    case nir_op_ishr:
 561    case nir_op_ushr:
 562       return NV50_IR_SUBOP_SHIFT_WRAP;
 563    default:
 564       return 0;
 565    }
 566 }
 567
 568 int
 569 Converter::getSubOp(nir_intrinsic_op op)
 570 {
 571    switch (op) {
 572    case nir_intrinsic_bindless_image_atomic_add:
 573    case nir_intrinsic_global_atomic_add:
 574    case nir_intrinsic_image_atomic_add:
 575    case nir_intrinsic_shared_atomic_add:
 576    case nir_intrinsic_ssbo_atomic_add:
 577       return  NV50_IR_SUBOP_ATOM_ADD;
 578    case nir_intrinsic_bindless_image_atomic_and:
 579    case nir_intrinsic_global_atomic_and:
 580    case nir_intrinsic_image_atomic_and:
 581    case nir_intrinsic_shared_atomic_and:
 582    case nir_intrinsic_ssbo_atomic_and:
 583       return  NV50_IR_SUBOP_ATOM_AND;
 584    case nir_intrinsic_bindless_image_atomic_comp_swap:
 585    case nir_intrinsic_global_atomic_comp_swap:
 586    case nir_intrinsic_image_atomic_comp_swap:
 587    case nir_intrinsic_shared_atomic_comp_swap:
 588    case nir_intrinsic_ssbo_atomic_comp_swap:
 589       return  NV50_IR_SUBOP_ATOM_CAS;
 590    case nir_intrinsic_bindless_image_atomic_exchange:
 591    case nir_intrinsic_global_atomic_exchange:
 592    case nir_intrinsic_image_atomic_exchange:
 593    case nir_intrinsic_shared_atomic_exchange:
 594    case nir_intrinsic_ssbo_atomic_exchange:
 595       return  NV50_IR_SUBOP_ATOM_EXCH;
 596    case nir_intrinsic_bindless_image_atomic_or:
 597    case nir_intrinsic_global_atomic_or:
 598    case nir_intrinsic_image_atomic_or:
 599    case nir_intrinsic_shared_atomic_or:
 600    case nir_intrinsic_ssbo_atomic_or:
 601       return  NV50_IR_SUBOP_ATOM_OR;
 602    case nir_intrinsic_bindless_image_atomic_imax:
 603    case nir_intrinsic_bindless_image_atomic_umax:
 604    case nir_intrinsic_global_atomic_imax:
 605    case nir_intrinsic_global_atomic_umax:
 606    case nir_intrinsic_image_atomic_imax:
 607    case nir_intrinsic_image_atomic_umax:
 608    case nir_intrinsic_shared_atomic_imax:
 609    case nir_intrinsic_shared_atomic_umax:
 610    case nir_intrinsic_ssbo_atomic_imax:
 611    case nir_intrinsic_ssbo_atomic_umax:
 612       return  NV50_IR_SUBOP_ATOM_MAX;
 613    case nir_intrinsic_bindless_image_atomic_imin:
 614    case nir_intrinsic_bindless_image_atomic_umin:
 615    case nir_intrinsic_global_atomic_imin:
 616    case nir_intrinsic_global_atomic_umin:
 617    case nir_intrinsic_image_atomic_imin:
 618    case nir_intrinsic_image_atomic_umin:
 619    case nir_intrinsic_shared_atomic_imin:
 620    case nir_intrinsic_shared_atomic_umin:
 621    case nir_intrinsic_ssbo_atomic_imin:
 622    case nir_intrinsic_ssbo_atomic_umin:
 623       return  NV50_IR_SUBOP_ATOM_MIN;
 624    case nir_intrinsic_bindless_image_atomic_xor:
 625    case nir_intrinsic_global_atomic_xor:
 626    case nir_intrinsic_image_atomic_xor:
 627    case nir_intrinsic_shared_atomic_xor:
 628    case nir_intrinsic_ssbo_atomic_xor:
 629       return  NV50_IR_SUBOP_ATOM_XOR;
 630    case nir_intrinsic_bindless_image_atomic_inc_wrap:
 631    case nir_intrinsic_image_atomic_inc_wrap:
 632       return NV50_IR_SUBOP_ATOM_INC;
 633    case nir_intrinsic_bindless_image_atomic_dec_wrap:
 634    case nir_intrinsic_image_atomic_dec_wrap:
 635       return NV50_IR_SUBOP_ATOM_DEC;
 636
 637    case nir_intrinsic_group_memory_barrier:
 638    case nir_intrinsic_memory_barrier:
 639    case nir_intrinsic_memory_barrier_buffer:
 640    case nir_intrinsic_memory_barrier_image:
 641       return NV50_IR_SUBOP_MEMBAR(M, GL);
 642    case nir_intrinsic_memory_barrier_shared:
 643       return NV50_IR_SUBOP_MEMBAR(M, CTA);
 644
 645    case nir_intrinsic_vote_all:
 646       return NV50_IR_SUBOP_VOTE_ALL;
 647    case nir_intrinsic_vote_any:
 648       return NV50_IR_SUBOP_VOTE_ANY;
 649    case nir_intrinsic_vote_ieq:
 650       return NV50_IR_SUBOP_VOTE_UNI;
 651    default:
 652       return 0;
 653    }
 654 }
 655
 656 CondCode
 657 Converter::getCondCode(nir_op op)
 658 {
 659    switch (op) {
 660    case nir_op_feq32:
 661    case nir_op_ieq32:
 662       return CC_EQ;
 663    case nir_op_fge32:
 664    case nir_op_ige32:
 665    case nir_op_uge32:
 666       return CC_GE;
 667    case nir_op_flt32:
 668    case nir_op_ilt32:
 669    case nir_op_ult32:
 670       return CC_LT;
 671    case nir_op_fne32:
 672       return CC_NEU;
 673    case nir_op_ine32:
 674       return CC_NE;
 675    default:
 676       ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
 677       assert(false);
 678       return CC_FL;
 679    }
 680 }
 681
 682 Converter::LValues&
 683 Converter::convert(nir_alu_dest *dest)
 684 {
 685    return convert(&dest->dest);
 686 }
 687
 688 Converter::LValues&
 689 Converter::convert(nir_dest *dest)
 690 {
 691    if (dest->is_ssa)
 692       return convert(&dest->ssa);
 693    if (dest->reg.indirect) {
 694       ERROR("no support for indirects.");
 695       assert(false);
 696    }
 697    return convert(dest->reg.reg);
 698 }
 699
 700 Converter::LValues&
 701 Converter::convert(nir_register *reg)
 702 {
 703    NirDefMap::iterator it = regDefs.find(reg->index);
 704    if (it != regDefs.end())
 705       return it->second;
 706
 707    LValues newDef(reg->num_components);
 708    for (uint8_t i = 0; i < reg->num_components; i++)
 709       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 710    return regDefs[reg->index] = newDef;
 711 }
 712
 713 Converter::LValues&
 714 Converter::convert(nir_ssa_def *def)
 715 {
 716    NirDefMap::iterator it = ssaDefs.find(def->index);
 717    if (it != ssaDefs.end())
 718       return it->second;
 719
 720    LValues newDef(def->num_components);
 721    for (uint8_t i = 0; i < def->num_components; i++)
 722       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 723    return ssaDefs[def->index] = newDef;
 724 }
 725
 726 Value*
 727 Converter::getSrc(nir_alu_src *src, uint8_t component)
 728 {
 729    if (src->abs || src->negate) {
 730       ERROR("modifiers currently not supported on nir_alu_src\n");
 731       assert(false);
 732    }
 733    return getSrc(&src->src, src->swizzle[component]);
 734 }
 735
 736 Value*
 737 Converter::getSrc(nir_register *reg, uint8_t idx)
 738 {
 739    NirDefMap::iterator it = regDefs.find(reg->index);
 740    if (it == regDefs.end())
 741       return convert(reg)[idx];
 742    return it->second[idx];
 743 }
 744
 745 Value*
 746 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 747 {
 748    if (src->is_ssa)
 749       return getSrc(src->ssa, idx);
 750
 751    if (src->reg.indirect) {
 752       if (indirect)
 753          return getSrc(src->reg.indirect, idx);
 754       ERROR("no support for indirects.");
 755       assert(false);
 756       return NULL;
 757    }
 758
 759    return getSrc(src->reg.reg, idx);
 760 }
 761
 762 Value*
 763 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 764 {
 765    ImmediateMap::iterator iit = immediates.find(src->index);
 766    if (iit != immediates.end())
 767       return convert((*iit).second, idx);
 768
 769    NirDefMap::iterator it = ssaDefs.find(src->index);
 770    if (it == ssaDefs.end()) {
 771       ERROR("SSA value %u not found\n", src->index);
 772       assert(false);
 773       return NULL;
 774    }
 775    return it->second[idx];
 776 }
 777
 778 uint32_t
 779 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 780 {
 781    nir_const_value *offset = nir_src_as_const_value(*src);
 782
 783    if (offset) {
 784       indirect = NULL;
 785       return offset[0].u32;
 786    }
 787
 788    indirect = getSrc(src, idx, true);
 789    return 0;
 790 }
 791
 792 uint32_t
 793 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
 794 {
 795    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 796    if (indirect && !isScalar)
 797       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 798    return idx;
 799 }
 800
 801 static void
 802 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 803 {
 804    assert(name && index);
 805
 806    if (slot >= VERT_ATTRIB_MAX) {
 807       ERROR("invalid varying slot %u\n", slot);
 808       assert(false);
 809       return;
 810    }
 811
 812    if (slot >= VERT_ATTRIB_GENERIC0 &&
 813        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 814       *name = TGSI_SEMANTIC_GENERIC;
 815       *index = slot - VERT_ATTRIB_GENERIC0;
 816       return;
 817    }
 818
 819    if (slot >= VERT_ATTRIB_TEX0 &&
 820        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 821       *name = TGSI_SEMANTIC_TEXCOORD;
 822       *index = slot - VERT_ATTRIB_TEX0;
 823       return;
 824    }
 825
 826    switch (slot) {
 827    case VERT_ATTRIB_COLOR0:
 828       *name = TGSI_SEMANTIC_COLOR;
 829       *index = 0;
 830       break;
 831    case VERT_ATTRIB_COLOR1:
 832       *name = TGSI_SEMANTIC_COLOR;
 833       *index = 1;
 834       break;
 835    case VERT_ATTRIB_EDGEFLAG:
 836       *name = TGSI_SEMANTIC_EDGEFLAG;
 837       *index = 0;
 838       break;
 839    case VERT_ATTRIB_FOG:
 840       *name = TGSI_SEMANTIC_FOG;
 841       *index = 0;
 842       break;
 843    case VERT_ATTRIB_NORMAL:
 844       *name = TGSI_SEMANTIC_NORMAL;
 845       *index = 0;
 846       break;
 847    case VERT_ATTRIB_POS:
 848       *name = TGSI_SEMANTIC_POSITION;
 849       *index = 0;
 850       break;
 851    case VERT_ATTRIB_POINT_SIZE:
 852       *name = TGSI_SEMANTIC_PSIZE;
 853       *index = 0;
 854       break;
 855    default:
 856       ERROR("unknown vert attrib slot %u\n", slot);
 857       assert(false);
 858       break;
 859    }
 860 }
 861
 862 void
 863 Converter::setInterpolate(nv50_ir_varying *var,
 864                           uint8_t mode,
 865                           bool centroid,
 866                           unsigned semantic)
 867 {
 868    switch (mode) {
 869    case INTERP_MODE_FLAT:
 870       var->flat = 1;
 871       break;
 872    case INTERP_MODE_NONE:
 873       if (semantic == TGSI_SEMANTIC_COLOR)
 874          var->sc = 1;
 875       else if (semantic == TGSI_SEMANTIC_POSITION)
 876          var->linear = 1;
 877       break;
 878    case INTERP_MODE_NOPERSPECTIVE:
 879       var->linear = 1;
 880       break;
 881    case INTERP_MODE_SMOOTH:
 882       break;
 883    }
 884    var->centroid = centroid;
 885 }
 886
 887 static uint16_t
 888 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
 889           bool input, const nir_variable *var)
 890 {
 891    if (!type->is_array())
 892       return type->count_attribute_slots(false);
 893
 894    uint16_t slots;
 895    switch (stage) {
 896    case Program::TYPE_GEOMETRY:
 897       slots = type->uniform_locations();
 898       if (input)
 899          slots /= info.gs.vertices_in;
 900       break;
 901    case Program::TYPE_TESSELLATION_CONTROL:
 902    case Program::TYPE_TESSELLATION_EVAL:
 903       // remove first dimension
 904       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
 905          slots = type->uniform_locations();
 906       else
 907          slots = type->fields.array->uniform_locations();
 908       break;
 909    default:
 910       slots = type->count_attribute_slots(false);
 911       break;
 912    }
 913
 914    return slots;
 915 }
 916
 917 bool Converter::assignSlots() {
 918    unsigned name;
 919    unsigned index;
 920
 921    info->io.viewportId = -1;
 922    info->numInputs = 0;
 923    info->numOutputs = 0;
 924
 925    // we have to fixup the uniform locations for arrays
 926    unsigned numImages = 0;
 927    nir_foreach_variable(var, &nir->uniforms) {
 928       const glsl_type *type = var->type;
 929       if (!type->without_array()->is_image())
 930          continue;
 931       var->data.driver_location = numImages;
 932       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
 933    }
 934
 935    info->numSysVals = 0;
 936    for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
 937       if (!(nir->info.system_values_read & 1ull << i))
 938          continue;
 939
 940       info->sv[info->numSysVals].sn = tgsi_get_sysval_semantic(i);
 941       info->sv[info->numSysVals].si = 0;
 942       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
 943
 944       switch (i) {
 945       case SYSTEM_VALUE_INSTANCE_ID:
 946          info->io.instanceId = info->numSysVals;
 947          break;
 948       case SYSTEM_VALUE_TESS_LEVEL_INNER:
 949       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 950          info->sv[info->numSysVals].patch = 1;
 951          break;
 952       case SYSTEM_VALUE_VERTEX_ID:
 953          info->io.vertexId = info->numSysVals;
 954          break;
 955       default:
 956          break;
 957       }
 958
 959       info->numSysVals += 1;
 960    }
 961
 962    if (prog->getType() == Program::TYPE_COMPUTE)
 963       return true;
 964
 965    nir_foreach_variable(var, &nir->inputs) {
 966       const glsl_type *type = var->type;
 967       int slot = var->data.location;
 968       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
 969       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
 970                                        : type->component_slots();
 971       uint32_t frac = var->data.location_frac;
 972       uint32_t vary = var->data.driver_location;
 973
 974       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
 975          if (comp > 2)
 976             slots *= 2;
 977       }
 978
 979       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
 980
 981       switch(prog->getType()) {
 982       case Program::TYPE_FRAGMENT:
 983          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
 984                                       &name, &index);
 985          for (uint16_t i = 0; i < slots; ++i) {
 986             setInterpolate(&info->in[vary + i], var->data.interpolation,
 987                            var->data.centroid | var->data.sample, name);
 988          }
 989          break;
 990       case Program::TYPE_GEOMETRY:
 991          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
 992                                       &name, &index);
 993          break;
 994       case Program::TYPE_TESSELLATION_CONTROL:
 995       case Program::TYPE_TESSELLATION_EVAL:
 996          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
 997                                       &name, &index);
 998          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
 999             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1000          break;
1001       case Program::TYPE_VERTEX:
1002          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1003          switch (name) {
1004          case TGSI_SEMANTIC_EDGEFLAG:
1005             info->io.edgeFlagIn = vary;
1006             break;
1007          default:
1008             break;
1009          }
1010          break;
1011       default:
1012          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1013          return false;
1014       }
1015
1016       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1017          info->in[vary].id = vary;
1018          info->in[vary].patch = var->data.patch;
1019          info->in[vary].sn = name;
1020          info->in[vary].si = index + i;
1021          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1022             if (i & 0x1)
1023                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1024             else
1025                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1026          else
1027             info->in[vary].mask |= ((1 << comp) - 1) << frac;
1028       }
1029       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1030    }
1031
1032    nir_foreach_variable(var, &nir->outputs) {
1033       const glsl_type *type = var->type;
1034       int slot = var->data.location;
1035       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1036       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1037                                        : type->component_slots();
1038       uint32_t frac = var->data.location_frac;
1039       uint32_t vary = var->data.driver_location;
1040
1041       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1042          if (comp > 2)
1043             slots *= 2;
1044       }
1045
1046       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1047
1048       switch(prog->getType()) {
1049       case Program::TYPE_FRAGMENT:
1050          tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);
1051          switch (name) {
1052          case TGSI_SEMANTIC_COLOR:
1053             if (!var->data.fb_fetch_output)
1054                info->prop.fp.numColourResults++;
1055
1056             if (var->data.location == FRAG_RESULT_COLOR &&
1057                 nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
1058                info->prop.fp.separateFragData = true;
1059
1060             // sometimes we get FRAG_RESULT_DATAX with data.index 0
1061             // sometimes we get FRAG_RESULT_DATA0 with data.index X
1062             index = index == 0 ? var->data.index : index;
1063             break;
1064          case TGSI_SEMANTIC_POSITION:
1065             info->io.fragDepth = vary;
1066             info->prop.fp.writesDepth = true;
1067             break;
1068          case TGSI_SEMANTIC_SAMPLEMASK:
1069             info->io.sampleMask = vary;
1070             break;
1071          default:
1072             break;
1073          }
1074          break;
1075       case Program::TYPE_GEOMETRY:
1076       case Program::TYPE_TESSELLATION_CONTROL:
1077       case Program::TYPE_TESSELLATION_EVAL:
1078       case Program::TYPE_VERTEX:
1079          tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,
1080                                       &name, &index);
1081
1082          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1083              name != TGSI_SEMANTIC_TESSOUTER)
1084             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1085
1086          switch (name) {
1087          case TGSI_SEMANTIC_CLIPDIST:
1088             info->io.genUserClip = -1;
1089             break;
1090          case TGSI_SEMANTIC_CLIPVERTEX:
1091             clipVertexOutput = vary;
1092             break;
1093          case TGSI_SEMANTIC_EDGEFLAG:
1094             info->io.edgeFlagOut = vary;
1095             break;
1096          case TGSI_SEMANTIC_POSITION:
1097             if (clipVertexOutput < 0)
1098                clipVertexOutput = vary;
1099             break;
1100          default:
1101             break;
1102          }
1103          break;
1104       default:
1105          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1106          return false;
1107       }
1108
1109       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1110          info->out[vary].id = vary;
1111          info->out[vary].patch = var->data.patch;
1112          info->out[vary].sn = name;
1113          info->out[vary].si = index + i;
1114          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1115             if (i & 0x1)
1116                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1117             else
1118                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1119          else
1120             info->out[vary].mask |= ((1 << comp) - 1) << frac;
1121
1122          if (nir->info.outputs_read & 1ull << slot)
1123             info->out[vary].oread = 1;
1124       }
1125       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1126    }
1127
1128    if (info->io.genUserClip > 0) {
1129       info->io.clipDistances = info->io.genUserClip;
1130
1131       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1132
1133       for (unsigned int n = 0; n < nOut; ++n) {
1134          unsigned int i = info->numOutputs++;
1135          info->out[i].id = i;
1136          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1137          info->out[i].si = n;
1138          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1139       }
1140    }
1141
1142    return info->assignSlots(info) == 0;
1143 }
1144
// Returns the byte address of one component of a shader input or output.
//
// The intrinsic selects the table: load_input, load_interpolated_input and
// load_per_vertex_input index info->in, the load/store (per-vertex) output
// intrinsics index info->out. idx is the varying index and slot the component
// inside it; the component offset stored in the intrinsic is added on top.
// For 8-byte types every component takes two slots and continues in the next
// varying once slot 4 is reached. The result is the recorded slot value
// multiplied by 4.
1145 uint32_t
1146 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1147 {
1148    DataType ty;
1149    int offset = nir_intrinsic_component(insn);
1150    bool input;
1151
        // the data type comes from the destination if there is one, else from src[0]
1152    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1153       ty = getDType(insn);
1154    else
1155       ty = getSType(insn->src[0], false, false);
1156
1157    switch (insn->intrinsic) {
1158    case nir_intrinsic_load_input:
1159    case nir_intrinsic_load_interpolated_input:
1160    case nir_intrinsic_load_per_vertex_input:
1161       input = true;
1162       break;
1163    case nir_intrinsic_load_output:
1164    case nir_intrinsic_load_per_vertex_output:
1165    case nir_intrinsic_store_output:
1166    case nir_intrinsic_store_per_vertex_output:
1167       input = false;
1168       break;
1169    default:
           // terminate the message with a newline like every other ERROR call
1170       ERROR("unknown intrinsic in getSlotAddress %s\n",
1171             nir_intrinsic_infos[insn->intrinsic].name);
           // still assign a value so that `input` is never read uninitialized below
1172       input = false;
1173       assert(false);
1174       break;
1175    }
1176
1177    if (typeSizeof(ty) == 8) {
           // 8-byte types: every component covers two slots
1178       slot *= 2;
1179       slot += offset;
1180       if (slot >= 4) {
              // past the four slots of this varying, continue in the next one
1181          idx += 1;
1182          slot -= 4;
1183       }
1184    } else {
1185       slot += offset;
1186    }
1187
1188    assert(slot < 4);
1189    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1190    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1191
1192    const nv50_ir_varying *vary = input ? info->in : info->out;
1193    return vary[idx].slot[slot] * 4;
1194 }
1195
1196 // Loads component c of a value of type ty into def. The data is read from
1197 // `file` (symbol index i) at byte offset base + c * sizeof(ty), optionally
1198 // addressed through indirect0/indirect1. 8-byte values coming from const or
1199 // buffer memory, or loaded with indirect0 set, are fetched as two 32-bit
1200 // words that get combined by OP_MERGE. Returns the instruction defining def.
1201 Instruction *
1202 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1203                     uint32_t base, uint8_t c, Value *indirect0,
1204                     Value *indirect1, bool patch)
1205 {
1206    const unsigned int bytes = typeSizeof(ty);
1207    const uint32_t offset = base + c * bytes;
1208    const bool twoWords = bytes == 8 &&
1209       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0);
1210
1211    if (!twoWords) {
1212       Instruction *whole =
1213          mkLoad(ty, def, mkSymbol(file, i, ty, offset), indirect0);
1214       whole->setIndirect(0, 1, indirect1);
1215       whole->perPatch = patch;
1216       return whole;
1217    }
1218
1219    // allocate both halves first, then load the low word followed by the high one
1220    Value *halves[2];
1221    halves[0] = getSSA();
1222    halves[1] = getSSA();
1223    for (uint32_t w = 0; w < 2; ++w) {
1224       Instruction *part = mkLoad(TYPE_U32, halves[w],
1225                                  mkSymbol(file, i, TYPE_U32, offset + 4 * w), indirect0);
1226       part->setIndirect(0, 1, indirect1);
1227       part->perPatch = patch;
1228    }
1229    return mkOp2(OP_MERGE, ty, def, halves[0], halves[1]);
1230 }
1231
1232 // Stores src (type ty) as component c of output idx into `file` at the address
1233 // given by getSlotAddress(). OP_EXPORT sources are copied with a mov first and
1234 // 8-byte values with indirect0 set go out as two 32-bit stores.
1235 // indirect1 is accepted but not referenced by this body.
1236 void
1237 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1238                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1239                    Value *indirect0, Value *indirect1)
1240 {
1241    const uint8_t bytes = typeSizeof(ty);
1242    const uint32_t addr = getSlotAddress(insn, idx, c);
1243    if (bytes != 8 || !indirect0) {
1244       Value *val = src;
1245       if (op == OP_EXPORT)
1246          val = mkMov(getSSA(bytes), src, ty)->getDef(0);
1247       mkStore(op, ty, mkSymbol(file, 0, ty, addr), indirect0,
1248               val)->perPatch = info->out[idx].patch;
1249       return;
1250    }
1251    Value *halves[2];
1252    mkSplit(halves, 4, src);
1253    if (op == OP_EXPORT)
1254       for (int h = 0; h < 2; ++h)
1255          halves[h] = mkMov(getSSA(), halves[h], ty)->getDef(0);
1256    for (int h = 0; h < 2; ++h)
1257       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, addr + 4 * h), indirect0,
1258               halves[h])->perPatch = info->out[idx].patch;
1259 }
1260
1261 // Copies shader properties from nir->info into info: clip/cull distance counts
1262 // and the settings of the current program type. Resets tlsSpace; returns true.
1263 bool
1264 Converter::parseNIR()
1265 {
1266    const uint64_t sysvals = nir->info.system_values_read;
1267
1268    info->bin.tlsSpace = 0;
1269    info->io.clipDistances = nir->info.clip_distance_array_size;
1270    info->io.cullDistances = nir->info.cull_distance_array_size;
1271
1272    switch(prog->getType()) {
1273    case Program::TYPE_COMPUTE:
1274       for (unsigned int dim = 0; dim < 3; ++dim)
1275          info->prop.cp.numThreads[dim] = nir->info.cs.local_size[dim];
1276       info->bin.smemSize = nir->info.cs.shared_size;
1277       break;
1278    case Program::TYPE_FRAGMENT:
1279       info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1280       info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1281       info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1282       info->prop.fp.persampleInvocation =
1283          (sysvals & (SYSTEM_BIT_SAMPLE_ID | SYSTEM_BIT_SAMPLE_POS)) != 0;
1284       info->prop.fp.readsSampleLocations = (sysvals & SYSTEM_BIT_SAMPLE_POS);
1285       info->prop.fp.usesSampleMaskIn = (sysvals & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
1286       break;
1287    case Program::TYPE_GEOMETRY:
1288       info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1289       info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1290       info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1291       info->prop.gp.instanceCount = nir->info.gs.invocations;
1292       break;
1293    case Program::TYPE_TESSELLATION_CONTROL:
1294    case Program::TYPE_TESSELLATION_EVAL:
1295       info->prop.tp.domain = nir->info.tess.primitive_mode == GL_ISOLINES
1296          ? GL_LINES : nir->info.tess.primitive_mode;
1297       info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1298       if (nir->info.tess.point_mode)
1299          info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
1300       else
1301          info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
1302       info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1303       info->prop.tp.winding = !nir->info.tess.ccw;
1304       break;
1305    case Program::TYPE_VERTEX:
1306       info->prop.vp.usesDrawParameters =
1307          (sysvals & (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
1308                      BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
1309                      BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) != 0;
1310       break;
1311    default:
1312       break;
1313    }
1314
1315    return true;
1316 }
1317
// Converts the implementation of a NIR function into prog->main.
//
// Creates the entry and exit BasicBlocks, emits the values that are set up
// once for tessellation control and fragment programs, reserves local memory
// (info->bin.tlsSpace) for array registers and then visits every control-flow
// node of the body. Afterwards the current block is linked to the exit block,
// user clip planes are handled for vertex and tessellation evaluation programs
// when genUserClip > 0, and an OP_EXIT terminator is emitted.
// Returns false as soon as a control-flow node fails to convert.
1318 bool
1319 Converter::visit(nir_function *function)
1320 {
1321    assert(function->impl);
1322
1323    // usually the blocks will set everything up, but main is special
1324    BasicBlock *entry = new BasicBlock(prog->main);
1325    exit = new BasicBlock(prog->main);
        // register the entry block under the index of the NIR start block
1326    blocks[nir_start_block(function->impl)->index] = entry;
1327    prog->main->setEntry(entry);
1328    prog->main->setExit(exit);
1329
1330    setPosition(entry, true);
1331
        // one scratch value per clip vertex component, only when user clip
        // planes are generated
1332    if (info->io.genUserClip > 0) {
1333       for (int c = 0; c < 4; ++c)
1334          clipVtx[c] = getScratch();
1335    }
1336
1337    switch (prog->getType()) {
1338    case Program::TYPE_TESSELLATION_CONTROL:
           // outBase = SV_LANEID - SV_INVOCATION_ID
           // NOTE(review): both RDSV ops are built inside the argument list, so
           // the order in which they are emitted follows the compiler's
           // argument evaluation order.
1339       outBase = mkOp2v(
1340          OP_SUB, TYPE_U32, getSSA(),
1341          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1342          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1343       break;
1344    case Program::TYPE_FRAGMENT: {
           // read component 3 of SV_POSITION, then apply OP_RCP with fragCoord[3]
           // as both source and destination; fp.position gets the RCP result
1345       Symbol *sv = mkSysVal(SV_POSITION, 3);
1346       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1347       fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1348       break;
1349    }
1350    default:
1351       break;
1352    }
1353
        // array registers get a range of local memory: remember the start offset
        // per register and grow tlsSpace by the size of the array
1354    nir_foreach_register(reg, &function->impl->registers) {
1355       if (reg->num_array_elems) {
1356          // TODO: packed variables would be nice, but MemoryOpt fails
1357          // replace 4 with reg->num_components
1358          uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1359          regToLmemOffset[reg->index] = info->bin.tlsSpace;
1360          info->bin.tlsSpace += size;
1361       }
1362    }
1363
1364    nir_index_ssa_defs(function->impl);
1365    foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1366       if (!visit(node))
1367          return false;
1368    }
1369
        // connect the block the body ended in with the exit block and continue
        // emitting there
1370    bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1371    setPosition(exit, true);
1372
1373    if ((prog->getType() == Program::TYPE_VERTEX ||
1374         prog->getType() == Program::TYPE_TESSELLATION_EVAL)
1375        && info->io.genUserClip > 0)
1376       handleUserClipPlanes();
1377
1378    // TODO: for non main function this needs to be a OP_RETURN
1379    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1380    return true;
1381 }
1382
1383 // Forwards a control-flow node to the visitor matching its node type.
1384 bool
1385 Converter::visit(nir_cf_node *node)
1386 {
1387    if (node->type == nir_cf_node_block)
1388       return visit(nir_cf_node_as_block(node));
1389    if (node->type == nir_cf_node_if)
1390       return visit(nir_cf_node_as_if(node));
1391    if (node->type == nir_cf_node_loop)
1392       return visit(nir_cf_node_as_loop(node));
1393
1394    // every other node type is reported and fails the conversion
1395    ERROR("unknown nir_cf_node type %u\n", node->type);
1396    return false;
1397 }
1398
1399 // Converts all instructions of a NIR block; fails on the first one that fails.
1400 bool
1401 Converter::visit(nir_block *block)
1402 {
1403    // a block without predecessors and without instructions is skipped entirely
1404    if (block->instr_list.is_empty() && !block->predecessors->entries)
1405       return true;
1406
1407    setPosition(convert(block), true);
1408    nir_foreach_instr(insn, block) {
1409       if (!visit(insn))
1410          return false;
1411    }
1412    return true;
1413 }
1414
// Converts a NIR if statement.
//
// The current block gets TREE edges to the first then- and else-blocks and a
// branch to the else block (condition code CC_EQ on the if condition,
// presumably taken when the condition is zero - confirm against mkFlow).
// Both arms are visited in order. An arm whose last block does not end in a
// flow instruction, or ends in OP_JOIN, gets an unconditional branch and a
// FORWARD edge to its successor. When both arms continue in the same
// successor block, a JOINAT is recorded before the branch and a fixed OP_JOIN
// is emitted in that successor.
// Returns false as soon as a nested control-flow node fails to convert.
1415 bool
1416 Converter::visit(nir_if *nif)
1417 {
1418    DataType sType = getSType(nif->condition, false, false);
1419    Value *src = getSrc(&nif->condition, 0);
1420
1421    nir_block *lastThen = nir_if_last_then_block(nif);
1422    nir_block *lastElse = nir_if_last_else_block(nif);
1423
        // the last block of each arm must not have a second successor
1424    assert(!lastThen->successors[1]);
1425    assert(!lastElse->successors[1]);
1426
1427    BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1428    BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1429
1430    bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1431    bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1432
1433    // we only insert joinats if both arms end up at the end of the if again.
1434    // reasons for this not to happen are breaks/continues/ret/... which
1435    // have their own handling
1436    if (lastThen->successors[0] == lastElse->successors[0])
1437       bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1438                           CC_ALWAYS, NULL);
1439
1440    mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1441
1442    foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1443       if (!visit(node))
1444          return false;
1445    }
        // close the then arm: add a branch to its successor unless the last
        // block already ends in a flow instruction other than OP_JOIN
1446    setPosition(convert(lastThen), true);
1447    if (!bb->getExit() ||
1448        !bb->getExit()->asFlow() ||
1449         bb->getExit()->asFlow()->op == OP_JOIN) {
1450       BasicBlock *tailBB = convert(lastThen->successors[0]);
1451       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1452       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1453    }
1454
1455    foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1456       if (!visit(node))
1457          return false;
1458    }
        // same treatment for the else arm
1459    setPosition(convert(lastElse), true);
1460    if (!bb->getExit() ||
1461        !bb->getExit()->asFlow() ||
1462         bb->getExit()->asFlow()->op == OP_JOIN) {
1463       BasicBlock *tailBB = convert(lastElse->successors[0]);
1464       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1465       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1466    }
1467
        // both arms meet again: emit the OP_JOIN matching the JOINAT from above
1468    if (lastThen->successors[0] == lastElse->successors[0]) {
1469       setPosition(convert(lastThen->successors[0]), true);
1470       mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1471    }
1472
1473    return true;
1474 }
1475
// Converts a NIR loop.
//
// Emits OP_PREBREAK (target: the block following the loop) in the current
// block and OP_PRECONT (target: the first loop block) after repositioning to
// that first block, then visits the loop body. If the block the body ended in
// has incoming edges and is not terminated by a flow instruction, the loop is
// closed with OP_CONT and a BACK edge. The nesting depth is tracked in
// curLoopDepth and func->loopNestingBound.
// Returns false as soon as a nested control-flow node fails to convert.
1476 bool
1477 Converter::visit(nir_loop *loop)
1478 {
1479    curLoopDepth += 1;
1480    func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1481
1482    BasicBlock *loopBB = convert(nir_loop_first_block(loop));
        // tailBB is the block that comes right after the loop node
1483    BasicBlock *tailBB =
1484       convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1485    bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1486
1487    mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1488    setPosition(loopBB, false);
1489    mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1490
        // NOTE(review): this early return leaves curLoopDepth incremented
1491    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1492       if (!visit(node))
1493          return false;
1494    }
1495    Instruction *insn = bb->getExit();
        // only touch the final block if something actually leads into it
1496    if (bb->cfg.incidentCount() != 0) {
1497       if (!insn || !insn->asFlow()) {
              // the body falls off its end: jump back to the loop header
1498          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1499          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1500       } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1501                  tailBB->cfg.incidentCount() == 0) {
1502          // RA doesn't like having blocks around with no incident edge,
1503          // so we create a fake one to make it happy
1504          bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1505       }
1506    }
1507
1508    curLoopDepth -= 1;
1509
1510    return true;
1511 }
1512
1513 // Converts one NIR instruction by forwarding it to the visitor for its type.
1514 // Returns false for instruction types that have no visitor below.
1515 bool
1516 Converter::visit(nir_instr *insn)
1517 {
1518    // remember where on the fly generated immediate loads have to be inserted
1519    immInsertPos = bb->getExit();
1520
1521    if (insn->type == nir_instr_type_alu)
1522       return visit(nir_instr_as_alu(insn));
1523    if (insn->type == nir_instr_type_intrinsic)
1524       return visit(nir_instr_as_intrinsic(insn));
1525    if (insn->type == nir_instr_type_jump)
1526       return visit(nir_instr_as_jump(insn));
1527    if (insn->type == nir_instr_type_load_const)
1528       return visit(nir_instr_as_load_const(insn));
1529    if (insn->type == nir_instr_type_ssa_undef)
1530       return visit(nir_instr_as_ssa_undef(insn));
1531    if (insn->type == nir_instr_type_tex)
1532       return visit(nir_instr_as_tex(insn));
1533
1534    ERROR("unknown nir_instr type %u\n", insn->type);
1535    return false;
1536 }
1537
1538 // Maps a system value load intrinsic to the SVSemantic it reads.
1539 // Intrinsics that are not in the table are reported through ERROR, trip an
1540 // assert and yield SV_LAST.
1541 SVSemantic
1542 Converter::convert(nir_intrinsic_op intr)
1543 {
1544    static const struct {
1545       nir_intrinsic_op op;
1546       SVSemantic sv;
1547    } svTable[] = {
1548       { nir_intrinsic_load_base_vertex,         SV_BASEVERTEX },
1549       { nir_intrinsic_load_base_instance,       SV_BASEINSTANCE },
1550       { nir_intrinsic_load_draw_id,             SV_DRAWID },
1551       { nir_intrinsic_load_front_face,          SV_FACE },
1552       { nir_intrinsic_load_helper_invocation,   SV_THREAD_KILL },
1553       { nir_intrinsic_load_instance_id,         SV_INSTANCE_ID },
1554       { nir_intrinsic_load_invocation_id,       SV_INVOCATION_ID },
1555       { nir_intrinsic_load_local_group_size,    SV_NTID },
1556       { nir_intrinsic_load_local_invocation_id, SV_TID },
1557       { nir_intrinsic_load_num_work_groups,     SV_NCTAID },
1558       { nir_intrinsic_load_patch_vertices_in,   SV_VERTEX_COUNT },
1559       { nir_intrinsic_load_primitive_id,        SV_PRIMITIVE_ID },
1560       { nir_intrinsic_load_sample_id,           SV_SAMPLE_INDEX },
1561       { nir_intrinsic_load_sample_mask_in,      SV_SAMPLE_MASK },
1562       { nir_intrinsic_load_sample_pos,          SV_SAMPLE_POS },
1563       { nir_intrinsic_load_subgroup_eq_mask,    SV_LANEMASK_EQ },
1564       { nir_intrinsic_load_subgroup_ge_mask,    SV_LANEMASK_GE },
1565       { nir_intrinsic_load_subgroup_gt_mask,    SV_LANEMASK_GT },
1566       { nir_intrinsic_load_subgroup_le_mask,    SV_LANEMASK_LE },
1567       { nir_intrinsic_load_subgroup_lt_mask,    SV_LANEMASK_LT },
1568       { nir_intrinsic_load_subgroup_invocation, SV_LANEID },
1569       { nir_intrinsic_load_tess_coord,          SV_TESS_COORD },
1570       { nir_intrinsic_load_tess_level_inner,    SV_TESS_INNER },
1571       { nir_intrinsic_load_tess_level_outer,    SV_TESS_OUTER },
1572       { nir_intrinsic_load_vertex_id,           SV_VERTEX_ID },
1573       { nir_intrinsic_load_work_group_id,       SV_CTAID },
1574    };
1575    const size_t numEntries = sizeof(svTable) / sizeof(svTable[0]);
1576
1577    for (size_t n = 0; n < numEntries; ++n) {
1578       if (svTable[n].op == intr)
1579          return svTable[n].sv;
1580    }
1581
1582    ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1583          nir_intrinsic_infos[intr].name);
1584    assert(false);
1585    return SV_LAST;
1586 }
1601
1602 bool
1603 Converter::visit(nir_intrinsic_instr *insn)
1604 {
1605    nir_intrinsic_op op = insn->intrinsic;
1606    const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1607    unsigned dest_components = nir_intrinsic_dest_components(insn);
1608
1609    switch (op) {
1610    case nir_intrinsic_load_uniform: {
1611       LValues &newDefs = convert(&insn->dest);
1612       const DataType dType = getDType(insn);
1613       Value *indirect;
1614       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1615       for (uint8_t i = 0; i < dest_components; ++i) {
1616          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1617       }
1618       break;
1619    }
1620    case nir_intrinsic_store_output:
1621    case nir_intrinsic_store_per_vertex_output: {
1622       Value *indirect;
1623       DataType dType = getSType(insn->src[0], false, false);
1624       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1625
1626       for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
1627          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1628             continue;
1629
1630          uint8_t offset = 0;
1631          Value *src = getSrc(&insn->src[0], i);
1632          switch (prog->getType()) {
1633          case Program::TYPE_FRAGMENT: {
1634             if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1635                // TGSI uses a different interface than NIR, TGSI stores that
1636                // value in the z component, NIR in X
1637                offset += 2;
1638                src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1639             }
1640             break;
1641          }
1642          case Program::TYPE_GEOMETRY:
1643          case Program::TYPE_VERTEX: {
1644             if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1645                mkMov(clipVtx[i], src);
1646                src = clipVtx[i];
1647             }
1648             break;
1649          }
1650          default:
1651             break;
1652          }
1653
1654          storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1655       }
1656       break;
1657    }
1658    case nir_intrinsic_load_input:
1659    case nir_intrinsic_load_interpolated_input:
1660    case nir_intrinsic_load_output: {
1661       LValues &newDefs = convert(&insn->dest);
1662
1663       // FBFetch
1664       if (prog->getType() == Program::TYPE_FRAGMENT &&
1665           op == nir_intrinsic_load_output) {
1666          std::vector<Value*> defs, srcs;
1667          uint8_t mask = 0;
1668
1669          srcs.push_back(getSSA());
1670          srcs.push_back(getSSA());
1671          Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1672          Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1673          mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1674          mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1675
1676          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1677          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1678
1679          for (uint8_t i = 0u; i < dest_components; ++i) {
1680             defs.push_back(newDefs[i]);
1681             mask |= 1 << i;
1682          }
1683
1684          TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1685          texi->tex.levelZero = 1;
1686          texi->tex.mask = mask;
1687          texi->tex.useOffsets = 0;
1688          texi->tex.r = 0xffff;
1689          texi->tex.s = 0xffff;
1690
1691          info->prop.fp.readsFramebuffer = true;
1692          break;
1693       }
1694
1695       const DataType dType = getDType(insn);
1696       Value *indirect;
1697       bool input = op != nir_intrinsic_load_output;
1698       operation nvirOp;
1699       uint32_t mode = 0;
1700
1701       uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1702       nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1703
1704       // see load_barycentric_* handling
1705       if (prog->getType() == Program::TYPE_FRAGMENT) {
1706          mode = translateInterpMode(&vary, nvirOp);
1707          if (op == nir_intrinsic_load_interpolated_input) {
1708             ImmediateValue immMode;
1709             if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1710                mode |= immMode.reg.data.u32;
1711          }
1712       }
1713
1714       for (uint8_t i = 0u; i < dest_components; ++i) {
1715          uint32_t address = getSlotAddress(insn, idx, i);
1716          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1717          if (prog->getType() == Program::TYPE_FRAGMENT) {
1718             int s = 1;
1719             if (typeSizeof(dType) == 8) {
1720                Value *lo = getSSA();
1721                Value *hi = getSSA();
1722                Instruction *interp;
1723
1724                interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1725                if (nvirOp == OP_PINTERP)
1726                   interp->setSrc(s++, fp.position);
1727                if (mode & NV50_IR_INTERP_OFFSET)
1728                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1729                interp->setInterpolate(mode);
1730                interp->setIndirect(0, 0, indirect);
1731
1732                Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1733                interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1734                if (nvirOp == OP_PINTERP)
1735                   interp->setSrc(s++, fp.position);
1736                if (mode & NV50_IR_INTERP_OFFSET)
1737                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1738                interp->setInterpolate(mode);
1739                interp->setIndirect(0, 0, indirect);
1740
1741                mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1742             } else {
1743                Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1744                if (nvirOp == OP_PINTERP)
1745                   interp->setSrc(s++, fp.position);
1746                if (mode & NV50_IR_INTERP_OFFSET)
1747                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1748                interp->setInterpolate(mode);
1749                interp->setIndirect(0, 0, indirect);
1750             }
1751          } else {
1752             mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1753          }
1754       }
1755       break;
1756    }
1757    case nir_intrinsic_load_kernel_input: {
1758       assert(prog->getType() == Program::TYPE_COMPUTE);
1759       assert(insn->num_components == 1);
1760
1761       LValues &newDefs = convert(&insn->dest);
1762       const DataType dType = getDType(insn);
1763       Value *indirect;
1764       uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
1765
1766       mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
1767       break;
1768    }
1769    case nir_intrinsic_load_barycentric_at_offset:
1770    case nir_intrinsic_load_barycentric_at_sample:
1771    case nir_intrinsic_load_barycentric_centroid:
1772    case nir_intrinsic_load_barycentric_pixel:
1773    case nir_intrinsic_load_barycentric_sample: {
1774       LValues &newDefs = convert(&insn->dest);
1775       uint32_t mode;
1776
1777       if (op == nir_intrinsic_load_barycentric_centroid ||
1778           op == nir_intrinsic_load_barycentric_sample) {
1779          mode = NV50_IR_INTERP_CENTROID;
1780       } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1781          Value *offs[2];
1782          for (uint8_t c = 0; c < 2; c++) {
1783             offs[c] = getScratch();
1784             mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1785             mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1786             mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1787             mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1788          }
1789          mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1790
1791          mode = NV50_IR_INTERP_OFFSET;
1792       } else if (op == nir_intrinsic_load_barycentric_pixel) {
1793          mode = NV50_IR_INTERP_DEFAULT;
1794       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1795          info->prop.fp.readsSampleLocations = true;
1796          mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1797          mode = NV50_IR_INTERP_OFFSET;
1798       } else {
1799          unreachable("all intrinsics already handled above");
1800       }
1801
1802       loadImm(newDefs[1], mode);
1803       break;
1804    }
1805    case nir_intrinsic_discard:
1806       mkOp(OP_DISCARD, TYPE_NONE, NULL);
1807       break;
1808    case nir_intrinsic_discard_if: {
1809       Value *pred = getSSA(1, FILE_PREDICATE);
1810       if (insn->num_components > 1) {
1811          ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1812          assert(false);
1813          return false;
1814       }
1815       mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1816       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1817       break;
1818    }
1819    case nir_intrinsic_load_base_vertex:
1820    case nir_intrinsic_load_base_instance:
1821    case nir_intrinsic_load_draw_id:
1822    case nir_intrinsic_load_front_face:
1823    case nir_intrinsic_load_helper_invocation:
1824    case nir_intrinsic_load_instance_id:
1825    case nir_intrinsic_load_invocation_id:
1826    case nir_intrinsic_load_local_group_size:
1827    case nir_intrinsic_load_local_invocation_id:
1828    case nir_intrinsic_load_num_work_groups:
1829    case nir_intrinsic_load_patch_vertices_in:
1830    case nir_intrinsic_load_primitive_id:
1831    case nir_intrinsic_load_sample_id:
1832    case nir_intrinsic_load_sample_mask_in:
1833    case nir_intrinsic_load_sample_pos:
1834    case nir_intrinsic_load_subgroup_eq_mask:
1835    case nir_intrinsic_load_subgroup_ge_mask:
1836    case nir_intrinsic_load_subgroup_gt_mask:
1837    case nir_intrinsic_load_subgroup_le_mask:
1838    case nir_intrinsic_load_subgroup_lt_mask:
1839    case nir_intrinsic_load_subgroup_invocation:
1840    case nir_intrinsic_load_tess_coord:
1841    case nir_intrinsic_load_tess_level_inner:
1842    case nir_intrinsic_load_tess_level_outer:
1843    case nir_intrinsic_load_vertex_id:
1844    case nir_intrinsic_load_work_group_id: {
1845       const DataType dType = getDType(insn);
1846       SVSemantic sv = convert(op);
1847       LValues &newDefs = convert(&insn->dest);
1848
1849       for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
1850          Value *def;
1851          if (typeSizeof(dType) == 8)
1852             def = getSSA();
1853          else
1854             def = newDefs[i];
1855
1856          if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1857             loadImm(def, 0u);
1858          } else {
1859             Symbol *sym = mkSysVal(sv, i);
1860             Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1861             if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1862                rdsv->perPatch = 1;
1863          }
1864
1865          if (typeSizeof(dType) == 8)
1866             mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1867       }
1868       break;
1869    }
1870    // constants
1871    case nir_intrinsic_load_subgroup_size: {
1872       LValues &newDefs = convert(&insn->dest);
1873       loadImm(newDefs[0], 32u);
1874       break;
1875    }
1876    case nir_intrinsic_vote_all:
1877    case nir_intrinsic_vote_any:
1878    case nir_intrinsic_vote_ieq: {
1879       LValues &newDefs = convert(&insn->dest);
1880       Value *pred = getScratch(1, FILE_PREDICATE);
1881       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1882       mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1883       mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1884       break;
1885    }
1886    case nir_intrinsic_ballot: {
1887       LValues &newDefs = convert(&insn->dest);
1888       Value *pred = getSSA(1, FILE_PREDICATE);
1889       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1890       mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1891       break;
1892    }
1893    case nir_intrinsic_read_first_invocation:
1894    case nir_intrinsic_read_invocation: {
1895       LValues &newDefs = convert(&insn->dest);
1896       const DataType dType = getDType(insn);
1897       Value *tmp = getScratch();
1898
1899       if (op == nir_intrinsic_read_first_invocation) {
1900          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1901          mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
1902          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1903       } else
1904          tmp = getSrc(&insn->src[1], 0);
1905
1906       for (uint8_t i = 0; i < dest_components; ++i) {
1907          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1908             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1909       }
1910       break;
1911    }
1912    case nir_intrinsic_load_per_vertex_input: {
1913       const DataType dType = getDType(insn);
1914       LValues &newDefs = convert(&insn->dest);
1915       Value *indirectVertex;
1916       Value *indirectOffset;
1917       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1918       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1919
1920       Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1921                               mkImm(baseVertex), indirectVertex);
1922       for (uint8_t i = 0u; i < dest_components; ++i) {
1923          uint32_t address = getSlotAddress(insn, idx, i);
1924          loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
1925                   indirectOffset, vtxBase, info->in[idx].patch);
1926       }
1927       break;
1928    }
1929    case nir_intrinsic_load_per_vertex_output: {
1930       const DataType dType = getDType(insn);
1931       LValues &newDefs = convert(&insn->dest);
1932       Value *indirectVertex;
1933       Value *indirectOffset;
1934       uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
1935       uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
1936       Value *vtxBase = NULL;
1937
1938       if (indirectVertex)
1939          vtxBase = indirectVertex;
1940       else
1941          vtxBase = loadImm(NULL, baseVertex);
1942
1943       vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
1944
1945       for (uint8_t i = 0u; i < dest_components; ++i) {
1946          uint32_t address = getSlotAddress(insn, idx, i);
1947          loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
1948                   indirectOffset, vtxBase, info->in[idx].patch);
1949       }
1950       break;
1951    }
1952    case nir_intrinsic_emit_vertex:
1953       if (info->io.genUserClip > 0)
1954          handleUserClipPlanes();
1955       // fallthrough
1956    case nir_intrinsic_end_primitive: {
1957       uint32_t idx = nir_intrinsic_stream_id(insn);
1958       mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
1959       break;
1960    }
1961    case nir_intrinsic_load_ubo: {
1962       const DataType dType = getDType(insn);
1963       LValues &newDefs = convert(&insn->dest);
1964       Value *indirectIndex;
1965       Value *indirectOffset;
1966       uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
1967       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
1968
1969       for (uint8_t i = 0u; i < dest_components; ++i) {
1970          loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
1971                   indirectOffset, indirectIndex);
1972       }
1973       break;
1974    }
1975    case nir_intrinsic_get_buffer_size: {
1976       LValues &newDefs = convert(&insn->dest);
1977       const DataType dType = getDType(insn);
1978       Value *indirectBuffer;
1979       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
1980
1981       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
1982       mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
1983       break;
1984    }
1985    case nir_intrinsic_store_ssbo: {
1986       DataType sType = getSType(insn->src[0], false, false);
1987       Value *indirectBuffer;
1988       Value *indirectOffset;
1989       uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
1990       uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
1991
1992       for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
1993          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1994             continue;
1995          Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
1996                                 offset + i * typeSizeof(sType));
1997          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
1998             ->setIndirect(0, 1, indirectBuffer);
1999       }
2000       info->io.globalAccess |= 0x2;
2001       break;
2002    }
2003    case nir_intrinsic_load_ssbo: {
2004       const DataType dType = getDType(insn);
2005       LValues &newDefs = convert(&insn->dest);
2006       Value *indirectBuffer;
2007       Value *indirectOffset;
2008       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2009       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2010
2011       for (uint8_t i = 0u; i < dest_components; ++i)
2012          loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2013                   indirectOffset, indirectBuffer);
2014
2015       info->io.globalAccess |= 0x1;
2016       break;
2017    }
2018    case nir_intrinsic_shared_atomic_add:
2019    case nir_intrinsic_shared_atomic_and:
2020    case nir_intrinsic_shared_atomic_comp_swap:
2021    case nir_intrinsic_shared_atomic_exchange:
2022    case nir_intrinsic_shared_atomic_or:
2023    case nir_intrinsic_shared_atomic_imax:
2024    case nir_intrinsic_shared_atomic_imin:
2025    case nir_intrinsic_shared_atomic_umax:
2026    case nir_intrinsic_shared_atomic_umin:
2027    case nir_intrinsic_shared_atomic_xor: {
2028       const DataType dType = getDType(insn);
2029       LValues &newDefs = convert(&insn->dest);
2030       Value *indirectOffset;
2031       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2032       Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2033       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2034       if (op == nir_intrinsic_shared_atomic_comp_swap)
2035          atom->setSrc(2, getSrc(&insn->src[2], 0));
2036       atom->setIndirect(0, 0, indirectOffset);
2037       atom->subOp = getSubOp(op);
2038       break;
2039    }
2040    case nir_intrinsic_ssbo_atomic_add:
2041    case nir_intrinsic_ssbo_atomic_and:
2042    case nir_intrinsic_ssbo_atomic_comp_swap:
2043    case nir_intrinsic_ssbo_atomic_exchange:
2044    case nir_intrinsic_ssbo_atomic_or:
2045    case nir_intrinsic_ssbo_atomic_imax:
2046    case nir_intrinsic_ssbo_atomic_imin:
2047    case nir_intrinsic_ssbo_atomic_umax:
2048    case nir_intrinsic_ssbo_atomic_umin:
2049    case nir_intrinsic_ssbo_atomic_xor: {
2050       const DataType dType = getDType(insn);
2051       LValues &newDefs = convert(&insn->dest);
2052       Value *indirectBuffer;
2053       Value *indirectOffset;
2054       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2055       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2056
2057       Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2058       Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2059                                 getSrc(&insn->src[2], 0));
2060       if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2061          atom->setSrc(2, getSrc(&insn->src[3], 0));
2062       atom->setIndirect(0, 0, indirectOffset);
2063       atom->setIndirect(0, 1, indirectBuffer);
2064       atom->subOp = getSubOp(op);
2065
2066       info->io.globalAccess |= 0x2;
2067       break;
2068    }
2069    case nir_intrinsic_global_atomic_add:
2070    case nir_intrinsic_global_atomic_and:
2071    case nir_intrinsic_global_atomic_comp_swap:
2072    case nir_intrinsic_global_atomic_exchange:
2073    case nir_intrinsic_global_atomic_or:
2074    case nir_intrinsic_global_atomic_imax:
2075    case nir_intrinsic_global_atomic_imin:
2076    case nir_intrinsic_global_atomic_umax:
2077    case nir_intrinsic_global_atomic_umin:
2078    case nir_intrinsic_global_atomic_xor: {
2079       const DataType dType = getDType(insn);
2080       LValues &newDefs = convert(&insn->dest);
2081       Value *address;
2082       uint32_t offset = getIndirect(&insn->src[0], 0, address);
2083
2084       Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);
2085       Instruction *atom =
2086          mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2087       atom->setIndirect(0, 0, address);
2088       atom->subOp = getSubOp(op);
2089
2090       info->io.globalAccess |= 0x2;
2091       break;
2092    }
2093    case nir_intrinsic_bindless_image_atomic_add:
2094    case nir_intrinsic_bindless_image_atomic_and:
2095    case nir_intrinsic_bindless_image_atomic_comp_swap:
2096    case nir_intrinsic_bindless_image_atomic_exchange:
2097    case nir_intrinsic_bindless_image_atomic_imax:
2098    case nir_intrinsic_bindless_image_atomic_umax:
2099    case nir_intrinsic_bindless_image_atomic_imin:
2100    case nir_intrinsic_bindless_image_atomic_umin:
2101    case nir_intrinsic_bindless_image_atomic_or:
2102    case nir_intrinsic_bindless_image_atomic_xor:
2103    case nir_intrinsic_bindless_image_atomic_inc_wrap:
2104    case nir_intrinsic_bindless_image_atomic_dec_wrap:
2105    case nir_intrinsic_bindless_image_load:
2106    case nir_intrinsic_bindless_image_samples:
2107    case nir_intrinsic_bindless_image_size:
2108    case nir_intrinsic_bindless_image_store:
2109    case nir_intrinsic_image_atomic_add:
2110    case nir_intrinsic_image_atomic_and:
2111    case nir_intrinsic_image_atomic_comp_swap:
2112    case nir_intrinsic_image_atomic_exchange:
2113    case nir_intrinsic_image_atomic_imax:
2114    case nir_intrinsic_image_atomic_umax:
2115    case nir_intrinsic_image_atomic_imin:
2116    case nir_intrinsic_image_atomic_umin:
2117    case nir_intrinsic_image_atomic_or:
2118    case nir_intrinsic_image_atomic_xor:
2119    case nir_intrinsic_image_atomic_inc_wrap:
2120    case nir_intrinsic_image_atomic_dec_wrap:
2121    case nir_intrinsic_image_load:
2122    case nir_intrinsic_image_samples:
2123    case nir_intrinsic_image_size:
2124    case nir_intrinsic_image_store: {
2125       std::vector<Value*> srcs, defs;
2126       Value *indirect;
2127       DataType ty;
2128
2129       uint32_t mask = 0;
2130       TexInstruction::Target target =
2131          convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2132       unsigned int argCount = getNIRArgCount(target);
2133       uint16_t location = 0;
2134
2135       if (opInfo.has_dest) {
2136          LValues &newDefs = convert(&insn->dest);
2137          for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2138             defs.push_back(newDefs[i]);
2139             mask |= 1 << i;
2140          }
2141       }
2142
2143       int lod_src = -1;
2144       bool bindless = false;
2145       switch (op) {
2146       case nir_intrinsic_bindless_image_atomic_add:
2147       case nir_intrinsic_bindless_image_atomic_and:
2148       case nir_intrinsic_bindless_image_atomic_comp_swap:
2149       case nir_intrinsic_bindless_image_atomic_exchange:
2150       case nir_intrinsic_bindless_image_atomic_imax:
2151       case nir_intrinsic_bindless_image_atomic_umax:
2152       case nir_intrinsic_bindless_image_atomic_imin:
2153       case nir_intrinsic_bindless_image_atomic_umin:
2154       case nir_intrinsic_bindless_image_atomic_or:
2155       case nir_intrinsic_bindless_image_atomic_xor:
2156       case nir_intrinsic_bindless_image_atomic_inc_wrap:
2157       case nir_intrinsic_bindless_image_atomic_dec_wrap:
2158          ty = getDType(insn);
2159          bindless = true;
2160          info->io.globalAccess |= 0x2;
2161          mask = 0x1;
2162          break;
2163       case nir_intrinsic_image_atomic_add:
2164       case nir_intrinsic_image_atomic_and:
2165       case nir_intrinsic_image_atomic_comp_swap:
2166       case nir_intrinsic_image_atomic_exchange:
2167       case nir_intrinsic_image_atomic_imax:
2168       case nir_intrinsic_image_atomic_umax:
2169       case nir_intrinsic_image_atomic_imin:
2170       case nir_intrinsic_image_atomic_umin:
2171       case nir_intrinsic_image_atomic_or:
2172       case nir_intrinsic_image_atomic_xor:
2173       case nir_intrinsic_image_atomic_inc_wrap:
2174       case nir_intrinsic_image_atomic_dec_wrap:
2175          ty = getDType(insn);
2176          bindless = false;
2177          info->io.globalAccess |= 0x2;
2178          mask = 0x1;
2179          break;
2180       case nir_intrinsic_bindless_image_load:
2181       case nir_intrinsic_image_load:
2182          ty = TYPE_U32;
2183          bindless = op == nir_intrinsic_bindless_image_load;
2184          info->io.globalAccess |= 0x1;
2185          lod_src = 4;
2186          break;
2187       case nir_intrinsic_bindless_image_store:
2188       case nir_intrinsic_image_store:
2189          ty = TYPE_U32;
2190          bindless = op == nir_intrinsic_bindless_image_store;
2191          info->io.globalAccess |= 0x2;
2192          lod_src = 5;
2193          mask = 0xf;
2194          break;
2195       case nir_intrinsic_bindless_image_samples:
2196       case nir_intrinsic_image_samples:
2197          ty = TYPE_U32;
2198          bindless = op == nir_intrinsic_bindless_image_samples;
2199          mask = 0x8;
2200          break;
2201       case nir_intrinsic_bindless_image_size:
2202       case nir_intrinsic_image_size:
2203          ty = TYPE_U32;
2204          bindless = op == nir_intrinsic_bindless_image_size;
2205          break;
2206       default:
2207          unreachable("unhandled image opcode");
2208          break;
2209       }
2210
2211       if (bindless)
2212          indirect = getSrc(&insn->src[0], 0);
2213       else
2214          location = getIndirect(&insn->src[0], 0, indirect);
2215
2216       // coords
2217       if (opInfo.num_srcs >= 2)
2218          for (unsigned int i = 0u; i < argCount; ++i)
2219             srcs.push_back(getSrc(&insn->src[1], i));
2220
2221       // the sampler is just another src added after coords
2222       if (opInfo.num_srcs >= 3 && target.isMS())
2223          srcs.push_back(getSrc(&insn->src[2], 0));
2224
2225       if (opInfo.num_srcs >= 4 && lod_src != 4) {
2226          unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2227          for (uint8_t i = 0u; i < components; ++i)
2228             srcs.push_back(getSrc(&insn->src[3], i));
2229       }
2230
2231       if (opInfo.num_srcs >= 5 && lod_src != 5)
2232          // 1 for atomic swap
2233          for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2234             srcs.push_back(getSrc(&insn->src[4], i));
2235
2236       TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2237       texi->tex.bindless = bindless;
2238       texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));
2239       texi->tex.mask = mask;
2240       texi->cache = convert(nir_intrinsic_access(insn));
2241       texi->setType(ty);
2242       texi->subOp = getSubOp(op);
2243
2244       if (indirect)
2245          texi->setIndirectR(indirect);
2246
2247       break;
2248    }
2249    case nir_intrinsic_store_shared: {
2250       DataType sType = getSType(insn->src[0], false, false);
2251       Value *indirectOffset;
2252       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2253
2254       for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2255          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2256             continue;
2257          Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2258          mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2259       }
2260       break;
2261    }
2262    case nir_intrinsic_load_shared: {
2263       const DataType dType = getDType(insn);
2264       LValues &newDefs = convert(&insn->dest);
2265       Value *indirectOffset;
2266       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2267
2268       for (uint8_t i = 0u; i < dest_components; ++i)
2269          loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2270
2271       break;
2272    }
2273    case nir_intrinsic_control_barrier: {
2274       // TODO: add flag to shader_info
2275       info->numBarriers = 1;
2276       Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2277       bar->fixed = 1;
2278       bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2279       break;
2280    }
2281    case nir_intrinsic_group_memory_barrier:
2282    case nir_intrinsic_memory_barrier:
2283    case nir_intrinsic_memory_barrier_buffer:
2284    case nir_intrinsic_memory_barrier_image:
2285    case nir_intrinsic_memory_barrier_shared: {
2286       Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2287       bar->fixed = 1;
2288       bar->subOp = getSubOp(op);
2289       break;
2290    }
2291    case nir_intrinsic_memory_barrier_tcs_patch:
2292       break;
2293    case nir_intrinsic_shader_clock: {
2294       const DataType dType = getDType(insn);
2295       LValues &newDefs = convert(&insn->dest);
2296
2297       loadImm(newDefs[0], 0u);
2298       mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2299       break;
2300    }
2301    case nir_intrinsic_load_global: {
2302       const DataType dType = getDType(insn);
2303       LValues &newDefs = convert(&insn->dest);
2304       Value *indirectOffset;
2305       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2306
2307       for (auto i = 0u; i < dest_components; ++i)
2308          loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2309
2310       info->io.globalAccess |= 0x1;
2311       break;
2312    }
2313    case nir_intrinsic_store_global: {
2314       DataType sType = getSType(insn->src[0], false, false);
2315
2316       for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
2317          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2318             continue;
2319          if (typeSizeof(sType) == 8) {
2320             Value *split[2];
2321             mkSplit(split, 4, getSrc(&insn->src[0], i));
2322
2323             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2324             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2325
2326             sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2327             mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2328          } else {
2329             Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2330             mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2331          }
2332       }
2333
2334       info->io.globalAccess |= 0x2;
2335       break;
2336    }
2337    default:
2338       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2339       return false;
2340    }
2341
2342    return true;
2343 }
2344
2345 bool
2346 Converter::visit(nir_jump_instr *insn)
2347 {
2348    switch (insn->type) {
2349    case nir_jump_return:
2350       // TODO: this only works in the main function
2351       mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2352       bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2353       break;
2354    case nir_jump_break:
2355    case nir_jump_continue: {
2356       bool isBreak = insn->type == nir_jump_break;
2357       nir_block *block = insn->instr.block;
2358       assert(!block->successors[1]);
2359       BasicBlock *target = convert(block->successors[0]);
2360       mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2361       bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2362       break;
2363    }
2364    default:
2365       ERROR("unknown nir_jump_type %u\n", insn->type);
2366       return false;
2367    }
2368
2369    return true;
2370 }
2371
2372 Value*
2373 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2374 {
2375    Value *val;
2376
2377    if (immInsertPos)
2378       setPosition(immInsertPos, true);
2379    else
2380       setPosition(bb, false);
2381
2382    switch (insn->def.bit_size) {
2383    case 64:
2384       val = loadImm(getSSA(8), insn->value[idx].u64);
2385       break;
2386    case 32:
2387       val = loadImm(getSSA(4), insn->value[idx].u32);
2388       break;
2389    case 16:
2390       val = loadImm(getSSA(2), insn->value[idx].u16);
2391       break;
2392    case 8:
2393       val = loadImm(getSSA(1), insn->value[idx].u8);
2394       break;
2395    default:
2396       unreachable("unhandled bit size!\n");
2397    }
2398    setPosition(bb, true);
2399    return val;
2400 }
2401
2402 bool
2403 Converter::visit(nir_load_const_instr *insn)
2404 {
2405    assert(insn->def.bit_size <= 64);
2406    immediates[insn->def.index] = insn;
2407    return true;
2408 }
2409
// Guard used by ALU opcode handlers that only support scalar results.
//
// Expects a variable named `insn` in scope and must be expanded inside a
// function returning bool: if the destination has more than one component,
// or the write mask is anything other than 1, the problem is reported via
// ERROR and the enclosing function returns false.
//
// The body is wrapped in do { } while (0) so that `DEFAULT_CHECKS;` expands
// to exactly one statement and stays correct under an unbraced if/else.
#define DEFAULT_CHECKS \
   do { \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      } \
   } while (0)
2419 bool
2420 Converter::visit(nir_alu_instr *insn)
2421 {
2422    const nir_op op = insn->op;
2423    const nir_op_info &info = nir_op_infos[op];
2424    DataType dType = getDType(insn);
2425    const std::vector<DataType> sTypes = getSTypes(insn);
2426
2427    Instruction *oldPos = this->bb->getExit();
2428
2429    switch (op) {
2430    case nir_op_fabs:
2431    case nir_op_iabs:
2432    case nir_op_fadd:
2433    case nir_op_iadd:
2434    case nir_op_iand:
2435    case nir_op_fceil:
2436    case nir_op_fcos:
2437    case nir_op_fddx:
2438    case nir_op_fddx_coarse:
2439    case nir_op_fddx_fine:
2440    case nir_op_fddy:
2441    case nir_op_fddy_coarse:
2442    case nir_op_fddy_fine:
2443    case nir_op_fdiv:
2444    case nir_op_idiv:
2445    case nir_op_udiv:
2446    case nir_op_fexp2:
2447    case nir_op_ffloor:
2448    case nir_op_ffma:
2449    case nir_op_flog2:
2450    case nir_op_fmax:
2451    case nir_op_imax:
2452    case nir_op_umax:
2453    case nir_op_fmin:
2454    case nir_op_imin:
2455    case nir_op_umin:
2456    case nir_op_fmod:
2457    case nir_op_imod:
2458    case nir_op_umod:
2459    case nir_op_fmul:
2460    case nir_op_imul:
2461    case nir_op_imul_high:
2462    case nir_op_umul_high:
2463    case nir_op_fneg:
2464    case nir_op_ineg:
2465    case nir_op_inot:
2466    case nir_op_ior:
2467    case nir_op_pack_64_2x32_split:
2468    case nir_op_fpow:
2469    case nir_op_frcp:
2470    case nir_op_frem:
2471    case nir_op_irem:
2472    case nir_op_frsq:
2473    case nir_op_fsat:
2474    case nir_op_ishr:
2475    case nir_op_ushr:
2476    case nir_op_fsin:
2477    case nir_op_fsqrt:
2478    case nir_op_ftrunc:
2479    case nir_op_ishl:
2480    case nir_op_ixor: {
2481       DEFAULT_CHECKS;
2482       LValues &newDefs = convert(&insn->dest);
2483       operation preOp = preOperationNeeded(op);
2484       if (preOp != OP_NOP) {
2485          assert(info.num_inputs < 2);
2486          Value *tmp = getSSA(typeSizeof(dType));
2487          Instruction *i0 = mkOp(preOp, dType, tmp);
2488          Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2489          if (info.num_inputs) {
2490             i0->setSrc(0, getSrc(&insn->src[0]));
2491             i1->setSrc(0, tmp);
2492          }
2493          i1->subOp = getSubOp(op);
2494       } else {
2495          Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2496          for (unsigned s = 0u; s < info.num_inputs; ++s) {
2497             i->setSrc(s, getSrc(&insn->src[s]));
2498          }
2499          i->subOp = getSubOp(op);
2500       }
2501       break;
2502    }
2503    case nir_op_ifind_msb:
2504    case nir_op_ufind_msb: {
2505       DEFAULT_CHECKS;
2506       LValues &newDefs = convert(&insn->dest);
2507       dType = sTypes[0];
2508       mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2509       break;
2510    }
2511    case nir_op_fround_even: {
2512       DEFAULT_CHECKS;
2513       LValues &newDefs = convert(&insn->dest);
2514       mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2515       break;
2516    }
2517    // convert instructions
2518    case nir_op_f2f32:
2519    case nir_op_f2i32:
2520    case nir_op_f2u32:
2521    case nir_op_i2f32:
2522    case nir_op_i2i32:
2523    case nir_op_u2f32:
2524    case nir_op_u2u32:
2525    case nir_op_f2f64:
2526    case nir_op_f2i64:
2527    case nir_op_f2u64:
2528    case nir_op_i2f64:
2529    case nir_op_i2i64:
2530    case nir_op_u2f64:
2531    case nir_op_u2u64: {
2532       DEFAULT_CHECKS;
2533       LValues &newDefs = convert(&insn->dest);
2534       Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2535       if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2536          i->rnd = ROUND_Z;
2537       i->sType = sTypes[0];
2538       break;
2539    }
2540    // compare instructions
2541    case nir_op_feq32:
2542    case nir_op_ieq32:
2543    case nir_op_fge32:
2544    case nir_op_ige32:
2545    case nir_op_uge32:
2546    case nir_op_flt32:
2547    case nir_op_ilt32:
2548    case nir_op_ult32:
2549    case nir_op_fne32:
2550    case nir_op_ine32: {
2551       DEFAULT_CHECKS;
2552       LValues &newDefs = convert(&insn->dest);
2553       Instruction *i = mkCmp(getOperation(op),
2554                              getCondCode(op),
2555                              dType,
2556                              newDefs[0],
2557                              dType,
2558                              getSrc(&insn->src[0]),
2559                              getSrc(&insn->src[1]));
2560       if (info.num_inputs == 3)
2561          i->setSrc(2, getSrc(&insn->src[2]));
2562       i->sType = sTypes[0];
2563       break;
2564    }
2565    // those are weird ALU ops and need special handling, because
2566    //   1. they are always component based
2567    //   2. they basically just merge multiple values into one data type
2568    case nir_op_mov:
2569       if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2570          nir_reg_dest& reg = insn->dest.dest.reg;
2571          uint32_t goffset = regToLmemOffset[reg.reg->index];
2572          uint8_t comps = reg.reg->num_components;
2573          uint8_t size = reg.reg->bit_size / 8;
2574          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2575          uint32_t aoffset = csize * reg.base_offset;
2576          Value *indirect = NULL;
2577
2578          if (reg.indirect)
2579             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2580                               getSrc(reg.indirect, 0), mkImm(csize));
2581
2582          for (uint8_t i = 0u; i < comps; ++i) {
2583             if (!((1u << i) & insn->dest.write_mask))
2584                continue;
2585
2586             Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2587             mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2588          }
2589          break;
2590       } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2591          LValues &newDefs = convert(&insn->dest);
2592          nir_reg_src& reg = insn->src[0].src.reg;
2593          uint32_t goffset = regToLmemOffset[reg.reg->index];
2594          // uint8_t comps = reg.reg->num_components;
2595          uint8_t size = reg.reg->bit_size / 8;
2596          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2597          uint32_t aoffset = csize * reg.base_offset;
2598          Value *indirect = NULL;
2599
2600          if (reg.indirect)
2601             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2602
2603          for (uint8_t i = 0u; i < newDefs.size(); ++i)
2604             loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2605
2606          break;
2607       } else {
2608          LValues &newDefs = convert(&insn->dest);
2609          for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2610             mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2611          }
2612       }
2613       break;
2614    case nir_op_vec2:
2615    case nir_op_vec3:
2616    case nir_op_vec4:
2617    case nir_op_vec8:
2618    case nir_op_vec16: {
2619       LValues &newDefs = convert(&insn->dest);
2620       for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2621          mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2622       }
2623       break;
2624    }
2625    // (un)pack
2626    case nir_op_pack_64_2x32: {
2627       LValues &newDefs = convert(&insn->dest);
2628       Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2629       merge->setSrc(0, getSrc(&insn->src[0], 0));
2630       merge->setSrc(1, getSrc(&insn->src[0], 1));
2631       break;
2632    }
2633    case nir_op_pack_half_2x16_split: {
2634       LValues &newDefs = convert(&insn->dest);
2635       Value *tmpH = getSSA();
2636       Value *tmpL = getSSA();
2637
2638       mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2639       mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2640       mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2641       break;
2642    }
2643    case nir_op_unpack_half_2x16_split_x:
2644    case nir_op_unpack_half_2x16_split_y: {
2645       LValues &newDefs = convert(&insn->dest);
2646       Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2647       if (op == nir_op_unpack_half_2x16_split_y)
2648          cvt->subOp = 1;
2649       break;
2650    }
2651    case nir_op_unpack_64_2x32: {
2652       LValues &newDefs = convert(&insn->dest);
2653       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2654       break;
2655    }
2656    case nir_op_unpack_64_2x32_split_x: {
2657       LValues &newDefs = convert(&insn->dest);
2658       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2659       break;
2660    }
2661    case nir_op_unpack_64_2x32_split_y: {
2662       LValues &newDefs = convert(&insn->dest);
2663       mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2664       break;
2665    }
2666    // special instructions
2667    case nir_op_fsign:
2668    case nir_op_isign: {
2669       DEFAULT_CHECKS;
2670       DataType iType;
2671       if (::isFloatType(dType))
2672          iType = TYPE_F32;
2673       else
2674          iType = TYPE_S32;
2675
2676       LValues &newDefs = convert(&insn->dest);
2677       LValue *val0 = getScratch();
2678       LValue *val1 = getScratch();
2679       mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2680       mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2681
2682       if (dType == TYPE_F64) {
2683          mkOp2(OP_SUB, iType, val0, val0, val1);
2684          mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2685       } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2686          mkOp2(OP_SUB, iType, val0, val1, val0);
2687          mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2688          mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2689       } else if (::isFloatType(dType))
2690          mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2691       else
2692          mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2693       break;
2694    }
2695    case nir_op_fcsel:
2696    case nir_op_b32csel: {
2697       DEFAULT_CHECKS;
2698       LValues &newDefs = convert(&insn->dest);
2699       mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2700       break;
2701    }
2702    case nir_op_ibitfield_extract:
2703    case nir_op_ubitfield_extract: {
2704       DEFAULT_CHECKS;
2705       Value *tmp = getSSA();
2706       LValues &newDefs = convert(&insn->dest);
2707       mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2708       mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2709       break;
2710    }
2711    case nir_op_bfm: {
2712       DEFAULT_CHECKS;
2713       LValues &newDefs = convert(&insn->dest);
2714       mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
2715       break;
2716    }
2717    case nir_op_bitfield_insert: {
2718       DEFAULT_CHECKS;
2719       LValues &newDefs = convert(&insn->dest);
2720       LValue *temp = getSSA();
2721       mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2722       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2723       break;
2724    }
2725    case nir_op_bit_count: {
2726       DEFAULT_CHECKS;
2727       LValues &newDefs = convert(&insn->dest);
2728       mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2729       break;
2730    }
2731    case nir_op_bitfield_reverse: {
2732       DEFAULT_CHECKS;
2733       LValues &newDefs = convert(&insn->dest);
2734       mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
2735       break;
2736    }
2737    case nir_op_find_lsb: {
2738       DEFAULT_CHECKS;
2739       LValues &newDefs = convert(&insn->dest);
2740       Value *tmp = getSSA();
2741       mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
2742       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2743       break;
2744    }
2745    case nir_op_extract_u8: {
2746       DEFAULT_CHECKS;
2747       LValues &newDefs = convert(&insn->dest);
2748       Value *prmt = getSSA();
2749       mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
2750       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2751       break;
2752    }
2753    case nir_op_extract_i8: {
2754       DEFAULT_CHECKS;
2755       LValues &newDefs = convert(&insn->dest);
2756       Value *prmt = getSSA();
2757       mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
2758       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2759       break;
2760    }
2761    case nir_op_extract_u16: {
2762       DEFAULT_CHECKS;
2763       LValues &newDefs = convert(&insn->dest);
2764       Value *prmt = getSSA();
2765       mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
2766       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2767       break;
2768    }
2769    case nir_op_extract_i16: {
2770       DEFAULT_CHECKS;
2771       LValues &newDefs = convert(&insn->dest);
2772       Value *prmt = getSSA();
2773       mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
2774       mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
2775       break;
2776    }
2777    case nir_op_urol: {
2778       DEFAULT_CHECKS;
2779       LValues &newDefs = convert(&insn->dest);
2780       mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2781             getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2782          ->subOp = NV50_IR_SUBOP_SHF_L |
2783                    NV50_IR_SUBOP_SHF_W |
2784                    NV50_IR_SUBOP_SHF_HI;
2785       break;
2786    }
2787    case nir_op_uror: {
2788       DEFAULT_CHECKS;
2789       LValues &newDefs = convert(&insn->dest);
2790       mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
2791             getSrc(&insn->src[1]), getSrc(&insn->src[0]))
2792          ->subOp = NV50_IR_SUBOP_SHF_R |
2793                    NV50_IR_SUBOP_SHF_W |
2794                    NV50_IR_SUBOP_SHF_LO;
2795       break;
2796    }
2797    // boolean conversions
2798    case nir_op_b2f32: {
2799       DEFAULT_CHECKS;
2800       LValues &newDefs = convert(&insn->dest);
2801       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2802       break;
2803    }
2804    case nir_op_b2f64: {
2805       DEFAULT_CHECKS;
2806       LValues &newDefs = convert(&insn->dest);
2807       Value *tmp = getSSA(4);
2808       mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2809       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2810       break;
2811    }
2812    case nir_op_f2b32:
2813    case nir_op_i2b32: {
2814       DEFAULT_CHECKS;
2815       LValues &newDefs = convert(&insn->dest);
2816       Value *src1;
2817       if (typeSizeof(sTypes[0]) == 8) {
2818          src1 = loadImm(getSSA(8), 0.0);
2819       } else {
2820          src1 = zero;
2821       }
2822       CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2823       mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2824       break;
2825    }
2826    case nir_op_b2i32: {
2827       DEFAULT_CHECKS;
2828       LValues &newDefs = convert(&insn->dest);
2829       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2830       break;
2831    }
2832    case nir_op_b2i64: {
2833       DEFAULT_CHECKS;
2834       LValues &newDefs = convert(&insn->dest);
2835       LValue *def = getScratch();
2836       mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2837       mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2838       break;
2839    }
2840    default:
2841       ERROR("unknown nir_op %s\n", info.name);
2842       return false;
2843    }
2844
2845    if (!oldPos) {
2846       oldPos = this->bb->getEntry();
2847       oldPos->precise = insn->exact;
2848    }
2849
2850    if (unlikely(!oldPos))
2851       return true;
2852
2853    while (oldPos->next) {
2854       oldPos = oldPos->next;
2855       oldPos->precise = insn->exact;
2856    }
2857    oldPos->saturate = insn->dest.saturate;
2858
2859    return true;
2860 }
2861 #undef DEFAULT_CHECKS
2862
2863 bool
2864 Converter::visit(nir_ssa_undef_instr *insn)
2865 {
2866    LValues &newDefs = convert(&insn->def);
2867    for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2868       mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2869    }
2870    return true;
2871 }
2872
2873 #define CASE_SAMPLER(ty) \
2874    case GLSL_SAMPLER_DIM_ ## ty : \
2875       if (isArray && !isShadow) \
2876          return TEX_TARGET_ ## ty ## _ARRAY; \
2877       else if (!isArray && isShadow) \
2878          return TEX_TARGET_## ty ## _SHADOW; \
2879       else if (isArray && isShadow) \
2880          return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2881       else \
2882          return TEX_TARGET_ ## ty
2883
2884 TexTarget
2885 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2886 {
2887    switch (dim) {
2888    CASE_SAMPLER(1D);
2889    CASE_SAMPLER(2D);
2890    CASE_SAMPLER(CUBE);
2891    case GLSL_SAMPLER_DIM_3D:
2892       return TEX_TARGET_3D;
2893    case GLSL_SAMPLER_DIM_MS:
2894       if (isArray)
2895          return TEX_TARGET_2D_MS_ARRAY;
2896       return TEX_TARGET_2D_MS;
2897    case GLSL_SAMPLER_DIM_RECT:
2898       if (isShadow)
2899          return TEX_TARGET_RECT_SHADOW;
2900       return TEX_TARGET_RECT;
2901    case GLSL_SAMPLER_DIM_BUF:
2902       return TEX_TARGET_BUFFER;
2903    case GLSL_SAMPLER_DIM_EXTERNAL:
2904       return TEX_TARGET_2D;
2905    default:
2906       ERROR("unknown glsl_sampler_dim %u\n", dim);
2907       assert(false);
2908       return TEX_TARGET_COUNT;
2909    }
2910 }
2911 #undef CASE_SAMPLER
2912
2913 Value*
2914 Converter::applyProjection(Value *src, Value *proj)
2915 {
2916    if (!proj)
2917       return src;
2918    return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2919 }
2920
2921 unsigned int
2922 Converter::getNIRArgCount(TexInstruction::Target& target)
2923 {
2924    unsigned int result = target.getArgCount();
2925    if (target.isCube() && target.isArray())
2926       result--;
2927    if (target.isMS())
2928       result--;
2929    return result;
2930 }
2931
2932 CacheMode
2933 Converter::convert(enum gl_access_qualifier access)
2934 {
2935    switch (access) {
2936    case ACCESS_VOLATILE:
2937       return CACHE_CV;
2938    case ACCESS_COHERENT:
2939       return CACHE_CG;
2940    default:
2941       return CACHE_CA;
2942    }
2943 }
2944
2945 bool
2946 Converter::visit(nir_tex_instr *insn)
2947 {
2948    switch (insn->op) {
2949    case nir_texop_lod:
2950    case nir_texop_query_levels:
2951    case nir_texop_tex:
2952    case nir_texop_texture_samples:
2953    case nir_texop_tg4:
2954    case nir_texop_txb:
2955    case nir_texop_txd:
2956    case nir_texop_txf:
2957    case nir_texop_txf_ms:
2958    case nir_texop_txl:
2959    case nir_texop_txs: {
2960       LValues &newDefs = convert(&insn->dest);
2961       std::vector<Value*> srcs;
2962       std::vector<Value*> defs;
2963       std::vector<nir_src*> offsets;
2964       uint8_t mask = 0;
2965       bool lz = false;
2966       Value *proj = NULL;
2967       TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2968       operation op = getOperation(insn->op);
2969
2970       int r, s;
2971       int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
2972       int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
2973       int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
2974       int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
2975       int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
2976       int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
2977       int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
2978       int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
2979       int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
2980       int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
2981       int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
2982       int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
2983       int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
2984
2985       bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
2986       assert((sampHandleIdx != -1) == (texHandleIdx != -1));
2987
2988       if (projIdx != -1)
2989          proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
2990
2991       srcs.resize(insn->coord_components);
2992       for (uint8_t i = 0u; i < insn->coord_components; ++i)
2993          srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
2994
2995       // sometimes we get less args than target.getArgCount, but codegen expects the latter
2996       if (insn->coord_components) {
2997          uint32_t argCount = target.getArgCount();
2998
2999          if (target.isMS())
3000             argCount -= 1;
3001
3002          for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3003             srcs.push_back(getSSA());
3004       }
3005
3006       if (insn->op == nir_texop_texture_samples)
3007          srcs.push_back(zero);
3008       else if (!insn->num_srcs)
3009          srcs.push_back(loadImm(NULL, 0));
3010       if (biasIdx != -1)
3011          srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3012       if (lodIdx != -1)
3013          srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3014       else if (op == OP_TXF)
3015          lz = true;
3016       if (msIdx != -1)
3017          srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3018       if (offsetIdx != -1)
3019          offsets.push_back(&insn->src[offsetIdx].src);
3020       if (compIdx != -1)
3021          srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3022       if (texOffIdx != -1) {
3023          srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3024          texOffIdx = srcs.size() - 1;
3025       }
3026       if (sampOffIdx != -1) {
3027          srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3028          sampOffIdx = srcs.size() - 1;
3029       }
3030       if (bindless) {
3031          // currently we use the lower bits
3032          Value *split[2];
3033          Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3034
3035          mkSplit(split, 4, handle);
3036
3037          srcs.push_back(split[0]);
3038          texOffIdx = srcs.size() - 1;
3039       }
3040
3041       r = bindless ? 0xff : insn->texture_index;
3042       s = bindless ? 0x1f : insn->sampler_index;
3043
3044       defs.resize(newDefs.size());
3045       for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3046          defs[d] = newDefs[d];
3047          mask |= 1 << d;
3048       }
3049       if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3050          lz = true;
3051
3052       TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3053       texi->tex.levelZero = lz;
3054       texi->tex.mask = mask;
3055       texi->tex.bindless = bindless;
3056
3057       if (texOffIdx != -1)
3058          texi->tex.rIndirectSrc = texOffIdx;
3059       if (sampOffIdx != -1)
3060          texi->tex.sIndirectSrc = sampOffIdx;
3061
3062       switch (insn->op) {
3063       case nir_texop_tg4:
3064          if (!target.isShadow())
3065             texi->tex.gatherComp = insn->component;
3066          break;
3067       case nir_texop_txs:
3068          texi->tex.query = TXQ_DIMS;
3069          break;
3070       case nir_texop_texture_samples:
3071          texi->tex.mask = 0x4;
3072          texi->tex.query = TXQ_TYPE;
3073          break;
3074       case nir_texop_query_levels:
3075          texi->tex.mask = 0x8;
3076          texi->tex.query = TXQ_DIMS;
3077          break;
3078       default:
3079          break;
3080       }
3081
3082       texi->tex.useOffsets = offsets.size();
3083       if (texi->tex.useOffsets) {
3084          for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3085             for (uint32_t c = 0u; c < 3; ++c) {
3086                uint8_t s2 = std::min(c, target.getDim() - 1);
3087                texi->offset[s][c].set(getSrc(offsets[s], s2));
3088                texi->offset[s][c].setInsn(texi);
3089             }
3090          }
3091       }
3092
3093       if (op == OP_TXG && offsetIdx == -1) {
3094          if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3095             texi->tex.useOffsets = 4;
3096             setPosition(texi, false);
3097             for (uint8_t i = 0; i < 4; ++i) {
3098                for (uint8_t j = 0; j < 2; ++j) {
3099                   texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3100                   texi->offset[i][j].setInsn(texi);
3101                }
3102             }
3103             setPosition(texi, true);
3104          }
3105       }
3106
3107       if (ddxIdx != -1 && ddyIdx != -1) {
3108          for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3109             texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3110             texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3111          }
3112       }
3113
3114       break;
3115    }
3116    default:
3117       ERROR("unknown nir_texop %u\n", insn->op);
3118       return false;
3119    }
3120    return true;
3121 }
3122
3123 bool
3124 Converter::run()
3125 {
3126    bool progress;
3127
3128    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3129       nir_print_shader(nir, stderr);
3130
3131    struct nir_lower_subgroups_options subgroup_options = {
3132       .subgroup_size = 32,
3133       .ballot_bit_size = 32,
3134    };
3135
3136    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3137    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3138    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3139    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3140    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3141    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3142    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3143
3144    /*TODO: improve this lowering/optimisation loop so that we can use
3145     *      nir_opt_idiv_const effectively before this.
3146     */
3147    NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise);
3148
3149    do {
3150       progress = false;
3151       NIR_PASS(progress, nir, nir_copy_prop);
3152       NIR_PASS(progress, nir, nir_opt_remove_phis);
3153       NIR_PASS(progress, nir, nir_opt_trivial_continues);
3154       NIR_PASS(progress, nir, nir_opt_cse);
3155       NIR_PASS(progress, nir, nir_opt_algebraic);
3156       NIR_PASS(progress, nir, nir_opt_constant_folding);
3157       NIR_PASS(progress, nir, nir_copy_prop);
3158       NIR_PASS(progress, nir, nir_opt_dce);
3159       NIR_PASS(progress, nir, nir_opt_dead_cf);
3160    } while (progress);
3161
3162    NIR_PASS_V(nir, nir_lower_bool_to_int32);
3163    NIR_PASS_V(nir, nir_lower_locals_to_regs);
3164    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3165    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3166
3167    // Garbage collect dead instructions
3168    nir_sweep(nir);
3169
3170    if (!parseNIR()) {
3171       ERROR("Couldn't prase NIR!\n");
3172       return false;
3173    }
3174
3175    if (!assignSlots()) {
3176       ERROR("Couldn't assign slots!\n");
3177       return false;
3178    }
3179
3180    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3181       nir_print_shader(nir, stderr);
3182
3183    nir_foreach_function(function, nir) {
3184       if (!visit(function))
3185          return false;
3186    }
3187
3188    return true;
3189 }
3190
3191 } // unnamed namespace
3192
3193 namespace nv50_ir {
3194
3195 bool
3196 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3197 {
3198    nir_shader *nir = (nir_shader*)info->bin.source;
3199    Converter converter(this, nir, info);
3200    bool result = converter.run();
3201    if (!result)
3202       return result;
3203    LoweringHelper lowering;
3204    lowering.run(this);
3205    tlsSize = info->bin.tlsSpace;
3206    return result;
3207 }
3208
3209 } // namespace nv50_ir
3210
3211 static nir_shader_compiler_options
3212 nvir_nir_shader_compiler_options(int chipset)
3213 {
3214    nir_shader_compiler_options op = {};
3215    op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
3216    op.lower_ffma = false;
3217    op.fuse_ffma = false; /* nir doesn't track mad vs fma */
3218    op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
3219    op.lower_flrp32 = true;
3220    op.lower_flrp64 = true;
3221    op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it
3222    op.lower_fsat = false;
3223    op.lower_fsqrt = false; // TODO: only before gm200
3224    op.lower_sincos = false;
3225    op.lower_fmod = true;
3226    op.lower_bitfield_extract = false;
3227    op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET);
3228    op.lower_bitfield_insert = false;
3229    op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET);
3230    op.lower_bitfield_insert_to_bitfield_select = false;
3231    op.lower_bitfield_reverse = false;
3232    op.lower_bit_count = false;
3233    op.lower_ifind_msb = false;
3234    op.lower_find_lsb = false;
3235    op.lower_uadd_carry = true; // TODO
3236    op.lower_usub_borrow = true; // TODO
3237    op.lower_mul_high = false;
3238    op.lower_negate = false;
3239    op.lower_sub = true;
3240    op.lower_scmp = true; // TODO: not implemented yet
3241    op.lower_vector_cmp = false;
3242    op.lower_idiv = true;
3243    op.lower_bitops = false;
3244    op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
3245    op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
3246    op.lower_fdph = false;
3247    op.lower_fdot = false;
3248    op.fdot_replicates = false; // TODO
3249    op.lower_ffloor = false; // TODO
3250    op.lower_ffract = true;
3251    op.lower_fceil = false; // TODO
3252    op.lower_ftrunc = false;
3253    op.lower_ldexp = true;
3254    op.lower_pack_half_2x16 = true;
3255    op.lower_pack_unorm_2x16 = true;
3256    op.lower_pack_snorm_2x16 = true;
3257    op.lower_pack_unorm_4x8 = true;
3258    op.lower_pack_snorm_4x8 = true;
3259    op.lower_unpack_half_2x16 = true;
3260    op.lower_unpack_unorm_2x16 = true;
3261    op.lower_unpack_snorm_2x16 = true;
3262    op.lower_unpack_unorm_4x8 = true;
3263    op.lower_unpack_snorm_4x8 = true;
3264    op.lower_pack_split = false;
3265    op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
3266    op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
3267    op.lower_all_io_to_temps = false;
3268    op.lower_all_io_to_elements = false;
3269    op.vertex_id_zero_based = false;
3270    op.lower_base_vertex = false;
3271    op.lower_helper_invocation = false;
3272    op.optimize_sample_mask_in = false;
3273    op.lower_cs_local_index_from_id = true;
3274    op.lower_cs_local_id_from_index = false;
3275    op.lower_device_index_to_zero = false; // TODO
3276    op.lower_wpos_pntc = false; // TODO
3277    op.lower_hadd = true; // TODO
3278    op.lower_add_sat = true; // TODO
3279    op.vectorize_io = false;
3280    op.lower_to_scalar = true;
3281    op.unify_interfaces = false;
3282    op.use_interpolated_input_intrinsics = true;
3283    op.lower_mul_2x32_64 = true; // TODO
3284    op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
3285    op.has_imul24 = false;
3286    op.intel_vec4 = false;
3287    op.max_unroll_iterations = 32;
3288    op.lower_int64_options = (nir_lower_int64_options) (
3289       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
3290       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
3291       nir_lower_divmod64 |
3292       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
3293       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
3294       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
3295       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
3296       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
3297       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
3298       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
3299       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
3300       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
3301       ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
3302       nir_lower_ufind_msb64
3303    );
3304    op.lower_doubles_options = (nir_lower_doubles_options) (
3305       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
3306       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
3307       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
3308       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
3309       nir_lower_dmod |
3310       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
3311       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
3312    );
3313    return op;
3314 }
3315
3316 static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
3317 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET);
3318 static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
3319 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET);
3320 static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
3321 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET);
3322
3323 const nir_shader_compiler_options *
3324 nv50_ir_nir_shader_compiler_options(int chipset)
3325 {
3326    if (chipset >= NVISA_GV100_CHIPSET)
3327       return &gv100_nir_shader_compiler_options;
3328    if (chipset >= NVISA_GM107_CHIPSET)
3329       return &gm107_nir_shader_compiler_options;
3330    return &gf100_nir_shader_compiler_options;
3331 }