/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Karol Herbst <kherbst@redhat.com>
 */

#include "compiler/nir/nir.h"

#include "util/u_debug.h"

#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_from_common.h"
#include "codegen/nv50_ir_lowering_helper.h"
#include "codegen/nv50_ir_util.h"

#if __cplusplus >= 201103L
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include <cstring>
#include <list>
#include <vector>

namespace {

#if __cplusplus >= 201103L
using std::hash;
using std::unordered_map;
#else
using std::tr1::hash;
using std::tr1::unordered_map;
#endif

using namespace nv50_ir;

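// type_size() callback for NIR's IO lowering: a variable occupies one slot
// per attribute slot of its GLSL type.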
int
type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
   typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   CacheMode convert(enum gl_access_qualifier);
   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   Value* convert(nir_load_const_instr*, uint8_t);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   ImgFormat convertGLImgFormat(GLuint);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // The returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value does not have a constant part, the Value gets
   // returned through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   // isScalar indicates that the addressing is scalar; vec4 addressing is
   // assumed otherwise
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,
                        bool isScalar = false);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_intrinsic_instr *, bool isSigned);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_intrinsic_op);
   operation getOperation(nir_op);
   operation getOperation(nir_texop);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_intrinsic_op);
   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_deref_instr *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);
   bool visit(nir_ssa_undef_instr *);
   bool visit(nir_tex_instr *);

   // tex stuff
   Value* applyProjection(Value *src, Value *proj);
   unsigned int getNIRArgCount(TexInstruction::Target&);

   // image stuff
   uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &);
   CacheMode getCacheModeFromVar(const nir_variable *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   ImmediateMap immediates;
   NirArrayLMemOffsets regToLmemOffset;
   NirBlockMap blocks;
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;
   Instruction *immInsertPos;

   int clipVertexOutput;

   union {
      struct {
         Value *position;
      } fp;
   };
};

Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0),
     clipVertexOutput(-1)
{
   zero = mkImm((uint32_t)0);
}

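// Get (or lazily create) the BasicBlock corresponding to a nir_block,
// cached by block index.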
BasicBlock *
Converter::convert(nir_block *block)
{
   NirBlockMap::iterator it = blocks.find(block->index);
   if (it != blocks.end())
      return it->second;

   BasicBlock *bb = new BasicBlock(func);
   blocks[block->index] = bb;
   return bb;
}

bool
Converter::isFloatType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_float;
}

bool
Converter::isSignedType(nir_alu_type type)
{
   return nir_alu_type_get_base_type(type) == nir_type_int;
}

bool
Converter::isResultFloat(nir_op op)
{
   const nir_op_info &info = nir_op_infos[op];
   if (info.output_type != nir_type_invalid)
      return isFloatType(info.output_type);

   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
   assert(false);
   return true;
}

bool
Converter::isResultSigned(nir_op op)
{
   switch (op) {
   // there is no umul and we get wrong results if we treat all muls as signed
   case nir_op_imul:
   case nir_op_inot:
      return false;
   default:
      const nir_op_info &info = nir_op_infos[op];
      if (info.output_type != nir_type_invalid)
         return isSignedType(info.output_type);
      ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
      assert(false);
      return true;
   }
}

DataType
Converter::getDType(nir_alu_instr *insn)
{
   if (insn->dest.dest.is_ssa)
      return getDType(insn->op, insn->dest.dest.ssa.bit_size);
   else
      return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
}

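// Only the signed atomic min/max intrinsics need a signed destination type
// here; everything else defaults to unsigned.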
DataType
Converter::getDType(nir_intrinsic_instr *insn)
{
   bool isSigned;
   switch (insn->intrinsic) {
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
      isSigned = true;
      break;
   default:
      isSigned = false;
      break;
   }

   return getDType(insn, isSigned);
}

DataType
Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
{
   if (insn->dest.is_ssa)
      return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
   else
      return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
}

DataType
Converter::getDType(nir_op op, uint8_t bitSize)
{
   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
   if (ty == TYPE_NONE) {
      ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
      assert(false);
   }
   return ty;
}

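// Gather the source DataType for every input of an ALU instruction from the
// nir_op_info table.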
std::vector<DataType>
Converter::getSTypes(nir_alu_instr *insn)
{
   const nir_op_info &info = nir_op_infos[insn->op];
   std::vector<DataType> res(info.num_inputs);

   for (uint8_t i = 0; i < info.num_inputs; ++i) {
      if (info.input_types[i] != nir_type_invalid) {
         res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
      } else {
         ERROR("getSType not implemented for %s idx %u\n", info.name, i);
         assert(false);
         res[i] = TYPE_NONE;
         break;
      }
   }

   return res;
}

DataType
Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
{
   uint8_t bitSize;
   if (src.is_ssa)
      bitSize = src.ssa->bit_size;
   else
      bitSize = src.reg.reg->bit_size;

   DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
   if (ty == TYPE_NONE) {
      const char *str;
      if (isFloat)
         str = "float";
      else if (isSigned)
         str = "int";
      else
         str = "uint";
      ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
      assert(false);
   }
   return ty;
}

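// Map a NIR ALU opcode onto the nv50 IR operation; the type/signedness
// distinction between the f/i/u variants is handled separately through
// getDType()/getSTypes().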
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_inot:
      return OP_NOT;
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_texop op)
{
   switch (op) {
   case nir_texop_tex:
      return OP_TEX;
   case nir_texop_lod:
      return OP_TXLQ;
   case nir_texop_txb:
      return OP_TXB;
   case nir_texop_txd:
      return OP_TXD;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      return OP_TXF;
   case nir_texop_tg4:
      return OP_TXG;
   case nir_texop_txl:
      return OP_TXL;
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_txs:
      return OP_TXQ;
   default:
      ERROR("couldn't get operation for nir_texop %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

operation
Converter::getOperation(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_emit_vertex:
      return OP_EMIT;
   case nir_intrinsic_end_primitive:
      return OP_RESTART;
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
      return OP_SUREDP;
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
      return OP_SULDP;
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
      return OP_SUQ;
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
      return OP_SUSTP;
   default:
      ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
      assert(false);
      return OP_NOP;
   }
}

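// Some ops need a preparatory instruction emitted in front of the real one,
// e.g. sin/cos require a PRESIN on the source.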
operation
Converter::preOperationNeeded(nir_op op)
{
   switch (op) {
   case nir_op_fcos:
   case nir_op_fsin:
      return OP_PRESIN;
   default:
      return OP_NOP;
   }
}

int
Converter::getSubOp(nir_op op)
{
   switch (op) {
   case nir_op_imul_high:
   case nir_op_umul_high:
      return NV50_IR_SUBOP_MUL_HIGH;
   default:
      return 0;
   }
}

int
Converter::getSubOp(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_ssbo_atomic_add:
      return NV50_IR_SUBOP_ATOM_ADD;
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_ssbo_atomic_and:
      return NV50_IR_SUBOP_ATOM_AND;
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return NV50_IR_SUBOP_ATOM_CAS;
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange:
      return NV50_IR_SUBOP_ATOM_EXCH;
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_ssbo_atomic_or:
      return NV50_IR_SUBOP_ATOM_OR;
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
      return NV50_IR_SUBOP_ATOM_MAX;
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
      return NV50_IR_SUBOP_ATOM_MIN;
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor:
      return NV50_IR_SUBOP_ATOM_XOR;

   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_atomic_counter:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
      return NV50_IR_SUBOP_MEMBAR(M, GL);
   case nir_intrinsic_memory_barrier_shared:
      return NV50_IR_SUBOP_MEMBAR(M, CTA);

   case nir_intrinsic_vote_all:
      return NV50_IR_SUBOP_VOTE_ALL;
   case nir_intrinsic_vote_any:
      return NV50_IR_SUBOP_VOTE_ANY;
   case nir_intrinsic_vote_ieq:
      return NV50_IR_SUBOP_VOTE_UNI;
   default:
      return 0;
   }
}

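// Map comparison opcodes to condition codes; float "not equal" uses the
// unordered variant (NEU) so NaN operands compare as not-equal.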
CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}

Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}

Converter::LValues&
Converter::convert(nir_dest *dest)
{
   if (dest->is_ssa)
      return convert(&dest->ssa);
   if (dest->reg.indirect) {
      ERROR("no support for indirects.\n");
      assert(false);
   }
   return convert(dest->reg.reg);
}

Converter::LValues&
Converter::convert(nir_register *reg)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it != regDefs.end())
      return it->second;

   LValues newDef(reg->num_components);
   for (uint8_t i = 0; i < reg->num_components; i++)
      newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
   return regDefs[reg->index] = newDef;
}

Converter::LValues&
Converter::convert(nir_ssa_def *def)
{
   NirDefMap::iterator it = ssaDefs.find(def->index);
   if (it != ssaDefs.end())
      return it->second;

   LValues newDef(def->num_components);
   for (uint8_t i = 0; i < def->num_components; i++)
      newDef[i] = getSSA(std::max(4, def->bit_size / 8));
   return ssaDefs[def->index] = newDef;
}

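// Fetch one component of an ALU source; abs/negate modifiers are expected to
// have been lowered out of the NIR before this point.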
Value*
Converter::getSrc(nir_alu_src *src, uint8_t component)
{
   if (src->abs || src->negate) {
      ERROR("modifiers currently not supported on nir_alu_src\n");
      assert(false);
   }
   return getSrc(&src->src, src->swizzle[component]);
}

Value*
Converter::getSrc(nir_register *reg, uint8_t idx)
{
   NirDefMap::iterator it = regDefs.find(reg->index);
   if (it == regDefs.end())
      return convert(reg)[idx];
   return it->second[idx];
}

Value*
Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
{
   if (src->is_ssa)
      return getSrc(src->ssa, idx);

   if (src->reg.indirect) {
      if (indirect)
         return getSrc(src->reg.indirect, idx);
      ERROR("no support for indirects.\n");
      assert(false);
      return NULL;
   }

   return getSrc(src->reg.reg, idx);
}

Value*
Converter::getSrc(nir_ssa_def *src, uint8_t idx)
{
   ImmediateMap::iterator iit = immediates.find(src->index);
   if (iit != immediates.end())
      return convert((*iit).second, idx);

   NirDefMap::iterator it = ssaDefs.find(src->index);
   if (it == ssaDefs.end()) {
      ERROR("SSA value %u not found\n", src->index);
      assert(false);
      return NULL;
   }
   return it->second[idx];
}

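// If the source is a constant, return it and leave 'indirect' as NULL;
// otherwise return 0 and hand the dynamic part back through 'indirect'.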
uint32_t
Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
{
   nir_const_value *offset = nir_src_as_const_value(*src);

   if (offset) {
      indirect = NULL;
      return offset[0].u32;
   }

   indirect = getSrc(src, idx, true);
   return 0;
}

uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
   if (indirect && !isScalar)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}

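// Translate gl_vert_attrib slots into the TGSI semantic name/index pairs the
// nv50 IR backend still operates on.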
static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid vert attrib slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}

static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}

static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // intentional: FRAG_RESULT_COLOR + 2 == FRAG_RESULT_DATA0, so this is
      // simply slot - FRAG_RESULT_DATA0
      *index = slot - FRAG_RESULT_COLOR - 2;
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}

// copy of _mesa_sysval_to_semantic
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}

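// INTERP_MODE_NONE means "API default": colors get the shade-model flag (sc),
// position is linear, everything else stays smooth.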
void
Converter::setInterpolate(nv50_ir_varying *var,
                          uint8_t mode,
                          bool centroid,
                          unsigned semantic)
{
   switch (mode) {
   case INTERP_MODE_FLAT:
      var->flat = 1;
      break;
   case INTERP_MODE_NONE:
      if (semantic == TGSI_SEMANTIC_COLOR)
         var->sc = 1;
      else if (semantic == TGSI_SEMANTIC_POSITION)
         var->linear = 1;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      var->linear = 1;
      break;
   case INTERP_MODE_SMOOTH:
      break;
   }
   var->centroid = centroid;
}

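// Number of IO slots a variable occupies; for arrayed per-vertex IO the
// outermost (vertex) array dimension is stripped off.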
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}

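// Fill the nv50_ir_prog_info tables (inputs, outputs, system values) from the
// NIR shader's variables.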
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;
   info->numOutputs = 0;

   // we have to fix up the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   info->numSysVals = 0;
   for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) {
      if (!(nir->info.system_values_read & 1ull << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   if (prog->getType() == Program::TYPE_COMPUTE)
      return true;

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ull << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}

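// Compute the byte address of a shader input/output from the varying tables;
// 64-bit types take two 32-bit slots per component.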
uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s\n",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   if (typeSizeof(ty) == 8) {
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}

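// Emit a load; 64-bit loads from const/buffer memory or through an indirect
// address are split into two 32-bit loads and merged afterwards.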
Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}

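// Emit a store to a shader output; 64-bit values are split into two 32-bit
// stores, and exports first get their sources moved into fresh values.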
void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}

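// Copy the relevant shader_info bits into nv50_ir_prog_info before
// translating any code.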
bool
Converter::parseNIR()
{
   info->bin.tlsSpace = 0;
   info->io.clipDistances = nir->info.clip_distance_array_size;
   info->io.cullDistances = nir->info.cull_distance_array_size;

   switch(prog->getType()) {
   case Program::TYPE_COMPUTE:
      info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
      info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
      info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
      info->bin.smemSize = nir->info.cs.shared_size;
      break;
   case Program::TYPE_FRAGMENT:
      info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
      info->prop.fp.persampleInvocation =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
      info->prop.fp.readsSampleLocations =
         (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
      info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
      info->prop.fp.usesSampleMaskIn =
         !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
      break;
   case Program::TYPE_GEOMETRY:
      info->prop.gp.inputPrim = nir->info.gs.input_primitive;
      info->prop.gp.instanceCount = nir->info.gs.invocations;
      info->prop.gp.maxVertices = nir->info.gs.vertices_out;
      info->prop.gp.outputPrim = nir->info.gs.output_primitive;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->prop.tp.domain = GL_LINES;
      else
         info->prop.tp.domain = nir->info.tess.primitive_mode;
      info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
      info->prop.tp.outputPrim =
         nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
      info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
      info->prop.tp.winding = !nir->info.tess.ccw;
      break;
   case Program::TYPE_VERTEX:
      info->prop.vp.usesDrawParameters =
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
         (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
      break;
   default:
      break;
   }

   return true;
}

bool
Converter::visit(nir_function *function)
{
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   nir_foreach_register(reg, &function->impl->registers) {
      if (reg->num_array_elems) {
         // TODO: packed variables would be nice, but MemoryOpt fails
         // replace 4 with reg->num_components
         uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
         regToLmemOffset[reg->index] = info->bin.tlsSpace;
         info->bin.tlsSpace += size;
      }
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if ((prog->getType() == Program::TYPE_VERTEX ||
        prog->getType() == Program::TYPE_TESSELLATION_EVAL)
       && info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non-main functions this needs to be an OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}

bool
Converter::visit(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_block:
      return visit(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return visit(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return visit(nir_cf_node_as_loop(node));
   default:
      ERROR("unknown nir_cf_node type %u\n", node->type);
      return false;
   }
}

bool
Converter::visit(nir_block *block)
{
   if (!block->predecessors->entries && block->instr_list.is_empty())
      return true;

   BasicBlock *bb = convert(block);

   setPosition(bb, true);
   nir_foreach_instr(insn, block) {
      if (!visit(insn))
         return false;
   }
   return true;
}

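// Translate a NIR if: emit a predicated branch to the else block, visit both
// branch lists, and join again when both sides reach the common successor.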
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinAts if both branches end up at the end of the if
   // again; the reasons for this not to happen are breaks/continues/returns,
   // which have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}

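// Loops are bracketed by PREBREAK/PRECONT so that break/continue inside the
// body have their targets set up.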
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}

bool
Converter::visit(nir_instr *insn)
{
   // we need an insertion point for on-the-fly generated immediate loads
   immInsertPos = bb->getExit();
   switch (insn->type) {
   case nir_instr_type_alu:
      return visit(nir_instr_as_alu(insn));
   case nir_instr_type_deref:
      return visit(nir_instr_as_deref(insn));
   case nir_instr_type_intrinsic:
      return visit(nir_instr_as_intrinsic(insn));
   case nir_instr_type_jump:
      return visit(nir_instr_as_jump(insn));
   case nir_instr_type_load_const:
      return visit(nir_instr_as_load_const(insn));
   case nir_instr_type_ssa_undef:
      return visit(nir_instr_as_ssa_undef(insn));
   case nir_instr_type_tex:
      return visit(nir_instr_as_tex(insn));
   default:
      ERROR("unknown nir_instr type %u\n", insn->type);
      return false;
   }
   return true;
}

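// Map system-value load intrinsics onto nv50 IR system-value semantics.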
SVSemantic
Converter::convert(nir_intrinsic_op intr)
{
   switch (intr) {
   case nir_intrinsic_load_base_vertex:
      return SV_BASEVERTEX;
   case nir_intrinsic_load_base_instance:
      return SV_BASEINSTANCE;
   case nir_intrinsic_load_draw_id:
      return SV_DRAWID;
   case nir_intrinsic_load_front_face:
      return SV_FACE;
   case nir_intrinsic_load_helper_invocation:
      return SV_THREAD_KILL;
   case nir_intrinsic_load_instance_id:
      return SV_INSTANCE_ID;
   case nir_intrinsic_load_invocation_id:
      return SV_INVOCATION_ID;
   case nir_intrinsic_load_local_group_size:
      return SV_NTID;
   case nir_intrinsic_load_local_invocation_id:
      return SV_TID;
   case nir_intrinsic_load_num_work_groups:
      return SV_NCTAID;
   case nir_intrinsic_load_patch_vertices_in:
      return SV_VERTEX_COUNT;
   case nir_intrinsic_load_primitive_id:
      return SV_PRIMITIVE_ID;
   case nir_intrinsic_load_sample_id:
      return SV_SAMPLE_INDEX;
   case nir_intrinsic_load_sample_mask_in:
      return SV_SAMPLE_MASK;
   case nir_intrinsic_load_sample_pos:
      return SV_SAMPLE_POS;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SV_LANEMASK_EQ;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SV_LANEMASK_GE;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SV_LANEMASK_GT;
   case nir_intrinsic_load_subgroup_le_mask:
      return SV_LANEMASK_LE;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SV_LANEMASK_LT;
   case nir_intrinsic_load_subgroup_invocation:
      return SV_LANEID;
   case nir_intrinsic_load_tess_coord:
      return SV_TESS_COORD;
   case nir_intrinsic_load_tess_level_inner:
      return SV_TESS_INNER;
   case nir_intrinsic_load_tess_level_outer:
      return SV_TESS_OUTER;
   case nir_intrinsic_load_vertex_id:
      return SV_VERTEX_ID;
   case nir_intrinsic_load_work_group_id:
      return SV_CTAID;
   default:
      ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
            nir_intrinsic_infos[intr].name);
      assert(false);
      return SV_LAST;
   }
}

ImgFormat
Converter::convertGLImgFormat(GLuint format)
{
#define FMT_CASE(a, b) \
   case GL_ ## a: return nv50_ir::FMT_ ## b

   switch (format) {
   FMT_CASE(NONE, NONE);

   FMT_CASE(RGBA32F, RGBA32F);
   FMT_CASE(RGBA16F, RGBA16F);
   FMT_CASE(RG32F, RG32F);
   FMT_CASE(RG16F, RG16F);
   FMT_CASE(R11F_G11F_B10F, R11G11B10F);
   FMT_CASE(R32F, R32F);
   FMT_CASE(R16F, R16F);

   FMT_CASE(RGBA32UI, RGBA32UI);
   FMT_CASE(RGBA16UI, RGBA16UI);
   FMT_CASE(RGB10_A2UI, RGB10A2UI);
   FMT_CASE(RGBA8UI, RGBA8UI);
   FMT_CASE(RG32UI, RG32UI);
   FMT_CASE(RG16UI, RG16UI);
   FMT_CASE(RG8UI, RG8UI);
   FMT_CASE(R32UI, R32UI);
   FMT_CASE(R16UI, R16UI);
   FMT_CASE(R8UI, R8UI);

   FMT_CASE(RGBA32I, RGBA32I);
   FMT_CASE(RGBA16I, RGBA16I);
   FMT_CASE(RGBA8I, RGBA8I);
   FMT_CASE(RG32I, RG32I);
   FMT_CASE(RG16I, RG16I);
   FMT_CASE(RG8I, RG8I);
   FMT_CASE(R32I, R32I);
   FMT_CASE(R16I, R16I);
   FMT_CASE(R8I, R8I);

   FMT_CASE(RGBA16, RGBA16);
   FMT_CASE(RGB10_A2, RGB10A2);
   FMT_CASE(RGBA8, RGBA8);
   FMT_CASE(RG16, RG16);
   FMT_CASE(RG8, RG8);
   FMT_CASE(R16, R16);
   FMT_CASE(R8, R8);

   FMT_CASE(RGBA16_SNORM, RGBA16_SNORM);
   FMT_CASE(RGBA8_SNORM, RGBA8_SNORM);
   FMT_CASE(RG16_SNORM, RG16_SNORM);
   FMT_CASE(RG8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(BGRA_INTEGER, BGRA8);
   default:
      ERROR("unknown format %x\n", format);
      assert(false);
      return nv50_ir::FMT_NONE;
   }
#undef FMT_CASE
}

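// The main intrinsic dispatcher: each case lowers one NIR intrinsic to
// nv50 IR.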
1916 bool
1917 Converter::visit(nir_intrinsic_instr *insn)
1918 {
1919 nir_intrinsic_op op = insn->intrinsic;
1920 const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
1921
1922 switch (op) {
1923 case nir_intrinsic_load_uniform: {
1924 LValues &newDefs = convert(&insn->dest);
1925 const DataType dType = getDType(insn);
1926 Value *indirect;
1927 uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1928 for (uint8_t i = 0; i < insn->num_components; ++i) {
1929 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1930 }
1931 break;
1932 }
1933 case nir_intrinsic_store_output:
1934 case nir_intrinsic_store_per_vertex_output: {
1935 Value *indirect;
1936 DataType dType = getSType(insn->src[0], false, false);
1937 uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1938
1939 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1940 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1941 continue;
1942
1943 uint8_t offset = 0;
1944 Value *src = getSrc(&insn->src[0], i);
1945 switch (prog->getType()) {
1946 case Program::TYPE_FRAGMENT: {
1947 if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1948 // TGSI uses a different interface than NIR: TGSI stores this
1949 // value in the z component, NIR in x
1950 offset += 2;
1951 src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1952 }
1953 break;
1954 }
1955 case Program::TYPE_GEOMETRY:
1956 case Program::TYPE_VERTEX: {
1957 if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
1958 mkMov(clipVtx[i], src);
1959 src = clipVtx[i];
1960 }
1961 break;
1962 }
1963 default:
1964 break;
1965 }
1966
1967 storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1968 }
1969 break;
1970 }
1971 case nir_intrinsic_load_input:
1972 case nir_intrinsic_load_interpolated_input:
1973 case nir_intrinsic_load_output: {
1974 LValues &newDefs = convert(&insn->dest);
1975
1976 // FBFetch
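// fetch the current render target value: build integer pixel coordinates
// from SV_POSITION, append layer and sample index, and read the data back
// via a levelZero TXF on a 2D MS array target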
1977 if (prog->getType() == Program::TYPE_FRAGMENT &&
1978 op == nir_intrinsic_load_output) {
1979 std::vector<Value*> defs, srcs;
1980 uint8_t mask = 0;
1981
1982 srcs.push_back(getSSA());
1983 srcs.push_back(getSSA());
1984 Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1985 Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1986 mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1987 mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1988
1989 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1990 srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1991
1992 for (uint8_t i = 0u; i < insn->num_components; ++i) {
1993 defs.push_back(newDefs[i]);
1994 mask |= 1 << i;
1995 }
1996
1997 TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1998 texi->tex.levelZero = 1;
1999 texi->tex.mask = mask;
2000 texi->tex.useOffsets = 0;
2001 texi->tex.r = 0xffff;
2002 texi->tex.s = 0xffff;
2003
2004 info->prop.fp.readsFramebuffer = true;
2005 break;
2006 }
2007
2008 const DataType dType = getDType(insn);
2009 Value *indirect;
2010 bool input = op != nir_intrinsic_load_output;
2011 operation nvirOp;
2012 uint32_t mode = 0;
2013
2014 uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
2015 nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
2016
2017 // see load_barycentric_* handling
2018 if (prog->getType() == Program::TYPE_FRAGMENT) {
2019 mode = translateInterpMode(&vary, nvirOp);
2020 if (op == nir_intrinsic_load_interpolated_input) {
2021 ImmediateValue immMode;
2022 if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
2023 mode |= immMode.reg.data.u32;
2024 }
2025 }
2026
2027 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2028 uint32_t address = getSlotAddress(insn, idx, i);
2029 Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
2030 if (prog->getType() == Program::TYPE_FRAGMENT) {
2031 int s = 1;
2032 if (typeSizeof(dType) == 8) {
2033 Value *lo = getSSA();
2034 Value *hi = getSSA();
2035 Instruction *interp;
2036
2037 interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
2038 if (nvirOp == OP_PINTERP)
2039 interp->setSrc(s++, fp.position);
2040 if (mode & NV50_IR_INTERP_OFFSET)
2041 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2042 interp->setInterpolate(mode);
2043 interp->setIndirect(0, 0, indirect);
2044
2045 Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
2046 interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
2047 if (nvirOp == OP_PINTERP)
2048 interp->setSrc(s++, fp.position);
2049 if (mode & NV50_IR_INTERP_OFFSET)
2050 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2051 interp->setInterpolate(mode);
2052 interp->setIndirect(0, 0, indirect);
2053
2054 mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
2055 } else {
2056 Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
2057 if (nvirOp == OP_PINTERP)
2058 interp->setSrc(s++, fp.position);
2059 if (mode & NV50_IR_INTERP_OFFSET)
2060 interp->setSrc(s++, getSrc(&insn->src[0], 0));
2061 interp->setInterpolate(mode);
2062 interp->setIndirect(0, 0, indirect);
2063 }
2064 } else {
2065 mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
2066 }
2067 }
2068 break;
2069 }
2070 case nir_intrinsic_load_kernel_input: {
2071 assert(prog->getType() == Program::TYPE_COMPUTE);
2072 assert(insn->num_components == 1);
2073
2074 LValues &newDefs = convert(&insn->dest);
2075 const DataType dType = getDType(insn);
2076 Value *indirect;
2077 uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
2078
2079 mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
2080 break;
2081 }
2082 case nir_intrinsic_load_barycentric_at_offset:
2083 case nir_intrinsic_load_barycentric_at_sample:
2084 case nir_intrinsic_load_barycentric_centroid:
2085 case nir_intrinsic_load_barycentric_pixel:
2086 case nir_intrinsic_load_barycentric_sample: {
2087 LValues &newDefs = convert(&insn->dest);
2088 uint32_t mode;
2089
2090 if (op == nir_intrinsic_load_barycentric_centroid ||
2091 op == nir_intrinsic_load_barycentric_sample) {
2092 mode = NV50_IR_INTERP_CENTROID;
2093 } else if (op == nir_intrinsic_load_barycentric_at_offset) {
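// clamp the offset to the hardware-supported range [-0.5, 0.4375],
// convert it to fixed point with 12 fractional bits, then pack y into
// the high 16 bits above x (the 0x1010 immediate encodes a field of
// width 16 at bit position 16)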
2094 Value *offs[2];
2095 for (uint8_t c = 0; c < 2; c++) {
2096 offs[c] = getScratch();
2097 mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
2098 mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
2099 mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
2100 mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
2101 }
2102 mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
2103
2104 mode = NV50_IR_INTERP_OFFSET;
2105 } else if (op == nir_intrinsic_load_barycentric_pixel) {
2106 mode = NV50_IR_INTERP_DEFAULT;
2107 } else if (op == nir_intrinsic_load_barycentric_at_sample) {
2108 info->prop.fp.readsSampleLocations = true;
2109 mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
2110 mode = NV50_IR_INTERP_OFFSET;
2111 } else {
2112 unreachable("all intrinsics already handled above");
2113 }
2114
2115 loadImm(newDefs[1], mode);
2116 break;
2117 }
2118 case nir_intrinsic_discard:
2119 mkOp(OP_DISCARD, TYPE_NONE, NULL);
2120 break;
2121 case nir_intrinsic_discard_if: {
2122 Value *pred = getSSA(1, FILE_PREDICATE);
2123 if (insn->num_components > 1) {
2124 ERROR("nir_intrinsic_discard_if is only supported with 1 component!\n");
2125 assert(false);
2126 return false;
2127 }
2128 mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2129 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
2130 break;
2131 }
2132 case nir_intrinsic_load_base_vertex:
2133 case nir_intrinsic_load_base_instance:
2134 case nir_intrinsic_load_draw_id:
2135 case nir_intrinsic_load_front_face:
2136 case nir_intrinsic_load_helper_invocation:
2137 case nir_intrinsic_load_instance_id:
2138 case nir_intrinsic_load_invocation_id:
2139 case nir_intrinsic_load_local_group_size:
2140 case nir_intrinsic_load_local_invocation_id:
2141 case nir_intrinsic_load_num_work_groups:
2142 case nir_intrinsic_load_patch_vertices_in:
2143 case nir_intrinsic_load_primitive_id:
2144 case nir_intrinsic_load_sample_id:
2145 case nir_intrinsic_load_sample_mask_in:
2146 case nir_intrinsic_load_sample_pos:
2147 case nir_intrinsic_load_subgroup_eq_mask:
2148 case nir_intrinsic_load_subgroup_ge_mask:
2149 case nir_intrinsic_load_subgroup_gt_mask:
2150 case nir_intrinsic_load_subgroup_le_mask:
2151 case nir_intrinsic_load_subgroup_lt_mask:
2152 case nir_intrinsic_load_subgroup_invocation:
2153 case nir_intrinsic_load_tess_coord:
2154 case nir_intrinsic_load_tess_level_inner:
2155 case nir_intrinsic_load_tess_level_outer:
2156 case nir_intrinsic_load_vertex_id:
2157 case nir_intrinsic_load_work_group_id: {
2158 const DataType dType = getDType(insn);
2159 SVSemantic sv = convert(op);
2160 LValues &newDefs = convert(&insn->dest);
2161
2162 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2163 Value *def;
2164 if (typeSizeof(dType) == 8)
2165 def = getSSA();
2166 else
2167 def = newDefs[i];
2168
2169 if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
2170 loadImm(def, 0u);
2171 } else {
2172 Symbol *sym = mkSysVal(sv, i);
2173 Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
2174 if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
2175 rdsv->perPatch = 1;
2176 }
2177
2178 if (typeSizeof(dType) == 8)
2179 mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
2180 }
2181 break;
2182 }
2183 // constants
2184 case nir_intrinsic_load_subgroup_size: {
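// NVIDIA warps are 32 lanes wide, so the subgroup size is a
// compile-time constant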
2185 LValues &newDefs = convert(&insn->dest);
2186 loadImm(newDefs[0], 32u);
2187 break;
2188 }
2189 case nir_intrinsic_vote_all:
2190 case nir_intrinsic_vote_any:
2191 case nir_intrinsic_vote_ieq: {
2192 LValues &newDefs = convert(&insn->dest);
2193 Value *pred = getScratch(1, FILE_PREDICATE);
2194 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2195 mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
2196 mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
2197 break;
2198 }
2199 case nir_intrinsic_ballot: {
2200 LValues &newDefs = convert(&insn->dest);
2201 Value *pred = getSSA(1, FILE_PREDICATE);
2202 mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
2203 mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
2204 break;
2205 }
2206 case nir_intrinsic_read_first_invocation:
2207 case nir_intrinsic_read_invocation: {
2208 LValues &newDefs = convert(&insn->dest);
2209 const DataType dType = getDType(insn);
2210 Value *tmp = getScratch();
2211
2212 if (op == nir_intrinsic_read_first_invocation) {
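// VOTE any(1) yields the mask of currently active lanes; bit-reversing
// it and running BFIND in shift-amount mode gives the index of the
// lowest set bit, i.e. the first active lane, which the SHFL below
// then broadcasts from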
2213 mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
2214 mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2215 mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2216 } else
2217 tmp = getSrc(&insn->src[1], 0);
2218
2219 for (uint8_t i = 0; i < insn->num_components; ++i) {
2220 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
2221 ->subOp = NV50_IR_SUBOP_SHFL_IDX;
2222 }
2223 break;
2224 }
2225 case nir_intrinsic_load_per_vertex_input: {
2226 const DataType dType = getDType(insn);
2227 LValues &newDefs = convert(&insn->dest);
2228 Value *indirectVertex;
2229 Value *indirectOffset;
2230 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2231 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2232
2233 Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
2234 mkImm(baseVertex), indirectVertex);
2235 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2236 uint32_t address = getSlotAddress(insn, idx, i);
2237 loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
2238 indirectOffset, vtxBase, info->in[idx].patch);
2239 }
2240 break;
2241 }
2242 case nir_intrinsic_load_per_vertex_output: {
2243 const DataType dType = getDType(insn);
2244 LValues &newDefs = convert(&insn->dest);
2245 Value *indirectVertex;
2246 Value *indirectOffset;
2247 uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
2248 uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
2249 Value *vtxBase = NULL;
2250
2251 if (indirectVertex)
2252 vtxBase = indirectVertex;
2253 else
2254 vtxBase = loadImm(NULL, baseVertex);
2255
2256 vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
2257
2258 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2259 uint32_t address = getSlotAddress(insn, idx, i);
2260 loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
2261 indirectOffset, vtxBase, info->in[idx].patch);
2262 }
2263 break;
2264 }
2265 case nir_intrinsic_emit_vertex:
2266 if (info->io.genUserClip > 0)
2267 handleUserClipPlanes();
2268 // fallthrough
2269 case nir_intrinsic_end_primitive: {
2270 uint32_t idx = nir_intrinsic_stream_id(insn);
2271 mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
2272 break;
2273 }
2274 case nir_intrinsic_load_ubo: {
2275 const DataType dType = getDType(insn);
2276 LValues &newDefs = convert(&insn->dest);
2277 Value *indirectIndex;
2278 Value *indirectOffset;
2279 uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
2280 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2281
2282 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2283 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
2284 indirectOffset, indirectIndex);
2285 }
2286 break;
2287 }
2288 case nir_intrinsic_get_buffer_size: {
2289 LValues &newDefs = convert(&insn->dest);
2290 const DataType dType = getDType(insn);
2291 Value *indirectBuffer;
2292 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2293
2294 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
2295 mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);
2296 break;
2297 }
2298 case nir_intrinsic_store_ssbo: {
2299 DataType sType = getSType(insn->src[0], false, false);
2300 Value *indirectBuffer;
2301 Value *indirectOffset;
2302 uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
2303 uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
2304
2305 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2306 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2307 continue;
2308 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
2309 offset + i * typeSizeof(sType));
2310 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))
2311 ->setIndirect(0, 1, indirectBuffer);
2312 }
2313 info->io.globalAccess |= 0x2;
2314 break;
2315 }
2316 case nir_intrinsic_load_ssbo: {
2317 const DataType dType = getDType(insn);
2318 LValues &newDefs = convert(&insn->dest);
2319 Value *indirectBuffer;
2320 Value *indirectOffset;
2321 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2322 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2323
2324 for (uint8_t i = 0u; i < insn->num_components; ++i)
2325 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
2326 indirectOffset, indirectBuffer);
2327
2328 info->io.globalAccess |= 0x1;
2329 break;
2330 }
2331 case nir_intrinsic_shared_atomic_add:
2332 case nir_intrinsic_shared_atomic_and:
2333 case nir_intrinsic_shared_atomic_comp_swap:
2334 case nir_intrinsic_shared_atomic_exchange:
2335 case nir_intrinsic_shared_atomic_or:
2336 case nir_intrinsic_shared_atomic_imax:
2337 case nir_intrinsic_shared_atomic_imin:
2338 case nir_intrinsic_shared_atomic_umax:
2339 case nir_intrinsic_shared_atomic_umin:
2340 case nir_intrinsic_shared_atomic_xor: {
2341 const DataType dType = getDType(insn);
2342 LValues &newDefs = convert(&insn->dest);
2343 Value *indirectOffset;
2344 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2345 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);
2346 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));
2347 if (op == nir_intrinsic_shared_atomic_comp_swap)
2348 atom->setSrc(2, getSrc(&insn->src[2], 0));
2349 atom->setIndirect(0, 0, indirectOffset);
2350 atom->subOp = getSubOp(op);
2351 break;
2352 }
2353 case nir_intrinsic_ssbo_atomic_add:
2354 case nir_intrinsic_ssbo_atomic_and:
2355 case nir_intrinsic_ssbo_atomic_comp_swap:
2356 case nir_intrinsic_ssbo_atomic_exchange:
2357 case nir_intrinsic_ssbo_atomic_or:
2358 case nir_intrinsic_ssbo_atomic_imax:
2359 case nir_intrinsic_ssbo_atomic_imin:
2360 case nir_intrinsic_ssbo_atomic_umax:
2361 case nir_intrinsic_ssbo_atomic_umin:
2362 case nir_intrinsic_ssbo_atomic_xor: {
2363 const DataType dType = getDType(insn);
2364 LValues &newDefs = convert(&insn->dest);
2365 Value *indirectBuffer;
2366 Value *indirectOffset;
2367 uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
2368 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2369
2370 Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
2371 Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
2372 getSrc(&insn->src[2], 0));
2373 if (op == nir_intrinsic_ssbo_atomic_comp_swap)
2374 atom->setSrc(2, getSrc(&insn->src[3], 0));
2375 atom->setIndirect(0, 0, indirectOffset);
2376 atom->setIndirect(0, 1, indirectBuffer);
2377 atom->subOp = getSubOp(op);
2378
2379 info->io.globalAccess |= 0x2;
2380 break;
2381 }
2382 case nir_intrinsic_bindless_image_atomic_add:
2383 case nir_intrinsic_bindless_image_atomic_and:
2384 case nir_intrinsic_bindless_image_atomic_comp_swap:
2385 case nir_intrinsic_bindless_image_atomic_exchange:
2386 case nir_intrinsic_bindless_image_atomic_imax:
2387 case nir_intrinsic_bindless_image_atomic_umax:
2388 case nir_intrinsic_bindless_image_atomic_imin:
2389 case nir_intrinsic_bindless_image_atomic_umin:
2390 case nir_intrinsic_bindless_image_atomic_or:
2391 case nir_intrinsic_bindless_image_atomic_xor:
2392 case nir_intrinsic_bindless_image_load:
2393 case nir_intrinsic_bindless_image_samples:
2394 case nir_intrinsic_bindless_image_size:
2395 case nir_intrinsic_bindless_image_store: {
2396 std::vector<Value*> srcs, defs;
2397 Value *indirect = getSrc(&insn->src[0], 0);
2398 DataType ty;
2399
2400 uint32_t mask = 0;
2401 TexInstruction::Target target =
2402 convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);
2403 unsigned int argCount = getNIRArgCount(target);
2404 uint16_t location = 0;
2405
2406 if (opInfo.has_dest) {
2407 LValues &newDefs = convert(&insn->dest);
2408 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2409 defs.push_back(newDefs[i]);
2410 mask |= 1 << i;
2411 }
2412 }
2413
2414 switch (op) {
2415 case nir_intrinsic_bindless_image_atomic_add:
2416 case nir_intrinsic_bindless_image_atomic_and:
2417 case nir_intrinsic_bindless_image_atomic_comp_swap:
2418 case nir_intrinsic_bindless_image_atomic_exchange:
2419 case nir_intrinsic_bindless_image_atomic_imax:
2420 case nir_intrinsic_bindless_image_atomic_umax:
2421 case nir_intrinsic_bindless_image_atomic_imin:
2422 case nir_intrinsic_bindless_image_atomic_umin:
2423 case nir_intrinsic_bindless_image_atomic_or:
2424 case nir_intrinsic_bindless_image_atomic_xor:
2425 ty = getDType(insn);
2426 mask = 0x1;
2427 info->io.globalAccess |= 0x2;
2428 break;
2429 case nir_intrinsic_bindless_image_load:
2430 ty = TYPE_U32;
2431 info->io.globalAccess |= 0x1;
2432 break;
2433 case nir_intrinsic_bindless_image_store:
2434 ty = TYPE_U32;
2435 mask = 0xf;
2436 info->io.globalAccess |= 0x2;
2437 break;
2438 case nir_intrinsic_bindless_image_samples:
2439 mask = 0x8;
2440 ty = TYPE_U32;
2441 break;
2442 case nir_intrinsic_bindless_image_size:
2443 ty = TYPE_U32;
2444 break;
2445 default:
2446 unreachable("unhandled image opcode");
2447 break;
2448 }
2449
2450 // coords
2451 if (opInfo.num_srcs >= 2)
2452 for (unsigned int i = 0u; i < argCount; ++i)
2453 srcs.push_back(getSrc(&insn->src[1], i));
2454
2455 // the sample index is just another src added after the coords
2456 if (opInfo.num_srcs >= 3 && target.isMS())
2457 srcs.push_back(getSrc(&insn->src[2], 0));
2458
2459 if (opInfo.num_srcs >= 4) {
2460 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2461 for (uint8_t i = 0u; i < components; ++i)
2462 srcs.push_back(getSrc(&insn->src[3], i));
2463 }
2464
2465 if (opInfo.num_srcs >= 5)
2466 // +1 source for the compare value of atomic compare-and-swap
2467 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2468 srcs.push_back(getSrc(&insn->src[4], i));
2469
2470 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2471 // the bindless handle is passed through the indirect "r" source below
2472 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(nir_intrinsic_format(insn))];
2473 texi->tex.mask = mask;
2474 texi->tex.bindless = true;
2475 texi->cache = convert(nir_intrinsic_access(insn));
2476 texi->setType(ty);
2477 texi->subOp = getSubOp(op);
2478
2479 if (indirect)
2480 texi->setIndirectR(indirect);
2481
2482 break;
2483 }
2484 case nir_intrinsic_image_deref_atomic_add:
2485 case nir_intrinsic_image_deref_atomic_and:
2486 case nir_intrinsic_image_deref_atomic_comp_swap:
2487 case nir_intrinsic_image_deref_atomic_exchange:
2488 case nir_intrinsic_image_deref_atomic_imax:
2489 case nir_intrinsic_image_deref_atomic_umax:
2490 case nir_intrinsic_image_deref_atomic_imin:
2491 case nir_intrinsic_image_deref_atomic_umin:
2492 case nir_intrinsic_image_deref_atomic_or:
2493 case nir_intrinsic_image_deref_atomic_xor:
2494 case nir_intrinsic_image_deref_load:
2495 case nir_intrinsic_image_deref_samples:
2496 case nir_intrinsic_image_deref_size:
2497 case nir_intrinsic_image_deref_store: {
2498 const nir_variable *tex;
2499 std::vector<Value*> srcs, defs;
2500 Value *indirect;
2501 DataType ty;
2502
2503 uint32_t mask = 0;
2504 nir_deref_instr *deref = nir_src_as_deref(insn->src[0]);
2505 const glsl_type *type = deref->type;
2506 TexInstruction::Target target =
2507 convert((glsl_sampler_dim)type->sampler_dimensionality,
2508 type->sampler_array, type->sampler_shadow);
2509 unsigned int argCount = getNIRArgCount(target);
2510 uint16_t location = handleDeref(deref, indirect, tex);
2511
2512 if (opInfo.has_dest) {
2513 LValues &newDefs = convert(&insn->dest);
2514 for (uint8_t i = 0u; i < newDefs.size(); ++i) {
2515 defs.push_back(newDefs[i]);
2516 mask |= 1 << i;
2517 }
2518 }
2519
2520 switch (op) {
2521 case nir_intrinsic_image_deref_atomic_add:
2522 case nir_intrinsic_image_deref_atomic_and:
2523 case nir_intrinsic_image_deref_atomic_comp_swap:
2524 case nir_intrinsic_image_deref_atomic_exchange:
2525 case nir_intrinsic_image_deref_atomic_imax:
2526 case nir_intrinsic_image_deref_atomic_umax:
2527 case nir_intrinsic_image_deref_atomic_imin:
2528 case nir_intrinsic_image_deref_atomic_umin:
2529 case nir_intrinsic_image_deref_atomic_or:
2530 case nir_intrinsic_image_deref_atomic_xor:
2531 ty = getDType(insn);
2532 mask = 0x1;
2533 info->io.globalAccess |= 0x2;
2534 break;
2535 case nir_intrinsic_image_deref_load:
2536 ty = TYPE_U32;
2537 info->io.globalAccess |= 0x1;
2538 break;
2539 case nir_intrinsic_image_deref_store:
2540 ty = TYPE_U32;
2541 mask = 0xf;
2542 info->io.globalAccess |= 0x2;
2543 break;
2544 case nir_intrinsic_image_deref_samples:
2545 mask = 0x8;
2546 ty = TYPE_U32;
2547 break;
2548 case nir_intrinsic_image_deref_size:
2549 ty = TYPE_U32;
2550 break;
2551 default:
2552 unreachable("unhandled image opcode");
2553 break;
2554 }
2555
2556 // coords
2557 if (opInfo.num_srcs >= 2)
2558 for (unsigned int i = 0u; i < argCount; ++i)
2559 srcs.push_back(getSrc(&insn->src[1], i));
2560
2561 // the sample index is just another src added after the coords
2562 if (opInfo.num_srcs >= 3 && target.isMS())
2563 srcs.push_back(getSrc(&insn->src[2], 0));
2564
2565 if (opInfo.num_srcs >= 4) {
2566 unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;
2567 for (uint8_t i = 0u; i < components; ++i)
2568 srcs.push_back(getSrc(&insn->src[3], i));
2569 }
2570
2571 if (opInfo.num_srcs >= 5)
2572 // +1 source for the compare value of atomic compare-and-swap
2573 for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)
2574 srcs.push_back(getSrc(&insn->src[4], i));
2575
2576 TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);
2577 texi->tex.bindless = false;
2578 texi->tex.format = &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)];
2579 texi->tex.mask = mask;
2580 texi->cache = getCacheModeFromVar(tex);
2581 texi->setType(ty);
2582 texi->subOp = getSubOp(op);
2583
2584 if (indirect)
2585 texi->setIndirectR(indirect);
2586
2587 break;
2588 }
2589 case nir_intrinsic_store_shared: {
2590 DataType sType = getSType(insn->src[0], false, false);
2591 Value *indirectOffset;
2592 uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
2593
2594 for (uint8_t i = 0u; i < insn->num_components; ++i) {
2595 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2596 continue;
2597 Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
2598 mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
2599 }
2600 break;
2601 }
2602 case nir_intrinsic_load_shared: {
2603 const DataType dType = getDType(insn);
2604 LValues &newDefs = convert(&insn->dest);
2605 Value *indirectOffset;
2606 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2607
2608 for (uint8_t i = 0u; i < insn->num_components; ++i)
2609 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
2610
2611 break;
2612 }
2613 case nir_intrinsic_barrier: {
2614 // TODO: add flag to shader_info
2615 info->numBarriers = 1;
2616 Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2617 bar->fixed = 1;
2618 bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
2619 break;
2620 }
2621 case nir_intrinsic_group_memory_barrier:
2622 case nir_intrinsic_memory_barrier:
2623 case nir_intrinsic_memory_barrier_atomic_counter:
2624 case nir_intrinsic_memory_barrier_buffer:
2625 case nir_intrinsic_memory_barrier_image:
2626 case nir_intrinsic_memory_barrier_shared: {
2627 Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2628 bar->fixed = 1;
2629 bar->subOp = getSubOp(op);
2630 break;
2631 }
2632 case nir_intrinsic_shader_clock: {
2633 const DataType dType = getDType(insn);
2634 LValues &newDefs = convert(&insn->dest);
2635
2636 loadImm(newDefs[0], 0u);
2637 mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
2638 break;
2639 }
2640 case nir_intrinsic_load_global: {
2641 const DataType dType = getDType(insn);
2642 LValues &newDefs = convert(&insn->dest);
2643 Value *indirectOffset;
2644 uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
2645
2646 for (auto i = 0u; i < insn->num_components; ++i)
2647 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
2648
2649 info->io.globalAccess |= 0x1;
2650 break;
2651 }
2652 case nir_intrinsic_store_global: {
2653 DataType sType = getSType(insn->src[0], false, false);
2654
2655 for (auto i = 0u; i < insn->num_components; ++i) {
2656 if (!((1u << i) & nir_intrinsic_write_mask(insn)))
2657 continue;
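// 64-bit values are split into two 32-bit stores: the low word goes
// to the element's offset, the high word 4 bytes above it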
2658 if (typeSizeof(sType) == 8) {
2659 Value *split[2];
2660 mkSplit(split, 4, getSrc(&insn->src[0], i));
2661
2662 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));
2663 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);
2664
2665 sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);
2666 mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);
2667 } else {
2668 Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));
2669 mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));
2670 }
2671 }
2672
2673 info->io.globalAccess |= 0x2;
2674 break;
2675 }
2676 default:
2677 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
2678 return false;
2679 }
2680
2681 return true;
2682 }
2683
2684 bool
2685 Converter::visit(nir_jump_instr *insn)
2686 {
2687 switch (insn->type) {
2688 case nir_jump_return:
2689 // TODO: this only works in the main function
2690 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2691 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2692 break;
2693 case nir_jump_break:
2694 case nir_jump_continue: {
2695 bool isBreak = insn->type == nir_jump_break;
2696 nir_block *block = insn->instr.block;
2697 assert(!block->successors[1]);
2698 BasicBlock *target = convert(block->successors[0]);
2699 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2700 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2701 break;
2702 }
2703 default:
2704 ERROR("unknown nir_jump_type %u\n", insn->type);
2705 return false;
2706 }
2707
2708 return true;
2709 }
2710
2711 Value*
2712 Converter::convert(nir_load_const_instr *insn, uint8_t idx)
2713 {
2714 Value *val;
2715
2716 if (immInsertPos)
2717 setPosition(immInsertPos, true);
2718 else
2719 setPosition(bb, false);
2720
2721 switch (insn->def.bit_size) {
2722 case 64:
2723 val = loadImm(getSSA(8), insn->value[idx].u64);
2724 break;
2725 case 32:
2726 val = loadImm(getSSA(4), insn->value[idx].u32);
2727 break;
2728 case 16:
2729 val = loadImm(getSSA(2), insn->value[idx].u16);
2730 break;
2731 case 8:
2732 val = loadImm(getSSA(1), insn->value[idx].u8);
2733 break;
2734 default:
2735 unreachable("unhandled bit size!\n");
2736 }
2737 setPosition(bb, true);
2738 return val;
2739 }
2740
2741 bool
2742 Converter::visit(nir_load_const_instr *insn)
2743 {
2744 assert(insn->def.bit_size <= 64);
2745 immediates[insn->def.index] = insn;
2746 return true;
2747 }
2748
2749 #define DEFAULT_CHECKS \
2750 if (insn->dest.dest.ssa.num_components > 1) { \
2751 ERROR("nir_alu_instr is only supported with 1 component!\n"); \
2752 return false; \
2753 } \
2754 if (insn->dest.write_mask != 1) { \
2755 ERROR("nir_alu_instr is only supported with a write_mask of 1!\n"); \
2756 return false; \
2757 }
2758 bool
2759 Converter::visit(nir_alu_instr *insn)
2760 {
2761 const nir_op op = insn->op;
2762 const nir_op_info &info = nir_op_infos[op];
2763 DataType dType = getDType(insn);
2764 const std::vector<DataType> sTypes = getSTypes(insn);
2765
2766 Instruction *oldPos = this->bb->getExit();
2767
2768 switch (op) {
2769 case nir_op_fabs:
2770 case nir_op_iabs:
2771 case nir_op_fadd:
2772 case nir_op_iadd:
2773 case nir_op_iand:
2774 case nir_op_fceil:
2775 case nir_op_fcos:
2776 case nir_op_fddx:
2777 case nir_op_fddx_coarse:
2778 case nir_op_fddx_fine:
2779 case nir_op_fddy:
2780 case nir_op_fddy_coarse:
2781 case nir_op_fddy_fine:
2782 case nir_op_fdiv:
2783 case nir_op_idiv:
2784 case nir_op_udiv:
2785 case nir_op_fexp2:
2786 case nir_op_ffloor:
2787 case nir_op_ffma:
2788 case nir_op_flog2:
2789 case nir_op_fmax:
2790 case nir_op_imax:
2791 case nir_op_umax:
2792 case nir_op_fmin:
2793 case nir_op_imin:
2794 case nir_op_umin:
2795 case nir_op_fmod:
2796 case nir_op_imod:
2797 case nir_op_umod:
2798 case nir_op_fmul:
2799 case nir_op_imul:
2800 case nir_op_imul_high:
2801 case nir_op_umul_high:
2802 case nir_op_fneg:
2803 case nir_op_ineg:
2804 case nir_op_inot:
2805 case nir_op_ior:
2806 case nir_op_pack_64_2x32_split:
2807 case nir_op_fpow:
2808 case nir_op_frcp:
2809 case nir_op_frem:
2810 case nir_op_irem:
2811 case nir_op_frsq:
2812 case nir_op_fsat:
2813 case nir_op_ishr:
2814 case nir_op_ushr:
2815 case nir_op_fsin:
2816 case nir_op_fsqrt:
2817 case nir_op_ftrunc:
2818 case nir_op_ishl:
2819 case nir_op_ixor: {
2820 DEFAULT_CHECKS;
2821 LValues &newDefs = convert(&insn->dest);
2822 operation preOp = preOperationNeeded(op);
2823 if (preOp != OP_NOP) {
2824 assert(info.num_inputs < 2);
2825 Value *tmp = getSSA(typeSizeof(dType));
2826 Instruction *i0 = mkOp(preOp, dType, tmp);
2827 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2828 if (info.num_inputs) {
2829 i0->setSrc(0, getSrc(&insn->src[0]));
2830 i1->setSrc(0, tmp);
2831 }
2832 i1->subOp = getSubOp(op);
2833 } else {
2834 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2835 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2836 i->setSrc(s, getSrc(&insn->src[s]));
2837 }
2838 i->subOp = getSubOp(op);
2839 }
2840 break;
2841 }
2842 case nir_op_ifind_msb:
2843 case nir_op_ufind_msb: {
2844 DEFAULT_CHECKS;
2845 LValues &newDefs = convert(&insn->dest);
2846 dType = sTypes[0];
2847 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2848 break;
2849 }
2850 case nir_op_fround_even: {
2851 DEFAULT_CHECKS;
2852 LValues &newDefs = convert(&insn->dest);
2853 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2854 break;
2855 }
2856 // convert instructions
2857 case nir_op_f2f32:
2858 case nir_op_f2i32:
2859 case nir_op_f2u32:
2860 case nir_op_i2f32:
2861 case nir_op_i2i32:
2862 case nir_op_u2f32:
2863 case nir_op_u2u32:
2864 case nir_op_f2f64:
2865 case nir_op_f2i64:
2866 case nir_op_f2u64:
2867 case nir_op_i2f64:
2868 case nir_op_i2i64:
2869 case nir_op_u2f64:
2870 case nir_op_u2u64: {
2871 DEFAULT_CHECKS;
2872 LValues &newDefs = convert(&insn->dest);
2873 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2874 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2875 i->rnd = ROUND_Z;
2876 i->sType = sTypes[0];
2877 break;
2878 }
2879 // compare instructions
2880 case nir_op_feq32:
2881 case nir_op_ieq32:
2882 case nir_op_fge32:
2883 case nir_op_ige32:
2884 case nir_op_uge32:
2885 case nir_op_flt32:
2886 case nir_op_ilt32:
2887 case nir_op_ult32:
2888 case nir_op_fne32:
2889 case nir_op_ine32: {
2890 DEFAULT_CHECKS;
2891 LValues &newDefs = convert(&insn->dest);
2892 Instruction *i = mkCmp(getOperation(op),
2893 getCondCode(op),
2894 dType,
2895 newDefs[0],
2896 dType,
2897 getSrc(&insn->src[0]),
2898 getSrc(&insn->src[1]));
2899 if (info.num_inputs == 3)
2900 i->setSrc(2, getSrc(&insn->src[2]));
2901 i->sType = sTypes[0];
2902 break;
2903 }
2904 // these ALU ops are weird and need special handling, because
2905 // 1. they are always component-based
2906 // 2. they basically just merge multiple values into one data type
2907 case nir_op_mov:
2908 if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2909 nir_reg_dest& reg = insn->dest.dest.reg;
2910 uint32_t goffset = regToLmemOffset[reg.reg->index];
2911 uint8_t comps = reg.reg->num_components;
2912 uint8_t size = reg.reg->bit_size / 8;
2913 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2914 uint32_t aoffset = csize * reg.base_offset;
2915 Value *indirect = NULL;
2916
2917 if (reg.indirect)
2918 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2919 getSrc(reg.indirect, 0), mkImm(csize));
2920
2921 for (uint8_t i = 0u; i < comps; ++i) {
2922 if (!((1u << i) & insn->dest.write_mask))
2923 continue;
2924
2925 Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2926 mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2927 }
2928 break;
2929 } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2930 LValues &newDefs = convert(&insn->dest);
2931 nir_reg_src& reg = insn->src[0].src.reg;
2932 uint32_t goffset = regToLmemOffset[reg.reg->index];
2933 // uint8_t comps = reg.reg->num_components;
2934 uint8_t size = reg.reg->bit_size / 8;
2935 uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2936 uint32_t aoffset = csize * reg.base_offset;
2937 Value *indirect = NULL;
2938
2939 if (reg.indirect)
2940 indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2941
2942 for (uint8_t i = 0u; i < newDefs.size(); ++i)
2943 loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2944
2945 break;
2946 } else {
2947 LValues &newDefs = convert(&insn->dest);
2948 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2949 mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2950 }
2951 }
2952 break;
2953 case nir_op_vec2:
2954 case nir_op_vec3:
2955 case nir_op_vec4: {
2956 LValues &newDefs = convert(&insn->dest);
2957 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2958 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2959 }
2960 break;
2961 }
2962 // (un)pack
2963 case nir_op_pack_64_2x32: {
2964 LValues &newDefs = convert(&insn->dest);
2965 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2966 merge->setSrc(0, getSrc(&insn->src[0], 0));
2967 merge->setSrc(1, getSrc(&insn->src[0], 1));
2968 break;
2969 }
2970 case nir_op_pack_half_2x16_split: {
2971 LValues &newDefs = convert(&insn->dest);
2972 Value *tmpH = getSSA();
2973 Value *tmpL = getSSA();
2974
2975 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2976 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2977 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2978 break;
2979 }
2980 case nir_op_unpack_half_2x16_split_x:
2981 case nir_op_unpack_half_2x16_split_y: {
2982 LValues &newDefs = convert(&insn->dest);
2983 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2984 if (op == nir_op_unpack_half_2x16_split_y)
2985 cvt->subOp = 1;
2986 break;
2987 }
2988 case nir_op_unpack_64_2x32: {
2989 LValues &newDefs = convert(&insn->dest);
2990 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2991 break;
2992 }
2993 case nir_op_unpack_64_2x32_split_x: {
2994 LValues &newDefs = convert(&insn->dest);
2995 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2996 break;
2997 }
2998 case nir_op_unpack_64_2x32_split_y: {
2999 LValues &newDefs = convert(&insn->dest);
3000 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
3001 break;
3002 }
3003 // special instructions
3004 case nir_op_fsign:
3005 case nir_op_isign: {
3006 DEFAULT_CHECKS;
3007 DataType iType;
3008 if (::isFloatType(dType))
3009 iType = TYPE_F32;
3010 else
3011 iType = TYPE_S32;
3012
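// OP_SET produces 1.0f for true in float mode but -1 (all bits set) in
// integer mode, which is why the operand order of the final OP_SUB
// flips between the float and integer paths below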
3013 LValues &newDefs = convert(&insn->dest);
3014 LValue *val0 = getScratch();
3015 LValue *val1 = getScratch();
3016 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
3017 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
3018
3019 if (dType == TYPE_F64) {
3020 mkOp2(OP_SUB, iType, val0, val0, val1);
3021 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
3022 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
3023 mkOp2(OP_SUB, iType, val0, val1, val0);
3024 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
3025 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
3026 } else if (::isFloatType(dType))
3027 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
3028 else
3029 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
3030 break;
3031 }
3032 case nir_op_fcsel:
3033 case nir_op_b32csel: {
3034 DEFAULT_CHECKS;
3035 LValues &newDefs = convert(&insn->dest);
3036 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
3037 break;
3038 }
3039 case nir_op_ibitfield_extract:
3040 case nir_op_ubitfield_extract: {
3041 DEFAULT_CHECKS;
3042 Value *tmp = getSSA();
3043 LValues &newDefs = convert(&insn->dest);
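// EXTBF consumes (width << 8) | offset; the INSBF with immediate 0x808
// packs the bit count from src[2] into bits 15:8 on top of the offset
// taken from src[1]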
3044 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3045 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
3046 break;
3047 }
3048 case nir_op_bfm: {
3049 DEFAULT_CHECKS;
3050 LValues &newDefs = convert(&insn->dest);
3051 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
3052 break;
3053 }
3054 case nir_op_bitfield_insert: {
3055 DEFAULT_CHECKS;
3056 LValues &newDefs = convert(&insn->dest);
3057 LValue *temp = getSSA();
3058 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
3059 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
3060 break;
3061 }
3062 case nir_op_bit_count: {
3063 DEFAULT_CHECKS;
3064 LValues &newDefs = convert(&insn->dest);
3065 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
3066 break;
3067 }
3068 case nir_op_bitfield_reverse: {
3069 DEFAULT_CHECKS;
3070 LValues &newDefs = convert(&insn->dest);
3071 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3072 break;
3073 }
3074 case nir_op_find_lsb: {
3075 DEFAULT_CHECKS;
3076 LValues &newDefs = convert(&insn->dest);
3077 Value *tmp = getSSA();
3078 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
3079 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3080 break;
3081 }
3082 // boolean conversions
3083 case nir_op_b2f32: {
3084 DEFAULT_CHECKS;
3085 LValues &newDefs = convert(&insn->dest);
3086 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
3087 break;
3088 }
3089 case nir_op_b2f64: {
3090 DEFAULT_CHECKS;
3091 LValues &newDefs = convert(&insn->dest);
3092 Value *tmp = getSSA(4);
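// 0x3ff00000 is the upper half of the IEEE-754 double 1.0, so merging
// the masked value as the high word over a zero low word yields either
// 0.0 or 1.0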
3093 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
3094 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
3095 break;
3096 }
3097 case nir_op_f2b32:
3098 case nir_op_i2b32: {
3099 DEFAULT_CHECKS;
3100 LValues &newDefs = convert(&insn->dest);
3101 Value *src1;
3102 if (typeSizeof(sTypes[0]) == 8) {
3103 src1 = loadImm(getSSA(8), 0.0);
3104 } else {
3105 src1 = zero;
3106 }
3107 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
3108 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
3109 break;
3110 }
3111 case nir_op_b2i32: {
3112 DEFAULT_CHECKS;
3113 LValues &newDefs = convert(&insn->dest);
3114 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
3115 break;
3116 }
3117 case nir_op_b2i64: {
3118 DEFAULT_CHECKS;
3119 LValues &newDefs = convert(&insn->dest);
3120 LValue *def = getScratch();
3121 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
3122 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
3123 break;
3124 }
3125 default:
3126 ERROR("unknown nir_op %s\n", info.name);
3127 return false;
3128 }
3129
3130 if (!oldPos) {
3131 oldPos = this->bb->getEntry();
3132 oldPos->precise = insn->exact;
3133 }
3134
3135 if (unlikely(!oldPos))
3136 return true;
3137
3138 while (oldPos->next) {
3139 oldPos = oldPos->next;
3140 oldPos->precise = insn->exact;
3141 }
3142 oldPos->saturate = insn->dest.saturate;
3143
3144 return true;
3145 }
3146 #undef DEFAULT_CHECKS
3147
3148 bool
3149 Converter::visit(nir_ssa_undef_instr *insn)
3150 {
3151 LValues &newDefs = convert(&insn->def);
3152 for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
3153 mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
3154 }
3155 return true;
3156 }
3157
3158 #define CASE_SAMPLER(ty) \
3159 case GLSL_SAMPLER_DIM_ ## ty : \
3160 if (isArray && !isShadow) \
3161 return TEX_TARGET_ ## ty ## _ARRAY; \
3162 else if (!isArray && isShadow) \
3163 return TEX_TARGET_## ty ## _SHADOW; \
3164 else if (isArray && isShadow) \
3165 return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
3166 else \
3167 return TEX_TARGET_ ## ty
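// e.g. CASE_SAMPLER(2D) expands to a case for GLSL_SAMPLER_DIM_2D that
// selects between TEX_TARGET_2D, _2D_ARRAY, _2D_SHADOW and
// _2D_ARRAY_SHADOW based on the isArray/isShadow flags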
3168
3169 TexTarget
3170 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
3171 {
3172 switch (dim) {
3173 CASE_SAMPLER(1D);
3174 CASE_SAMPLER(2D);
3175 CASE_SAMPLER(CUBE);
3176 case GLSL_SAMPLER_DIM_3D:
3177 return TEX_TARGET_3D;
3178 case GLSL_SAMPLER_DIM_MS:
3179 if (isArray)
3180 return TEX_TARGET_2D_MS_ARRAY;
3181 return TEX_TARGET_2D_MS;
3182 case GLSL_SAMPLER_DIM_RECT:
3183 if (isShadow)
3184 return TEX_TARGET_RECT_SHADOW;
3185 return TEX_TARGET_RECT;
3186 case GLSL_SAMPLER_DIM_BUF:
3187 return TEX_TARGET_BUFFER;
3188 case GLSL_SAMPLER_DIM_EXTERNAL:
3189 return TEX_TARGET_2D;
3190 default:
3191 ERROR("unknown glsl_sampler_dim %u\n", dim);
3192 assert(false);
3193 return TEX_TARGET_COUNT;
3194 }
3195 }
3196 #undef CASE_SAMPLER
3197
3198 Value*
3199 Converter::applyProjection(Value *src, Value *proj)
3200 {
3201 if (!proj)
3202 return src;
3203 return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
3204 }
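// proj, when set, holds the reciprocal of the projector component
// (computed with OP_RCP at the call site), turning the projective
// divide into a multiply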
3205
3206 unsigned int
3207 Converter::getNIRArgCount(TexInstruction::Target& target)
3208 {
3209 unsigned int result = target.getArgCount();
3210 if (target.isCube() && target.isArray())
3211 result--;
3212 if (target.isMS())
3213 result--;
3214 return result;
3215 }
3216
3217 uint16_t
3218 Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_variable * &tex)
3219 {
3220 typedef std::pair<uint32_t,Value*> DerefPair;
3221 std::list<DerefPair> derefs;
3222
3223 uint16_t result = 0;
3224 while (deref->deref_type != nir_deref_type_var) {
3225 switch (deref->deref_type) {
3226 case nir_deref_type_array: {
3227 Value *indirect;
3228 uint8_t size = type_size(deref->type, true);
3229 result += size * getIndirect(&deref->arr.index, 0, indirect);
3230
3231 if (indirect) {
3232 derefs.push_front(std::make_pair(size, indirect));
3233 }
3234
3235 break;
3236 }
3237 case nir_deref_type_struct: {
3238 result += nir_deref_instr_parent(deref)->type->struct_location_offset(deref->strct.index);
3239 break;
3240 }
3241 case nir_deref_type_var:
3242 default:
3243 unreachable("nir_deref_type_var reached in handleDeref!");
3244 break;
3245 }
3246 deref = nir_deref_instr_parent(deref);
3247 }
3248
3249 indirect = NULL;
3250 for (std::list<DerefPair>::const_iterator it = derefs.begin(); it != derefs.end(); ++it) {
3251 Value *offset = mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), it->first), it->second);
3252 if (indirect)
3253 indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), indirect, offset);
3254 else
3255 indirect = offset;
3256 }
3257
3258 tex = nir_deref_instr_get_variable(deref);
3259 assert(tex);
3260
3261 return result + tex->data.driver_location;
3262 }
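// As a hypothetical illustration: for a source like images[2], where the
// image type occupies one slot and the variable's data.driver_location is
// 4, the constant part works out to 4 + 2 * 1 = 6, while any non-constant
// array index is accumulated into the indirect Value instead.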
3263
3264 CacheMode
3265 Converter::convert(enum gl_access_qualifier access)
3266 {
3267 switch (access) {
3268 case ACCESS_VOLATILE:
3269 return CACHE_CV;
3270 case ACCESS_COHERENT:
3271 return CACHE_CG;
3272 default:
3273 return CACHE_CA;
3274 }
3275 }
3276
3277 CacheMode
3278 Converter::getCacheModeFromVar(const nir_variable *var)
3279 {
3280 return convert(var->data.image.access);
3281 }
3282
3283 bool
3284 Converter::visit(nir_tex_instr *insn)
3285 {
3286 switch (insn->op) {
3287 case nir_texop_lod:
3288 case nir_texop_query_levels:
3289 case nir_texop_tex:
3290 case nir_texop_texture_samples:
3291 case nir_texop_tg4:
3292 case nir_texop_txb:
3293 case nir_texop_txd:
3294 case nir_texop_txf:
3295 case nir_texop_txf_ms:
3296 case nir_texop_txl:
3297 case nir_texop_txs: {
3298 LValues &newDefs = convert(&insn->dest);
3299 std::vector<Value*> srcs;
3300 std::vector<Value*> defs;
3301 std::vector<nir_src*> offsets;
3302 uint8_t mask = 0;
3303 bool lz = false;
3304 Value *proj = NULL;
3305 TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
3306 operation op = getOperation(insn->op);
3307
3308 int r, s;
3309 int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
3310 int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
3311 int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
3312 int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
3313 int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
3314 int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
3315 int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
3316 int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
3317 int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
3318 int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
3319 int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
3320 int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);
3321 int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);
3322
3323 bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;
3324 assert((sampHandleIdx != -1) == (texHandleIdx != -1));
3325
3326 if (projIdx != -1)
3327 proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
3328
3329 srcs.resize(insn->coord_components);
3330 for (uint8_t i = 0u; i < insn->coord_components; ++i)
3331 srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
3332
3333 // sometimes we get fewer args than target.getArgCount, but codegen expects the latter
3334 if (insn->coord_components) {
3335 uint32_t argCount = target.getArgCount();
3336
3337 if (target.isMS())
3338 argCount -= 1;
3339
3340 for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
3341 srcs.push_back(getSSA());
3342 }
3343
3344 if (insn->op == nir_texop_texture_samples)
3345 srcs.push_back(zero);
3346 else if (!insn->num_srcs)
3347 srcs.push_back(loadImm(NULL, 0));
3348 if (biasIdx != -1)
3349 srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
3350 if (lodIdx != -1)
3351 srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
3352 else if (op == OP_TXF)
3353 lz = true;
3354 if (msIdx != -1)
3355 srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
3356 if (offsetIdx != -1)
3357 offsets.push_back(&insn->src[offsetIdx].src);
3358 if (compIdx != -1)
3359 srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
3360 if (texOffIdx != -1) {
3361 srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
3362 texOffIdx = srcs.size() - 1;
3363 }
3364 if (sampOffIdx != -1) {
3365 srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
3366 sampOffIdx = srcs.size() - 1;
3367 }
3368 if (bindless) {
3369 // currently we only use the lower 32 bits of the 64-bit handle
3370 Value *split[2];
3371 Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);
3372
3373 mkSplit(split, 4, handle);
3374
3375 srcs.push_back(split[0]);
3376 texOffIdx = srcs.size() - 1;
3377 }
3378
3379 r = bindless ? 0xff : insn->texture_index;
3380 s = bindless ? 0x1f : insn->sampler_index;
3381
3382 defs.resize(newDefs.size());
3383 for (uint8_t d = 0u; d < newDefs.size(); ++d) {
3384 defs[d] = newDefs[d];
3385 mask |= 1 << d;
3386 }
3387 if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
3388 lz = true;
3389
3390 TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
3391 texi->tex.levelZero = lz;
3392 texi->tex.mask = mask;
3393 texi->tex.bindless = bindless;
3394
3395 if (texOffIdx != -1)
3396 texi->tex.rIndirectSrc = texOffIdx;
3397 if (sampOffIdx != -1)
3398 texi->tex.sIndirectSrc = sampOffIdx;
3399
3400 switch (insn->op) {
3401 case nir_texop_tg4:
3402 if (!target.isShadow())
3403 texi->tex.gatherComp = insn->component;
3404 break;
3405 case nir_texop_txs:
3406 texi->tex.query = TXQ_DIMS;
3407 break;
3408 case nir_texop_texture_samples:
3409 texi->tex.mask = 0x4;
3410 texi->tex.query = TXQ_TYPE;
3411 break;
3412 case nir_texop_query_levels:
3413 texi->tex.mask = 0x8;
3414 texi->tex.query = TXQ_DIMS;
3415 break;
3416 default:
3417 break;
3418 }
3419
3420 texi->tex.useOffsets = offsets.size();
3421 if (texi->tex.useOffsets) {
3422 for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
3423 for (uint32_t c = 0u; c < 3; ++c) {
3424 uint8_t s2 = std::min(c, target.getDim() - 1);
3425 texi->offset[s][c].set(getSrc(offsets[s], s2));
3426 texi->offset[s][c].setInsn(texi);
3427 }
3428 }
3429 }
3430
3431 if (op == OP_TXG && offsetIdx == -1) {
3432 if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {
3433 texi->tex.useOffsets = 4;
3434 setPosition(texi, false);
3435 for (uint8_t i = 0; i < 4; ++i) {
3436 for (uint8_t j = 0; j < 2; ++j) {
3437 texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));
3438 texi->offset[i][j].setInsn(texi);
3439 }
3440 }
3441 setPosition(texi, true);
3442 }
3443 }
3444
3445 if (ddxIdx != -1 && ddyIdx != -1) {
3446 for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
3447 texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
3448 texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
3449 }
3450 }
3451
3452 break;
3453 }
3454 default:
3455 ERROR("unknown nir_texop %u\n", insn->op);
3456 return false;
3457 }
3458 return true;
3459 }
3460
3461 bool
3462 Converter::visit(nir_deref_instr *deref)
3463 {
3464 // we just ignore these, because image intrinsics are the only place where
3465 // we should end up with deref sources, and those have to backtrack anyway
3466 // to get at the nir_variable. This code just exists to reject deref types
3467 // we don't expect.
3468 switch (deref->deref_type) {
3469 case nir_deref_type_array:
3470 case nir_deref_type_struct:
3471 case nir_deref_type_var:
3472 break;
3473 default:
3474 ERROR("unknown nir_deref_instr %u\n", deref->deref_type);
3475 return false;
3476 }
3477 return true;
3478 }
3479
3480 bool
3481 Converter::run()
3482 {
3483 bool progress;
3484
3485 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
3486 nir_print_shader(nir, stderr);
3487
3488 struct nir_lower_subgroups_options subgroup_options = {
3489 .subgroup_size = 32,
3490 .ballot_bit_size = 32,
3491 };
3492
3493 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
3494 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3495 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3496 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
3497 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3498 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
3499 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
3500
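// iterate the NIR optimization passes until they stop making progress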
3501 do {
3502 progress = false;
3503 NIR_PASS(progress, nir, nir_copy_prop);
3504 NIR_PASS(progress, nir, nir_opt_remove_phis);
3505 NIR_PASS(progress, nir, nir_opt_trivial_continues);
3506 NIR_PASS(progress, nir, nir_opt_cse);
3507 NIR_PASS(progress, nir, nir_opt_algebraic);
3508 NIR_PASS(progress, nir, nir_opt_constant_folding);
3509 NIR_PASS(progress, nir, nir_copy_prop);
3510 NIR_PASS(progress, nir, nir_opt_dce);
3511 NIR_PASS(progress, nir, nir_opt_dead_cf);
3512 } while (progress);
3513
3514 NIR_PASS_V(nir, nir_lower_bool_to_int32);
3515 NIR_PASS_V(nir, nir_lower_locals_to_regs);
3516 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
3517 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3518
3519 // Garbage collect dead instructions
3520 nir_sweep(nir);
3521
3522 if (!parseNIR()) {
3523 ERROR("Couldn't parse NIR!\n");
3524 return false;
3525 }
3526
3527 if (!assignSlots()) {
3528 ERROR("Couldn't assign slots!\n");
3529 return false;
3530 }
3531
3532 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
3533 nir_print_shader(nir, stderr);
3534
3535 nir_foreach_function(function, nir) {
3536 if (!visit(function))
3537 return false;
3538 }
3539
3540 return true;
3541 }
3542
3543 } // unnamed namespace
3544
3545 namespace nv50_ir {
3546
3547 bool
3548 Program::makeFromNIR(struct nv50_ir_prog_info *info)
3549 {
3550 nir_shader *nir = (nir_shader*)info->bin.source;
3551 Converter converter(this, nir, info);
3552 bool result = converter.run();
3553 if (!result)
3554 return result;
3555 LoweringHelper lowering;
3556 lowering.run(this);
3557 tlsSize = info->bin.tlsSpace;
3558 return result;
3559 }
3560
3561 } // namespace nv50_ir