src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33
  34 #if __cplusplus >= 201103L
  35 #include <unordered_map>
  36 #else
  37 #include <tr1/unordered_map>
  38 #endif
  39 #include <vector>
  40
  41 namespace {
  42
  43 #if __cplusplus >= 201103L
  44 using std::hash;
  45 using std::unordered_map;
  46 #else
  47 using std::tr1::hash;
  48 using std::tr1::unordered_map;
  49 #endif
  50
  51 using namespace nv50_ir;
  52
  53 int
  54 type_size(const struct glsl_type *type)
  55 {
  56    return glsl_count_attribute_slots(type, false);
  57 }
  58
  59 class Converter : public ConverterCommon
  60 {
  61 public:
  62    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  63
  64    bool run();
  65 private:
  66    typedef std::vector<LValue*> LValues;
  67    typedef unordered_map<unsigned, LValues> NirDefMap;
  68    typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
  69    typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
  70
  71    TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
  72    LValues& convert(nir_alu_dest *);
  73    BasicBlock* convert(nir_block *);
  74    LValues& convert(nir_dest *);
  75    SVSemantic convert(nir_intrinsic_op);
  76    LValues& convert(nir_register *);
  77    LValues& convert(nir_ssa_def *);
  78
  79    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  80    Value* getSrc(nir_register *, uint8_t);
  81    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  82    Value* getSrc(nir_ssa_def *, uint8_t);
  83
  84    // returned value is the constant part of the given source (either the
  85    // nir_src or the selected source component of an intrinsic). Even though
  86    // this is mostly an optimization to be able to skip indirects in a few
  87    // cases, sometimes we require immediate values or set some fileds on
  88    // instructions (e.g. tex) in order for codegen to consume those.
  89    // If the found value has not a constant part, the Value gets returned
  90    // through the Value parameter.
  91    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  92    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
  93
  94    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
  95
  96    void setInterpolate(nv50_ir_varying *,
  97                        uint8_t,
  98                        bool centroid,
  99                        unsigned semantics);
 100
 101    Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
 102                          uint8_t c, Value *indirect0 = NULL,
 103                          Value *indirect1 = NULL, bool patch = false);
 104    void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
 105                 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
 106                 Value *indirect1 = NULL);
 107
 108    bool isFloatType(nir_alu_type);
 109    bool isSignedType(nir_alu_type);
 110    bool isResultFloat(nir_op);
 111    bool isResultSigned(nir_op);
 112
 113    DataType getDType(nir_alu_instr *);
 114    DataType getDType(nir_intrinsic_instr *);
 115    DataType getDType(nir_op, uint8_t);
 116
 117    std::vector<DataType> getSTypes(nir_alu_instr *);
 118    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 119
 120    operation getOperation(nir_intrinsic_op);
 121    operation getOperation(nir_op);
 122    operation getOperation(nir_texop);
 123    operation preOperationNeeded(nir_op);
 124
 125    int getSubOp(nir_intrinsic_op);
 126    int getSubOp(nir_op);
 127
 128    CondCode getCondCode(nir_op);
 129
 130    bool assignSlots();
 131    bool parseNIR();
 132
 133    bool visit(nir_alu_instr *);
 134    bool visit(nir_block *);
 135    bool visit(nir_cf_node *);
 136    bool visit(nir_function *);
 137    bool visit(nir_if *);
 138    bool visit(nir_instr *);
 139    bool visit(nir_intrinsic_instr *);
 140    bool visit(nir_jump_instr *);
 141    bool visit(nir_load_const_instr*);
 142    bool visit(nir_loop *);
 143    bool visit(nir_ssa_undef_instr *);
 144    bool visit(nir_tex_instr *);
 145
 146    // tex stuff
 147    Value* applyProjection(Value *src, Value *proj);
 148
 149    nir_shader *nir;
 150
 151    NirDefMap ssaDefs;
 152    NirDefMap regDefs;
 153    NirArrayLMemOffsets regToLmemOffset;
 154    NirBlockMap blocks;
 155    unsigned int curLoopDepth;
 156
 157    BasicBlock *exit;
 158    Value *zero;
 159
 160    int clipVertexOutput;
 161
 162    union {
 163       struct {
 164          Value *position;
 165       } fp;
 166    };
 167 };
 168
 169 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 170    : ConverterCommon(prog, info),
 171      nir(nir),
 172      curLoopDepth(0),
 173      clipVertexOutput(-1)
 174 {
 175    zero = mkImm((uint32_t)0);
 176 }
 177
 178 BasicBlock *
 179 Converter::convert(nir_block *block)
 180 {
 181    NirBlockMap::iterator it = blocks.find(block->index);
 182    if (it != blocks.end())
 183       return it->second;
 184
 185    BasicBlock *bb = new BasicBlock(func);
 186    blocks[block->index] = bb;
 187    return bb;
 188 }
 189
 190 bool
 191 Converter::isFloatType(nir_alu_type type)
 192 {
 193    return nir_alu_type_get_base_type(type) == nir_type_float;
 194 }
 195
 196 bool
 197 Converter::isSignedType(nir_alu_type type)
 198 {
 199    return nir_alu_type_get_base_type(type) == nir_type_int;
 200 }
 201
 202 bool
 203 Converter::isResultFloat(nir_op op)
 204 {
 205    const nir_op_info &info = nir_op_infos[op];
 206    if (info.output_type != nir_type_invalid)
 207       return isFloatType(info.output_type);
 208
 209    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 210    assert(false);
 211    return true;
 212 }
 213
 214 bool
 215 Converter::isResultSigned(nir_op op)
 216 {
 217    switch (op) {
 218    // there is no umul and we get wrong results if we treat all muls as signed
 219    case nir_op_imul:
 220    case nir_op_inot:
 221       return false;
 222    default:
 223       const nir_op_info &info = nir_op_infos[op];
 224       if (info.output_type != nir_type_invalid)
 225          return isSignedType(info.output_type);
 226       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 227       assert(false);
 228       return true;
 229    }
 230 }
 231
 232 DataType
 233 Converter::getDType(nir_alu_instr *insn)
 234 {
 235    if (insn->dest.dest.is_ssa)
 236       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 237    else
 238       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 239 }
 240
 241 DataType
 242 Converter::getDType(nir_intrinsic_instr *insn)
 243 {
 244    if (insn->dest.is_ssa)
 245       return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
 246    else
 247       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
 248 }
 249
 250 DataType
 251 Converter::getDType(nir_op op, uint8_t bitSize)
 252 {
 253    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 254    if (ty == TYPE_NONE) {
 255       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 256       assert(false);
 257    }
 258    return ty;
 259 }
 260
 261 std::vector<DataType>
 262 Converter::getSTypes(nir_alu_instr *insn)
 263 {
 264    const nir_op_info &info = nir_op_infos[insn->op];
 265    std::vector<DataType> res(info.num_inputs);
 266
 267    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 268       if (info.input_types[i] != nir_type_invalid) {
 269          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 270       } else {
 271          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 272          assert(false);
 273          res[i] = TYPE_NONE;
 274          break;
 275       }
 276    }
 277
 278    return res;
 279 }
 280
 281 DataType
 282 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 283 {
 284    uint8_t bitSize;
 285    if (src.is_ssa)
 286       bitSize = src.ssa->bit_size;
 287    else
 288       bitSize = src.reg.reg->bit_size;
 289
 290    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 291    if (ty == TYPE_NONE) {
 292       const char *str;
 293       if (isFloat)
 294          str = "float";
 295       else if (isSigned)
 296          str = "int";
 297       else
 298          str = "uint";
 299       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 300       assert(false);
 301    }
 302    return ty;
 303 }
 304
 305 operation
 306 Converter::getOperation(nir_op op)
 307 {
 308    switch (op) {
 309    // basic ops with float and int variants
 310    case nir_op_fabs:
 311    case nir_op_iabs:
 312       return OP_ABS;
 313    case nir_op_fadd:
 314    case nir_op_iadd:
 315       return OP_ADD;
 316    case nir_op_fand:
 317    case nir_op_iand:
 318       return OP_AND;
 319    case nir_op_ifind_msb:
 320    case nir_op_ufind_msb:
 321       return OP_BFIND;
 322    case nir_op_fceil:
 323       return OP_CEIL;
 324    case nir_op_fcos:
 325       return OP_COS;
 326    case nir_op_f2f32:
 327    case nir_op_f2f64:
 328    case nir_op_f2i32:
 329    case nir_op_f2i64:
 330    case nir_op_f2u32:
 331    case nir_op_f2u64:
 332    case nir_op_i2f32:
 333    case nir_op_i2f64:
 334    case nir_op_i2i32:
 335    case nir_op_i2i64:
 336    case nir_op_u2f32:
 337    case nir_op_u2f64:
 338    case nir_op_u2u32:
 339    case nir_op_u2u64:
 340       return OP_CVT;
 341    case nir_op_fddx:
 342    case nir_op_fddx_coarse:
 343    case nir_op_fddx_fine:
 344       return OP_DFDX;
 345    case nir_op_fddy:
 346    case nir_op_fddy_coarse:
 347    case nir_op_fddy_fine:
 348       return OP_DFDY;
 349    case nir_op_fdiv:
 350    case nir_op_idiv:
 351    case nir_op_udiv:
 352       return OP_DIV;
 353    case nir_op_fexp2:
 354       return OP_EX2;
 355    case nir_op_ffloor:
 356       return OP_FLOOR;
 357    case nir_op_ffma:
 358       return OP_FMA;
 359    case nir_op_flog2:
 360       return OP_LG2;
 361    case nir_op_fmax:
 362    case nir_op_imax:
 363    case nir_op_umax:
 364       return OP_MAX;
 365    case nir_op_pack_64_2x32_split:
 366       return OP_MERGE;
 367    case nir_op_fmin:
 368    case nir_op_imin:
 369    case nir_op_umin:
 370       return OP_MIN;
 371    case nir_op_fmod:
 372    case nir_op_imod:
 373    case nir_op_umod:
 374    case nir_op_frem:
 375    case nir_op_irem:
 376       return OP_MOD;
 377    case nir_op_fmul:
 378    case nir_op_imul:
 379    case nir_op_imul_high:
 380    case nir_op_umul_high:
 381       return OP_MUL;
 382    case nir_op_fneg:
 383    case nir_op_ineg:
 384       return OP_NEG;
 385    case nir_op_fnot:
 386    case nir_op_inot:
 387       return OP_NOT;
 388    case nir_op_for:
 389    case nir_op_ior:
 390       return OP_OR;
 391    case nir_op_fpow:
 392       return OP_POW;
 393    case nir_op_frcp:
 394       return OP_RCP;
 395    case nir_op_frsq:
 396       return OP_RSQ;
 397    case nir_op_fsat:
 398       return OP_SAT;
 399    case nir_op_feq32:
 400    case nir_op_ieq32:
 401    case nir_op_fge32:
 402    case nir_op_ige32:
 403    case nir_op_uge32:
 404    case nir_op_flt32:
 405    case nir_op_ilt32:
 406    case nir_op_ult32:
 407    case nir_op_fne32:
 408    case nir_op_ine32:
 409       return OP_SET;
 410    case nir_op_ishl:
 411       return OP_SHL;
 412    case nir_op_ishr:
 413    case nir_op_ushr:
 414       return OP_SHR;
 415    case nir_op_fsin:
 416       return OP_SIN;
 417    case nir_op_fsqrt:
 418       return OP_SQRT;
 419    case nir_op_fsub:
 420    case nir_op_isub:
 421       return OP_SUB;
 422    case nir_op_ftrunc:
 423       return OP_TRUNC;
 424    case nir_op_fxor:
 425    case nir_op_ixor:
 426       return OP_XOR;
 427    default:
 428       ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
 429       assert(false);
 430       return OP_NOP;
 431    }
 432 }
 433
 434 operation
 435 Converter::getOperation(nir_texop op)
 436 {
 437    switch (op) {
 438    case nir_texop_tex:
 439       return OP_TEX;
 440    case nir_texop_lod:
 441       return OP_TXLQ;
 442    case nir_texop_txb:
 443       return OP_TXB;
 444    case nir_texop_txd:
 445       return OP_TXD;
 446    case nir_texop_txf:
 447    case nir_texop_txf_ms:
 448       return OP_TXF;
 449    case nir_texop_tg4:
 450       return OP_TXG;
 451    case nir_texop_txl:
 452       return OP_TXL;
 453    case nir_texop_query_levels:
 454    case nir_texop_texture_samples:
 455    case nir_texop_txs:
 456       return OP_TXQ;
 457    default:
 458       ERROR("couldn't get operation for nir_texop %u\n", op);
 459       assert(false);
 460       return OP_NOP;
 461    }
 462 }
 463
 464 operation
 465 Converter::getOperation(nir_intrinsic_op op)
 466 {
 467    switch (op) {
 468    default:
 469       ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
 470       assert(false);
 471       return OP_NOP;
 472    }
 473 }
 474
 475 operation
 476 Converter::preOperationNeeded(nir_op op)
 477 {
 478    switch (op) {
 479    case nir_op_fcos:
 480    case nir_op_fsin:
 481       return OP_PRESIN;
 482    default:
 483       return OP_NOP;
 484    }
 485 }
 486
 487 int
 488 Converter::getSubOp(nir_op op)
 489 {
 490    switch (op) {
 491    case nir_op_imul_high:
 492    case nir_op_umul_high:
 493       return NV50_IR_SUBOP_MUL_HIGH;
 494    default:
 495       return 0;
 496    }
 497 }
 498
 499 int
 500 Converter::getSubOp(nir_intrinsic_op op)
 501 {
 502    switch (op) {
 503    case nir_intrinsic_vote_all:
 504       return NV50_IR_SUBOP_VOTE_ALL;
 505    case nir_intrinsic_vote_any:
 506       return NV50_IR_SUBOP_VOTE_ANY;
 507    case nir_intrinsic_vote_ieq:
 508       return NV50_IR_SUBOP_VOTE_UNI;
 509    default:
 510       return 0;
 511    }
 512 }
 513
 514 CondCode
 515 Converter::getCondCode(nir_op op)
 516 {
 517    switch (op) {
 518    case nir_op_feq32:
 519    case nir_op_ieq32:
 520       return CC_EQ;
 521    case nir_op_fge32:
 522    case nir_op_ige32:
 523    case nir_op_uge32:
 524       return CC_GE;
 525    case nir_op_flt32:
 526    case nir_op_ilt32:
 527    case nir_op_ult32:
 528       return CC_LT;
 529    case nir_op_fne32:
 530       return CC_NEU;
 531    case nir_op_ine32:
 532       return CC_NE;
 533    default:
 534       ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
 535       assert(false);
 536       return CC_FL;
 537    }
 538 }
 539
 540 Converter::LValues&
 541 Converter::convert(nir_alu_dest *dest)
 542 {
 543    return convert(&dest->dest);
 544 }
 545
 546 Converter::LValues&
 547 Converter::convert(nir_dest *dest)
 548 {
 549    if (dest->is_ssa)
 550       return convert(&dest->ssa);
 551    if (dest->reg.indirect) {
 552       ERROR("no support for indirects.");
 553       assert(false);
 554    }
 555    return convert(dest->reg.reg);
 556 }
 557
 558 Converter::LValues&
 559 Converter::convert(nir_register *reg)
 560 {
 561    NirDefMap::iterator it = regDefs.find(reg->index);
 562    if (it != regDefs.end())
 563       return it->second;
 564
 565    LValues newDef(reg->num_components);
 566    for (uint8_t i = 0; i < reg->num_components; i++)
 567       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 568    return regDefs[reg->index] = newDef;
 569 }
 570
 571 Converter::LValues&
 572 Converter::convert(nir_ssa_def *def)
 573 {
 574    NirDefMap::iterator it = ssaDefs.find(def->index);
 575    if (it != ssaDefs.end())
 576       return it->second;
 577
 578    LValues newDef(def->num_components);
 579    for (uint8_t i = 0; i < def->num_components; i++)
 580       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 581    return ssaDefs[def->index] = newDef;
 582 }
 583
 584 Value*
 585 Converter::getSrc(nir_alu_src *src, uint8_t component)
 586 {
 587    if (src->abs || src->negate) {
 588       ERROR("modifiers currently not supported on nir_alu_src\n");
 589       assert(false);
 590    }
 591    return getSrc(&src->src, src->swizzle[component]);
 592 }
 593
 594 Value*
 595 Converter::getSrc(nir_register *reg, uint8_t idx)
 596 {
 597    NirDefMap::iterator it = regDefs.find(reg->index);
 598    if (it == regDefs.end())
 599       return convert(reg)[idx];
 600    return it->second[idx];
 601 }
 602
 603 Value*
 604 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 605 {
 606    if (src->is_ssa)
 607       return getSrc(src->ssa, idx);
 608
 609    if (src->reg.indirect) {
 610       if (indirect)
 611          return getSrc(src->reg.indirect, idx);
 612       ERROR("no support for indirects.");
 613       assert(false);
 614       return NULL;
 615    }
 616
 617    return getSrc(src->reg.reg, idx);
 618 }
 619
 620 Value*
 621 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 622 {
 623    NirDefMap::iterator it = ssaDefs.find(src->index);
 624    if (it == ssaDefs.end()) {
 625       ERROR("SSA value %u not found\n", src->index);
 626       assert(false);
 627       return NULL;
 628    }
 629    return it->second[idx];
 630 }
 631
 632 uint32_t
 633 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 634 {
 635    nir_const_value *offset = nir_src_as_const_value(*src);
 636
 637    if (offset) {
 638       indirect = NULL;
 639       return offset->u32[0];
 640    }
 641
 642    indirect = getSrc(src, idx, true);
 643    return 0;
 644 }
 645
 646 uint32_t
 647 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
 648 {
 649    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 650    if (indirect)
 651       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 652    return idx;
 653 }
 654
 655 static void
 656 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 657 {
 658    assert(name && index);
 659
 660    if (slot >= VERT_ATTRIB_MAX) {
 661       ERROR("invalid varying slot %u\n", slot);
 662       assert(false);
 663       return;
 664    }
 665
 666    if (slot >= VERT_ATTRIB_GENERIC0 &&
 667        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 668       *name = TGSI_SEMANTIC_GENERIC;
 669       *index = slot - VERT_ATTRIB_GENERIC0;
 670       return;
 671    }
 672
 673    if (slot >= VERT_ATTRIB_TEX0 &&
 674        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 675       *name = TGSI_SEMANTIC_TEXCOORD;
 676       *index = slot - VERT_ATTRIB_TEX0;
 677       return;
 678    }
 679
 680    switch (slot) {
 681    case VERT_ATTRIB_COLOR0:
 682       *name = TGSI_SEMANTIC_COLOR;
 683       *index = 0;
 684       break;
 685    case VERT_ATTRIB_COLOR1:
 686       *name = TGSI_SEMANTIC_COLOR;
 687       *index = 1;
 688       break;
 689    case VERT_ATTRIB_EDGEFLAG:
 690       *name = TGSI_SEMANTIC_EDGEFLAG;
 691       *index = 0;
 692       break;
 693    case VERT_ATTRIB_FOG:
 694       *name = TGSI_SEMANTIC_FOG;
 695       *index = 0;
 696       break;
 697    case VERT_ATTRIB_NORMAL:
 698       *name = TGSI_SEMANTIC_NORMAL;
 699       *index = 0;
 700       break;
 701    case VERT_ATTRIB_POS:
 702       *name = TGSI_SEMANTIC_POSITION;
 703       *index = 0;
 704       break;
 705    case VERT_ATTRIB_POINT_SIZE:
 706       *name = TGSI_SEMANTIC_PSIZE;
 707       *index = 0;
 708       break;
 709    default:
 710       ERROR("unknown vert attrib slot %u\n", slot);
 711       assert(false);
 712       break;
 713    }
 714 }
 715
 716 static void
 717 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
 718 {
 719    assert(name && index);
 720
 721    if (slot >= VARYING_SLOT_TESS_MAX) {
 722       ERROR("invalid varying slot %u\n", slot);
 723       assert(false);
 724       return;
 725    }
 726
 727    if (slot >= VARYING_SLOT_PATCH0) {
 728       *name = TGSI_SEMANTIC_PATCH;
 729       *index = slot - VARYING_SLOT_PATCH0;
 730       return;
 731    }
 732
 733    if (slot >= VARYING_SLOT_VAR0) {
 734       *name = TGSI_SEMANTIC_GENERIC;
 735       *index = slot - VARYING_SLOT_VAR0;
 736       return;
 737    }
 738
 739    if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
 740       *name = TGSI_SEMANTIC_TEXCOORD;
 741       *index = slot - VARYING_SLOT_TEX0;
 742       return;
 743    }
 744
 745    switch (slot) {
 746    case VARYING_SLOT_BFC0:
 747       *name = TGSI_SEMANTIC_BCOLOR;
 748       *index = 0;
 749       break;
 750    case VARYING_SLOT_BFC1:
 751       *name = TGSI_SEMANTIC_BCOLOR;
 752       *index = 1;
 753       break;
 754    case VARYING_SLOT_CLIP_DIST0:
 755       *name = TGSI_SEMANTIC_CLIPDIST;
 756       *index = 0;
 757       break;
 758    case VARYING_SLOT_CLIP_DIST1:
 759       *name = TGSI_SEMANTIC_CLIPDIST;
 760       *index = 1;
 761       break;
 762    case VARYING_SLOT_CLIP_VERTEX:
 763       *name = TGSI_SEMANTIC_CLIPVERTEX;
 764       *index = 0;
 765       break;
 766    case VARYING_SLOT_COL0:
 767       *name = TGSI_SEMANTIC_COLOR;
 768       *index = 0;
 769       break;
 770    case VARYING_SLOT_COL1:
 771       *name = TGSI_SEMANTIC_COLOR;
 772       *index = 1;
 773       break;
 774    case VARYING_SLOT_EDGE:
 775       *name = TGSI_SEMANTIC_EDGEFLAG;
 776       *index = 0;
 777       break;
 778    case VARYING_SLOT_FACE:
 779       *name = TGSI_SEMANTIC_FACE;
 780       *index = 0;
 781       break;
 782    case VARYING_SLOT_FOGC:
 783       *name = TGSI_SEMANTIC_FOG;
 784       *index = 0;
 785       break;
 786    case VARYING_SLOT_LAYER:
 787       *name = TGSI_SEMANTIC_LAYER;
 788       *index = 0;
 789       break;
 790    case VARYING_SLOT_PNTC:
 791       *name = TGSI_SEMANTIC_PCOORD;
 792       *index = 0;
 793       break;
 794    case VARYING_SLOT_POS:
 795       *name = TGSI_SEMANTIC_POSITION;
 796       *index = 0;
 797       break;
 798    case VARYING_SLOT_PRIMITIVE_ID:
 799       *name = TGSI_SEMANTIC_PRIMID;
 800       *index = 0;
 801       break;
 802    case VARYING_SLOT_PSIZ:
 803       *name = TGSI_SEMANTIC_PSIZE;
 804       *index = 0;
 805       break;
 806    case VARYING_SLOT_TESS_LEVEL_INNER:
 807       *name = TGSI_SEMANTIC_TESSINNER;
 808       *index = 0;
 809       break;
 810    case VARYING_SLOT_TESS_LEVEL_OUTER:
 811       *name = TGSI_SEMANTIC_TESSOUTER;
 812       *index = 0;
 813       break;
 814    case VARYING_SLOT_VIEWPORT:
 815       *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
 816       *index = 0;
 817       break;
 818    default:
 819       ERROR("unknown varying slot %u\n", slot);
 820       assert(false);
 821       break;
 822    }
 823 }
 824
 825 static void
 826 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
 827 {
 828    if (slot >= FRAG_RESULT_DATA0) {
 829       *name = TGSI_SEMANTIC_COLOR;
 830       *index = slot - FRAG_RESULT_COLOR - 2; // intentional
 831       return;
 832    }
 833
 834    switch (slot) {
 835    case FRAG_RESULT_COLOR:
 836       *name = TGSI_SEMANTIC_COLOR;
 837       *index = 0;
 838       break;
 839    case FRAG_RESULT_DEPTH:
 840       *name = TGSI_SEMANTIC_POSITION;
 841       *index = 0;
 842       break;
 843    case FRAG_RESULT_SAMPLE_MASK:
 844       *name = TGSI_SEMANTIC_SAMPLEMASK;
 845       *index = 0;
 846       break;
 847    default:
 848       ERROR("unknown frag result slot %u\n", slot);
 849       assert(false);
 850       break;
 851    }
 852 }
 853
 854 // copy of _mesa_sysval_to_semantic
 855 static void
 856 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
 857 {
 858    *index = 0;
 859    switch (val) {
 860    // Vertex shader
 861    case SYSTEM_VALUE_VERTEX_ID:
 862       *name = TGSI_SEMANTIC_VERTEXID;
 863       break;
 864    case SYSTEM_VALUE_INSTANCE_ID:
 865       *name = TGSI_SEMANTIC_INSTANCEID;
 866       break;
 867    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
 868       *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
 869       break;
 870    case SYSTEM_VALUE_BASE_VERTEX:
 871       *name = TGSI_SEMANTIC_BASEVERTEX;
 872       break;
 873    case SYSTEM_VALUE_BASE_INSTANCE:
 874       *name = TGSI_SEMANTIC_BASEINSTANCE;
 875       break;
 876    case SYSTEM_VALUE_DRAW_ID:
 877       *name = TGSI_SEMANTIC_DRAWID;
 878       break;
 879
 880    // Geometry shader
 881    case SYSTEM_VALUE_INVOCATION_ID:
 882       *name = TGSI_SEMANTIC_INVOCATIONID;
 883       break;
 884
 885    // Fragment shader
 886    case SYSTEM_VALUE_FRAG_COORD:
 887       *name = TGSI_SEMANTIC_POSITION;
 888       break;
 889    case SYSTEM_VALUE_FRONT_FACE:
 890       *name = TGSI_SEMANTIC_FACE;
 891       break;
 892    case SYSTEM_VALUE_SAMPLE_ID:
 893       *name = TGSI_SEMANTIC_SAMPLEID;
 894       break;
 895    case SYSTEM_VALUE_SAMPLE_POS:
 896       *name = TGSI_SEMANTIC_SAMPLEPOS;
 897       break;
 898    case SYSTEM_VALUE_SAMPLE_MASK_IN:
 899       *name = TGSI_SEMANTIC_SAMPLEMASK;
 900       break;
 901    case SYSTEM_VALUE_HELPER_INVOCATION:
 902       *name = TGSI_SEMANTIC_HELPER_INVOCATION;
 903       break;
 904
 905    // Tessellation shader
 906    case SYSTEM_VALUE_TESS_COORD:
 907       *name = TGSI_SEMANTIC_TESSCOORD;
 908       break;
 909    case SYSTEM_VALUE_VERTICES_IN:
 910       *name = TGSI_SEMANTIC_VERTICESIN;
 911       break;
 912    case SYSTEM_VALUE_PRIMITIVE_ID:
 913       *name = TGSI_SEMANTIC_PRIMID;
 914       break;
 915    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 916       *name = TGSI_SEMANTIC_TESSOUTER;
 917       break;
 918    case SYSTEM_VALUE_TESS_LEVEL_INNER:
 919       *name = TGSI_SEMANTIC_TESSINNER;
 920       break;
 921
 922    // Compute shader
 923    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
 924       *name = TGSI_SEMANTIC_THREAD_ID;
 925       break;
 926    case SYSTEM_VALUE_WORK_GROUP_ID:
 927       *name = TGSI_SEMANTIC_BLOCK_ID;
 928       break;
 929    case SYSTEM_VALUE_NUM_WORK_GROUPS:
 930       *name = TGSI_SEMANTIC_GRID_SIZE;
 931       break;
 932    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
 933       *name = TGSI_SEMANTIC_BLOCK_SIZE;
 934       break;
 935
 936    // ARB_shader_ballot
 937    case SYSTEM_VALUE_SUBGROUP_SIZE:
 938       *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
 939       break;
 940    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
 941       *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
 942       break;
 943    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
 944       *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
 945       break;
 946    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
 947       *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
 948       break;
 949    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
 950       *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
 951       break;
 952    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
 953       *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
 954       break;
 955    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
 956       *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
 957       break;
 958
 959    default:
 960       ERROR("unknown system value %u\n", val);
 961       assert(false);
 962       break;
 963    }
 964 }
 965
 966 void
 967 Converter::setInterpolate(nv50_ir_varying *var,
 968                           uint8_t mode,
 969                           bool centroid,
 970                           unsigned semantic)
 971 {
 972    switch (mode) {
 973    case INTERP_MODE_FLAT:
 974       var->flat = 1;
 975       break;
 976    case INTERP_MODE_NONE:
 977       if (semantic == TGSI_SEMANTIC_COLOR)
 978          var->sc = 1;
 979       else if (semantic == TGSI_SEMANTIC_POSITION)
 980          var->linear = 1;
 981       break;
 982    case INTERP_MODE_NOPERSPECTIVE:
 983       var->linear = 1;
 984       break;
 985    case INTERP_MODE_SMOOTH:
 986       break;
 987    }
 988    var->centroid = centroid;
 989 }
 990
 991 static uint16_t
 992 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
 993           bool input, const nir_variable *var)
 994 {
 995    if (!type->is_array())
 996       return type->count_attribute_slots(false);
 997
 998    uint16_t slots;
 999    switch (stage) {
1000    case Program::TYPE_GEOMETRY:
1001       slots = type->uniform_locations();
1002       if (input)
1003          slots /= info.gs.vertices_in;
1004       break;
1005    case Program::TYPE_TESSELLATION_CONTROL:
1006    case Program::TYPE_TESSELLATION_EVAL:
1007       // remove first dimension
1008       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1009          slots = type->uniform_locations();
1010       else
1011          slots = type->fields.array->uniform_locations();
1012       break;
1013    default:
1014       slots = type->count_attribute_slots(false);
1015       break;
1016    }
1017
1018    return slots;
1019 }
1020
1021 bool Converter::assignSlots() {
1022    unsigned name;
1023    unsigned index;
1024
1025    info->io.viewportId = -1;
1026    info->numInputs = 0;
1027
1028    // we have to fixup the uniform locations for arrays
1029    unsigned numImages = 0;
1030    nir_foreach_variable(var, &nir->uniforms) {
1031       const glsl_type *type = var->type;
1032       if (!type->without_array()->is_image())
1033          continue;
1034       var->data.driver_location = numImages;
1035       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1036    }
1037
1038    nir_foreach_variable(var, &nir->inputs) {
1039       const glsl_type *type = var->type;
1040       int slot = var->data.location;
1041       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1042       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1043                                        : type->component_slots();
1044       uint32_t frac = var->data.location_frac;
1045       uint32_t vary = var->data.driver_location;
1046
1047       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1048          if (comp > 2)
1049             slots *= 2;
1050       }
1051
1052       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1053
1054       switch(prog->getType()) {
1055       case Program::TYPE_FRAGMENT:
1056          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1057          for (uint16_t i = 0; i < slots; ++i) {
1058             setInterpolate(&info->in[vary + i], var->data.interpolation,
1059                            var->data.centroid | var->data.sample, name);
1060          }
1061          break;
1062       case Program::TYPE_GEOMETRY:
1063          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1064          break;
1065       case Program::TYPE_TESSELLATION_CONTROL:
1066       case Program::TYPE_TESSELLATION_EVAL:
1067          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1068          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1069             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1070          break;
1071       case Program::TYPE_VERTEX:
1072          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1073          switch (name) {
1074          case TGSI_SEMANTIC_EDGEFLAG:
1075             info->io.edgeFlagIn = vary;
1076             break;
1077          default:
1078             break;
1079          }
1080          break;
1081       default:
1082          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1083          return false;
1084       }
1085
1086       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1087          info->in[vary].id = vary;
1088          info->in[vary].patch = var->data.patch;
1089          info->in[vary].sn = name;
1090          info->in[vary].si = index + i;
1091          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1092             if (i & 0x1)
1093                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1094             else
1095                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1096          else
1097             info->in[vary].mask |= ((1 << comp) - 1) << frac;
1098       }
1099       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1100    }
1101
1102    info->numOutputs = 0;
1103    nir_foreach_variable(var, &nir->outputs) {
1104       const glsl_type *type = var->type;
1105       int slot = var->data.location;
1106       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1107       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1108                                        : type->component_slots();
1109       uint32_t frac = var->data.location_frac;
1110       uint32_t vary = var->data.driver_location;
1111
1112       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1113          if (comp > 2)
1114             slots *= 2;
1115       }
1116
1117       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1118
1119       switch(prog->getType()) {
1120       case Program::TYPE_FRAGMENT:
1121          frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1122          switch (name) {
1123          case TGSI_SEMANTIC_COLOR:
1124             if (!var->data.fb_fetch_output)
1125                info->prop.fp.numColourResults++;
1126             info->prop.fp.separateFragData = true;
1127             // sometimes we get FRAG_RESULT_DATAX with data.index 0
1128             // sometimes we get FRAG_RESULT_DATA0 with data.index X
1129             index = index == 0 ? var->data.index : index;
1130             break;
1131          case TGSI_SEMANTIC_POSITION:
1132             info->io.fragDepth = vary;
1133             info->prop.fp.writesDepth = true;
1134             break;
1135          case TGSI_SEMANTIC_SAMPLEMASK:
1136             info->io.sampleMask = vary;
1137             break;
1138          default:
1139             break;
1140          }
1141          break;
1142       case Program::TYPE_GEOMETRY:
1143       case Program::TYPE_TESSELLATION_CONTROL:
1144       case Program::TYPE_TESSELLATION_EVAL:
1145       case Program::TYPE_VERTEX:
1146          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1147
1148          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1149              name != TGSI_SEMANTIC_TESSOUTER)
1150             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1151
1152          switch (name) {
1153          case TGSI_SEMANTIC_CLIPDIST:
1154             info->io.genUserClip = -1;
1155             break;
1156          case TGSI_SEMANTIC_CLIPVERTEX:
1157             clipVertexOutput = vary;
1158             break;
1159          case TGSI_SEMANTIC_EDGEFLAG:
1160             info->io.edgeFlagOut = vary;
1161             break;
1162          case TGSI_SEMANTIC_POSITION:
1163             if (clipVertexOutput < 0)
1164                clipVertexOutput = vary;
1165             break;
1166          default:
1167             break;
1168          }
1169          break;
1170       default:
1171          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1172          return false;
1173       }
1174
1175       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1176          info->out[vary].id = vary;
1177          info->out[vary].patch = var->data.patch;
1178          info->out[vary].sn = name;
1179          info->out[vary].si = index + i;
1180          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1181             if (i & 0x1)
1182                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1183             else
1184                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1185          else
1186             info->out[vary].mask |= ((1 << comp) - 1) << frac;
1187
1188          if (nir->info.outputs_read & 1ll << slot)
1189             info->out[vary].oread = 1;
1190       }
1191       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1192    }
1193
1194    info->numSysVals = 0;
1195    for (uint8_t i = 0; i < 64; ++i) {
1196       if (!(nir->info.system_values_read & 1ll << i))
1197          continue;
1198
1199       system_val_to_tgsi_semantic(i, &name, &index);
1200       info->sv[info->numSysVals].sn = name;
1201       info->sv[info->numSysVals].si = index;
1202       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1203
1204       switch (i) {
1205       case SYSTEM_VALUE_INSTANCE_ID:
1206          info->io.instanceId = info->numSysVals;
1207          break;
1208       case SYSTEM_VALUE_TESS_LEVEL_INNER:
1209       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1210          info->sv[info->numSysVals].patch = 1;
1211          break;
1212       case SYSTEM_VALUE_VERTEX_ID:
1213          info->io.vertexId = info->numSysVals;
1214          break;
1215       default:
1216          break;
1217       }
1218
1219       info->numSysVals += 1;
1220    }
1221
1222    if (info->io.genUserClip > 0) {
1223       info->io.clipDistances = info->io.genUserClip;
1224
1225       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1226
1227       for (unsigned int n = 0; n < nOut; ++n) {
1228          unsigned int i = info->numOutputs++;
1229          info->out[i].id = i;
1230          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1231          info->out[i].si = n;
1232          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1233       }
1234    }
1235
1236    return info->assignSlots(info) == 0;
1237 }
1238
1239 uint32_t
1240 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1241 {
1242    DataType ty;
1243    int offset = nir_intrinsic_component(insn);
1244    bool input;
1245
1246    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1247       ty = getDType(insn);
1248    else
1249       ty = getSType(insn->src[0], false, false);
1250
1251    switch (insn->intrinsic) {
1252    case nir_intrinsic_load_input:
1253    case nir_intrinsic_load_interpolated_input:
1254    case nir_intrinsic_load_per_vertex_input:
1255       input = true;
1256       break;
1257    case nir_intrinsic_load_output:
1258    case nir_intrinsic_load_per_vertex_output:
1259    case nir_intrinsic_store_output:
1260    case nir_intrinsic_store_per_vertex_output:
1261       input = false;
1262       break;
1263    default:
1264       ERROR("unknown intrinsic in getSlotAddress %s",
1265             nir_intrinsic_infos[insn->intrinsic].name);
1266       input = false;
1267       assert(false);
1268       break;
1269    }
1270
1271    if (typeSizeof(ty) == 8) {
1272       slot *= 2;
1273       slot += offset;
1274       if (slot >= 4) {
1275          idx += 1;
1276          slot -= 4;
1277       }
1278    } else {
1279       slot += offset;
1280    }
1281
1282    assert(slot < 4);
1283    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1284    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1285
1286    const nv50_ir_varying *vary = input ? info->in : info->out;
1287    return vary[idx].slot[slot] * 4;
1288 }
1289
1290 Instruction *
1291 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1292                     uint32_t base, uint8_t c, Value *indirect0,
1293                     Value *indirect1, bool patch)
1294 {
1295    unsigned int tySize = typeSizeof(ty);
1296
1297    if (tySize == 8 &&
1298        (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1299       Value *lo = getSSA();
1300       Value *hi = getSSA();
1301
1302       Instruction *loi =
1303          mkLoad(TYPE_U32, lo,
1304                 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1305                 indirect0);
1306       loi->setIndirect(0, 1, indirect1);
1307       loi->perPatch = patch;
1308
1309       Instruction *hii =
1310          mkLoad(TYPE_U32, hi,
1311                 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1312                 indirect0);
1313       hii->setIndirect(0, 1, indirect1);
1314       hii->perPatch = patch;
1315
1316       return mkOp2(OP_MERGE, ty, def, lo, hi);
1317    } else {
1318       Instruction *ld =
1319          mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1320       ld->setIndirect(0, 1, indirect1);
1321       ld->perPatch = patch;
1322       return ld;
1323    }
1324 }
1325
1326 void
1327 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1328                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1329                    Value *indirect0, Value *indirect1)
1330 {
1331    uint8_t size = typeSizeof(ty);
1332    uint32_t address = getSlotAddress(insn, idx, c);
1333
1334    if (size == 8 && indirect0) {
1335       Value *split[2];
1336       mkSplit(split, 4, src);
1337
1338       if (op == OP_EXPORT) {
1339          split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1340          split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1341       }
1342
1343       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1344               split[0])->perPatch = info->out[idx].patch;
1345       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1346               split[1])->perPatch = info->out[idx].patch;
1347    } else {
1348       if (op == OP_EXPORT)
1349          src = mkMov(getSSA(size), src, ty)->getDef(0);
1350       mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1351               src)->perPatch = info->out[idx].patch;
1352    }
1353 }
1354
1355 bool
1356 Converter::parseNIR()
1357 {
1358    info->bin.tlsSpace = 0;
1359    info->io.clipDistances = nir->info.clip_distance_array_size;
1360    info->io.cullDistances = nir->info.cull_distance_array_size;
1361
1362    switch(prog->getType()) {
1363    case Program::TYPE_COMPUTE:
1364       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1365       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1366       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1367       info->bin.smemSize = nir->info.cs.shared_size;
1368       break;
1369    case Program::TYPE_FRAGMENT:
1370       info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1371       info->prop.fp.persampleInvocation =
1372          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1373          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1374       info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1375       info->prop.fp.readsSampleLocations =
1376          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1377       info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1378       info->prop.fp.usesSampleMaskIn =
1379          !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1380       break;
1381    case Program::TYPE_GEOMETRY:
1382       info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1383       info->prop.gp.instanceCount = nir->info.gs.invocations;
1384       info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1385       info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1386       break;
1387    case Program::TYPE_TESSELLATION_CONTROL:
1388    case Program::TYPE_TESSELLATION_EVAL:
1389       if (nir->info.tess.primitive_mode == GL_ISOLINES)
1390          info->prop.tp.domain = GL_LINES;
1391       else
1392          info->prop.tp.domain = nir->info.tess.primitive_mode;
1393       info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1394       info->prop.tp.outputPrim =
1395          nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1396       info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1397       info->prop.tp.winding = !nir->info.tess.ccw;
1398       break;
1399    case Program::TYPE_VERTEX:
1400       info->prop.vp.usesDrawParameters =
1401          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1402          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1403          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1404       break;
1405    default:
1406       break;
1407    }
1408
1409    return true;
1410 }
1411
1412 bool
1413 Converter::visit(nir_function *function)
1414 {
1415    // we only support emiting the main function for now
1416    assert(!strcmp(function->name, "main"));
1417    assert(function->impl);
1418
1419    // usually the blocks will set everything up, but main is special
1420    BasicBlock *entry = new BasicBlock(prog->main);
1421    exit = new BasicBlock(prog->main);
1422    blocks[nir_start_block(function->impl)->index] = entry;
1423    prog->main->setEntry(entry);
1424    prog->main->setExit(exit);
1425
1426    setPosition(entry, true);
1427
1428    if (info->io.genUserClip > 0) {
1429       for (int c = 0; c < 4; ++c)
1430          clipVtx[c] = getScratch();
1431    }
1432
1433    switch (prog->getType()) {
1434    case Program::TYPE_TESSELLATION_CONTROL:
1435       outBase = mkOp2v(
1436          OP_SUB, TYPE_U32, getSSA(),
1437          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1438          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1439       break;
1440    case Program::TYPE_FRAGMENT: {
1441       Symbol *sv = mkSysVal(SV_POSITION, 3);
1442       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1443       fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1444       break;
1445    }
1446    default:
1447       break;
1448    }
1449
1450    nir_foreach_register(reg, &function->impl->registers) {
1451       if (reg->num_array_elems) {
1452          // TODO: packed variables would be nice, but MemoryOpt fails
1453          // replace 4 with reg->num_components
1454          uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
1455          regToLmemOffset[reg->index] = info->bin.tlsSpace;
1456          info->bin.tlsSpace += size;
1457       }
1458    }
1459
1460    nir_index_ssa_defs(function->impl);
1461    foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1462       if (!visit(node))
1463          return false;
1464    }
1465
1466    bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1467    setPosition(exit, true);
1468
1469    if (info->io.genUserClip > 0)
1470       handleUserClipPlanes();
1471
1472    // TODO: for non main function this needs to be a OP_RETURN
1473    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1474    return true;
1475 }
1476
1477 bool
1478 Converter::visit(nir_cf_node *node)
1479 {
1480    switch (node->type) {
1481    case nir_cf_node_block:
1482       return visit(nir_cf_node_as_block(node));
1483    case nir_cf_node_if:
1484       return visit(nir_cf_node_as_if(node));
1485    case nir_cf_node_loop:
1486       return visit(nir_cf_node_as_loop(node));
1487    default:
1488       ERROR("unknown nir_cf_node type %u\n", node->type);
1489       return false;
1490    }
1491 }
1492
1493 bool
1494 Converter::visit(nir_block *block)
1495 {
1496    if (!block->predecessors->entries && block->instr_list.is_empty())
1497       return true;
1498
1499    BasicBlock *bb = convert(block);
1500
1501    setPosition(bb, true);
1502    nir_foreach_instr(insn, block) {
1503       if (!visit(insn))
1504          return false;
1505    }
1506    return true;
1507 }
1508
1509 bool
1510 Converter::visit(nir_if *nif)
1511 {
1512    DataType sType = getSType(nif->condition, false, false);
1513    Value *src = getSrc(&nif->condition, 0);
1514
1515    nir_block *lastThen = nir_if_last_then_block(nif);
1516    nir_block *lastElse = nir_if_last_else_block(nif);
1517
1518    assert(!lastThen->successors[1]);
1519    assert(!lastElse->successors[1]);
1520
1521    BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1522    BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1523
1524    bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1525    bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1526
1527    // we only insert joinats, if both nodes end up at the end of the if again.
1528    // the reason for this to not happens are breaks/continues/ret/... which
1529    // have their own handling
1530    if (lastThen->successors[0] == lastElse->successors[0])
1531       bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1532                           CC_ALWAYS, NULL);
1533
1534    mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1535
1536    foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1537       if (!visit(node))
1538          return false;
1539    }
1540    setPosition(convert(lastThen), true);
1541    if (!bb->getExit() ||
1542        !bb->getExit()->asFlow() ||
1543         bb->getExit()->asFlow()->op == OP_JOIN) {
1544       BasicBlock *tailBB = convert(lastThen->successors[0]);
1545       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1546       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1547    }
1548
1549    foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1550       if (!visit(node))
1551          return false;
1552    }
1553    setPosition(convert(lastElse), true);
1554    if (!bb->getExit() ||
1555        !bb->getExit()->asFlow() ||
1556         bb->getExit()->asFlow()->op == OP_JOIN) {
1557       BasicBlock *tailBB = convert(lastElse->successors[0]);
1558       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1559       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1560    }
1561
1562    if (lastThen->successors[0] == lastElse->successors[0]) {
1563       setPosition(convert(lastThen->successors[0]), true);
1564       mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1565    }
1566
1567    return true;
1568 }
1569
1570 bool
1571 Converter::visit(nir_loop *loop)
1572 {
1573    curLoopDepth += 1;
1574    func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1575
1576    BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1577    BasicBlock *tailBB =
1578       convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1579    bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1580
1581    mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1582    setPosition(loopBB, false);
1583    mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1584
1585    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1586       if (!visit(node))
1587          return false;
1588    }
1589    Instruction *insn = bb->getExit();
1590    if (bb->cfg.incidentCount() != 0) {
1591       if (!insn || !insn->asFlow()) {
1592          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1593          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1594       } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1595                  tailBB->cfg.incidentCount() == 0) {
1596          // RA doesn't like having blocks around with no incident edge,
1597          // so we create a fake one to make it happy
1598          bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1599       }
1600    }
1601
1602    curLoopDepth -= 1;
1603
1604    return true;
1605 }
1606
1607 bool
1608 Converter::visit(nir_instr *insn)
1609 {
1610    switch (insn->type) {
1611    case nir_instr_type_alu:
1612       return visit(nir_instr_as_alu(insn));
1613    case nir_instr_type_intrinsic:
1614       return visit(nir_instr_as_intrinsic(insn));
1615    case nir_instr_type_jump:
1616       return visit(nir_instr_as_jump(insn));
1617    case nir_instr_type_load_const:
1618       return visit(nir_instr_as_load_const(insn));
1619    case nir_instr_type_ssa_undef:
1620       return visit(nir_instr_as_ssa_undef(insn));
1621    case nir_instr_type_tex:
1622       return visit(nir_instr_as_tex(insn));
1623    default:
1624       ERROR("unknown nir_instr type %u\n", insn->type);
1625       return false;
1626    }
1627    return true;
1628 }
1629
1630 SVSemantic
1631 Converter::convert(nir_intrinsic_op intr)
1632 {
1633    switch (intr) {
1634    case nir_intrinsic_load_base_vertex:
1635       return SV_BASEVERTEX;
1636    case nir_intrinsic_load_base_instance:
1637       return SV_BASEINSTANCE;
1638    case nir_intrinsic_load_draw_id:
1639       return SV_DRAWID;
1640    case nir_intrinsic_load_front_face:
1641       return SV_FACE;
1642    case nir_intrinsic_load_helper_invocation:
1643       return SV_THREAD_KILL;
1644    case nir_intrinsic_load_instance_id:
1645       return SV_INSTANCE_ID;
1646    case nir_intrinsic_load_invocation_id:
1647       return SV_INVOCATION_ID;
1648    case nir_intrinsic_load_local_group_size:
1649       return SV_NTID;
1650    case nir_intrinsic_load_local_invocation_id:
1651       return SV_TID;
1652    case nir_intrinsic_load_num_work_groups:
1653       return SV_NCTAID;
1654    case nir_intrinsic_load_patch_vertices_in:
1655       return SV_VERTEX_COUNT;
1656    case nir_intrinsic_load_primitive_id:
1657       return SV_PRIMITIVE_ID;
1658    case nir_intrinsic_load_sample_id:
1659       return SV_SAMPLE_INDEX;
1660    case nir_intrinsic_load_sample_mask_in:
1661       return SV_SAMPLE_MASK;
1662    case nir_intrinsic_load_sample_pos:
1663       return SV_SAMPLE_POS;
1664    case nir_intrinsic_load_subgroup_eq_mask:
1665       return SV_LANEMASK_EQ;
1666    case nir_intrinsic_load_subgroup_ge_mask:
1667       return SV_LANEMASK_GE;
1668    case nir_intrinsic_load_subgroup_gt_mask:
1669       return SV_LANEMASK_GT;
1670    case nir_intrinsic_load_subgroup_le_mask:
1671       return SV_LANEMASK_LE;
1672    case nir_intrinsic_load_subgroup_lt_mask:
1673       return SV_LANEMASK_LT;
1674    case nir_intrinsic_load_subgroup_invocation:
1675       return SV_LANEID;
1676    case nir_intrinsic_load_tess_coord:
1677       return SV_TESS_COORD;
1678    case nir_intrinsic_load_tess_level_inner:
1679       return SV_TESS_INNER;
1680    case nir_intrinsic_load_tess_level_outer:
1681       return SV_TESS_OUTER;
1682    case nir_intrinsic_load_vertex_id:
1683       return SV_VERTEX_ID;
1684    case nir_intrinsic_load_work_group_id:
1685       return SV_CTAID;
1686    default:
1687       ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1688             nir_intrinsic_infos[intr].name);
1689       assert(false);
1690       return SV_LAST;
1691    }
1692 }
1693
1694 bool
1695 Converter::visit(nir_intrinsic_instr *insn)
1696 {
1697    nir_intrinsic_op op = insn->intrinsic;
1698
1699    switch (op) {
1700    case nir_intrinsic_load_uniform: {
1701       LValues &newDefs = convert(&insn->dest);
1702       const DataType dType = getDType(insn);
1703       Value *indirect;
1704       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1705       for (uint8_t i = 0; i < insn->num_components; ++i) {
1706          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1707       }
1708       break;
1709    }
1710    case nir_intrinsic_store_output:
1711    case nir_intrinsic_store_per_vertex_output: {
1712       Value *indirect;
1713       DataType dType = getSType(insn->src[0], false, false);
1714       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1715
1716       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1717          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1718             continue;
1719
1720          uint8_t offset = 0;
1721          Value *src = getSrc(&insn->src[0], i);
1722          switch (prog->getType()) {
1723          case Program::TYPE_FRAGMENT: {
1724             if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1725                // TGSI uses a different interface than NIR, TGSI stores that
1726                // value in the z component, NIR in X
1727                offset += 2;
1728                src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1729             }
1730             break;
1731          }
1732          case Program::TYPE_VERTEX: {
1733             if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1734                mkMov(clipVtx[i], src);
1735                src = clipVtx[i];
1736             }
1737             break;
1738          }
1739          default:
1740             break;
1741          }
1742
1743          storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1744       }
1745       break;
1746    }
1747    case nir_intrinsic_load_input:
1748    case nir_intrinsic_load_interpolated_input:
1749    case nir_intrinsic_load_output: {
1750       LValues &newDefs = convert(&insn->dest);
1751
1752       // FBFetch
1753       if (prog->getType() == Program::TYPE_FRAGMENT &&
1754           op == nir_intrinsic_load_output) {
1755          std::vector<Value*> defs, srcs;
1756          uint8_t mask = 0;
1757
1758          srcs.push_back(getSSA());
1759          srcs.push_back(getSSA());
1760          Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1761          Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1762          mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1763          mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1764
1765          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1766          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1767
1768          for (uint8_t i = 0u; i < insn->num_components; ++i) {
1769             defs.push_back(newDefs[i]);
1770             mask |= 1 << i;
1771          }
1772
1773          TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1774          texi->tex.levelZero = 1;
1775          texi->tex.mask = mask;
1776          texi->tex.useOffsets = 0;
1777          texi->tex.r = 0xffff;
1778          texi->tex.s = 0xffff;
1779
1780          info->prop.fp.readsFramebuffer = true;
1781          break;
1782       }
1783
1784       const DataType dType = getDType(insn);
1785       Value *indirect;
1786       bool input = op != nir_intrinsic_load_output;
1787       operation nvirOp;
1788       uint32_t mode = 0;
1789
1790       uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1791       nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1792
1793       // see load_barycentric_* handling
1794       if (prog->getType() == Program::TYPE_FRAGMENT) {
1795          mode = translateInterpMode(&vary, nvirOp);
1796          if (op == nir_intrinsic_load_interpolated_input) {
1797             ImmediateValue immMode;
1798             if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1799                mode |= immMode.reg.data.u32;
1800          }
1801       }
1802
1803       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1804          uint32_t address = getSlotAddress(insn, idx, i);
1805          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1806          if (prog->getType() == Program::TYPE_FRAGMENT) {
1807             int s = 1;
1808             if (typeSizeof(dType) == 8) {
1809                Value *lo = getSSA();
1810                Value *hi = getSSA();
1811                Instruction *interp;
1812
1813                interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1814                if (nvirOp == OP_PINTERP)
1815                   interp->setSrc(s++, fp.position);
1816                if (mode & NV50_IR_INTERP_OFFSET)
1817                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1818                interp->setInterpolate(mode);
1819                interp->setIndirect(0, 0, indirect);
1820
1821                Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1822                interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1823                if (nvirOp == OP_PINTERP)
1824                   interp->setSrc(s++, fp.position);
1825                if (mode & NV50_IR_INTERP_OFFSET)
1826                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1827                interp->setInterpolate(mode);
1828                interp->setIndirect(0, 0, indirect);
1829
1830                mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1831             } else {
1832                Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1833                if (nvirOp == OP_PINTERP)
1834                   interp->setSrc(s++, fp.position);
1835                if (mode & NV50_IR_INTERP_OFFSET)
1836                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1837                interp->setInterpolate(mode);
1838                interp->setIndirect(0, 0, indirect);
1839             }
1840          } else {
1841             mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1842          }
1843       }
1844       break;
1845    }
1846    case nir_intrinsic_load_barycentric_at_offset:
1847    case nir_intrinsic_load_barycentric_at_sample:
1848    case nir_intrinsic_load_barycentric_centroid:
1849    case nir_intrinsic_load_barycentric_pixel:
1850    case nir_intrinsic_load_barycentric_sample: {
1851       LValues &newDefs = convert(&insn->dest);
1852       uint32_t mode;
1853
1854       if (op == nir_intrinsic_load_barycentric_centroid ||
1855           op == nir_intrinsic_load_barycentric_sample) {
1856          mode = NV50_IR_INTERP_CENTROID;
1857       } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1858          Value *offs[2];
1859          for (uint8_t c = 0; c < 2; c++) {
1860             offs[c] = getScratch();
1861             mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1862             mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1863             mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1864             mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1865          }
1866          mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1867
1868          mode = NV50_IR_INTERP_OFFSET;
1869       } else if (op == nir_intrinsic_load_barycentric_pixel) {
1870          mode = NV50_IR_INTERP_DEFAULT;
1871       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1872          info->prop.fp.readsSampleLocations = true;
1873          mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1874          mode = NV50_IR_INTERP_OFFSET;
1875       } else {
1876          unreachable("all intrinsics already handled above");
1877       }
1878
1879       loadImm(newDefs[1], mode);
1880       break;
1881    }
1882    case nir_intrinsic_discard:
1883       mkOp(OP_DISCARD, TYPE_NONE, NULL);
1884       break;
1885    case nir_intrinsic_discard_if: {
1886       Value *pred = getSSA(1, FILE_PREDICATE);
1887       if (insn->num_components > 1) {
1888          ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1889          assert(false);
1890          return false;
1891       }
1892       mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1893       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1894       break;
1895    }
1896    case nir_intrinsic_load_base_vertex:
1897    case nir_intrinsic_load_base_instance:
1898    case nir_intrinsic_load_draw_id:
1899    case nir_intrinsic_load_front_face:
1900    case nir_intrinsic_load_helper_invocation:
1901    case nir_intrinsic_load_instance_id:
1902    case nir_intrinsic_load_invocation_id:
1903    case nir_intrinsic_load_local_group_size:
1904    case nir_intrinsic_load_local_invocation_id:
1905    case nir_intrinsic_load_num_work_groups:
1906    case nir_intrinsic_load_patch_vertices_in:
1907    case nir_intrinsic_load_primitive_id:
1908    case nir_intrinsic_load_sample_id:
1909    case nir_intrinsic_load_sample_mask_in:
1910    case nir_intrinsic_load_sample_pos:
1911    case nir_intrinsic_load_subgroup_eq_mask:
1912    case nir_intrinsic_load_subgroup_ge_mask:
1913    case nir_intrinsic_load_subgroup_gt_mask:
1914    case nir_intrinsic_load_subgroup_le_mask:
1915    case nir_intrinsic_load_subgroup_lt_mask:
1916    case nir_intrinsic_load_subgroup_invocation:
1917    case nir_intrinsic_load_tess_coord:
1918    case nir_intrinsic_load_tess_level_inner:
1919    case nir_intrinsic_load_tess_level_outer:
1920    case nir_intrinsic_load_vertex_id:
1921    case nir_intrinsic_load_work_group_id: {
1922       const DataType dType = getDType(insn);
1923       SVSemantic sv = convert(op);
1924       LValues &newDefs = convert(&insn->dest);
1925
1926       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1927          Value *def;
1928          if (typeSizeof(dType) == 8)
1929             def = getSSA();
1930          else
1931             def = newDefs[i];
1932
1933          if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1934             loadImm(def, 0u);
1935          } else {
1936             Symbol *sym = mkSysVal(sv, i);
1937             Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1938             if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1939                rdsv->perPatch = 1;
1940          }
1941
1942          if (typeSizeof(dType) == 8)
1943             mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1944       }
1945       break;
1946    }
1947    // constants
1948    case nir_intrinsic_load_subgroup_size: {
1949       LValues &newDefs = convert(&insn->dest);
1950       loadImm(newDefs[0], 32u);
1951       break;
1952    }
1953    case nir_intrinsic_vote_all:
1954    case nir_intrinsic_vote_any:
1955    case nir_intrinsic_vote_ieq: {
1956       LValues &newDefs = convert(&insn->dest);
1957       Value *pred = getScratch(1, FILE_PREDICATE);
1958       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1959       mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1960       mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1961       break;
1962    }
1963    case nir_intrinsic_ballot: {
1964       LValues &newDefs = convert(&insn->dest);
1965       Value *pred = getSSA(1, FILE_PREDICATE);
1966       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1967       mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1968       break;
1969    }
1970    case nir_intrinsic_read_first_invocation:
1971    case nir_intrinsic_read_invocation: {
1972       LValues &newDefs = convert(&insn->dest);
1973       const DataType dType = getDType(insn);
1974       Value *tmp = getScratch();
1975
1976       if (op == nir_intrinsic_read_first_invocation) {
1977          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1978          mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1979          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1980       } else
1981          tmp = getSrc(&insn->src[1], 0);
1982
1983       for (uint8_t i = 0; i < insn->num_components; ++i) {
1984          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1985             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1986       }
1987       break;
1988    }
1989    default:
1990       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
1991       return false;
1992    }
1993
1994    return true;
1995 }
1996
// Translates a NIR jump instruction into an nv50 IR flow instruction and
// records the matching edge in the control flow graph.
//
//  - return:   OP_BRA to the `exit` block, attached as a CROSS edge.
//  - break:    OP_BREAK to the block's first successor, CROSS edge.
//  - continue: OP_CONT to the block's first successor, BACK edge.
//
// Returns false (after logging) for any other jump type, true otherwise.
1997 bool
1998 Converter::visit(nir_jump_instr *insn)
1999 {
2000    if (insn->type == nir_jump_return) {
2001       // TODO: this only works in the main function
2002       mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
2003       bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
2004       return true;
2005    }
2006    if (insn->type != nir_jump_break && insn->type != nir_jump_continue) {
2007       ERROR("unknown nir_jump_type %u\n", insn->type);
2008       return false;
2009    }
2010
        // break/continue: the block holding the jump must have exactly one
        // successor, which is the jump target.
2011    nir_block *const from = insn->instr.block;
2012    assert(!from->successors[1]);
2013    BasicBlock *const dest = convert(from->successors[0]);
2014    if (insn->type == nir_jump_break) {
2015       mkFlow(OP_BREAK, dest, CC_ALWAYS, NULL);
2016       bb->cfg.attach(&dest->cfg, Graph::Edge::CROSS);
2017    } else {
2018       mkFlow(OP_CONT, dest, CC_ALWAYS, NULL);
2019       bb->cfg.attach(&dest->cfg, Graph::Edge::BACK);
2020    }
2021    return true;
2022 }
2023
// Emits one immediate load per component of a NIR constant.
//
// The immediate is read from the value array that matches the constant's
// bit size (u64/u32/u16/u8). For any other bit size nothing is emitted for
// that component. Always returns true.
2024 bool
2025 Converter::visit(nir_load_const_instr *insn)
2026 {
2027    const unsigned bits = insn->def.bit_size;
2028    assert(bits <= 64);
2029
2030    LValues &dsts = convert(&insn->def);
2031    const int count = insn->def.num_components;
2032    for (int c = 0; c < count; ++c) {
2033       if (bits == 64)
2034          loadImm(dsts[c], insn->value.u64[c]);
2035       else if (bits == 32)
2036          loadImm(dsts[c], insn->value.u32[c]);
2037       else if (bits == 16)
2038          loadImm(dsts[c], insn->value.u16[c]);
2039       else if (bits == 8)
2040          loadImm(dsts[c], insn->value.u8[c]);
2041    }
2042    return true;
2043 }
2048
// DEFAULT_CHECKS: guard used by the scalar-only cases of
// Converter::visit(nir_alu_instr *). It logs an error and makes the enclosing
// function return false when the destination has more than one component or
// when the write mask is anything other than 1.
2049 #define DEFAULT_CHECKS \
2050       if (insn->dest.dest.ssa.num_components > 1) { \
2051          ERROR("nir_alu_instr only supported with 1 component!\n"); \
2052          return false; \
2053       } \
2054       if (insn->dest.write_mask != 1) { \
2055          ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2056          return false; \
2057       }
// Translates one NIR ALU instruction into nv50 IR instructions.
//
// Returns false (after logging through ERROR) when the nir_op is not handled
// or when a DEFAULT_CHECKS guard rejects the instruction; returns true
// otherwise.
//
// The exit instruction of the current block is remembered before the switch.
// Afterwards every instruction that follows it (or, if the block had no
// instructions yet, every instruction starting at the block entry) gets its
// `precise` flag from insn->exact, and the last instruction in that chain
// gets `saturate` from insn->dest.saturate.
2058 bool
2059 Converter::visit(nir_alu_instr *insn)
2060 {
2061    const nir_op op = insn->op;
2062    const nir_op_info &info = nir_op_infos[op];
2063    DataType dType = getDType(insn);
2064    const std::vector<DataType> sTypes = getSTypes(insn);
2065
2066    Instruction *oldPos = this->bb->getExit();
2067
2068    switch (op) {
2069    case nir_op_fabs:
2070    case nir_op_iabs:
2071    case nir_op_fadd:
2072    case nir_op_iadd:
2073    case nir_op_fand:
2074    case nir_op_iand:
2075    case nir_op_fceil:
2076    case nir_op_fcos:
2077    case nir_op_fddx:
2078    case nir_op_fddx_coarse:
2079    case nir_op_fddx_fine:
2080    case nir_op_fddy:
2081    case nir_op_fddy_coarse:
2082    case nir_op_fddy_fine:
2083    case nir_op_fdiv:
2084    case nir_op_idiv:
2085    case nir_op_udiv:
2086    case nir_op_fexp2:
2087    case nir_op_ffloor:
2088    case nir_op_ffma:
2089    case nir_op_flog2:
2090    case nir_op_fmax:
2091    case nir_op_imax:
2092    case nir_op_umax:
2093    case nir_op_fmin:
2094    case nir_op_imin:
2095    case nir_op_umin:
2096    case nir_op_fmod:
2097    case nir_op_imod:
2098    case nir_op_umod:
2099    case nir_op_fmul:
2100    case nir_op_imul:
2101    case nir_op_imul_high:
2102    case nir_op_umul_high:
2103    case nir_op_fneg:
2104    case nir_op_ineg:
2105    case nir_op_fnot:
2106    case nir_op_inot:
2107    case nir_op_for:
2108    case nir_op_ior:
2109    case nir_op_pack_64_2x32_split:
2110    case nir_op_fpow:
2111    case nir_op_frcp:
2112    case nir_op_frem:
2113    case nir_op_irem:
2114    case nir_op_frsq:
2115    case nir_op_fsat:
2116    case nir_op_ishr:
2117    case nir_op_ushr:
2118    case nir_op_fsin:
2119    case nir_op_fsqrt:
2120    case nir_op_fsub:
2121    case nir_op_isub:
2122    case nir_op_ftrunc:
2123    case nir_op_ishl:
2124    case nir_op_fxor:
2125    case nir_op_ixor: {
           // Ops that map onto a single nv50 IR operation, optionally preceded
           // by one preparatory operation reported by preOperationNeeded().
2126       DEFAULT_CHECKS;
2127       LValues &newDefs = convert(&insn->dest);
2128       operation preOp = preOperationNeeded(op);
2129       if (preOp != OP_NOP) {
              // Two-step form: preOp writes tmp, the real op reads tmp.
2130          assert(info.num_inputs < 2);
2131          Value *tmp = getSSA(typeSizeof(dType));
2132          Instruction *i0 = mkOp(preOp, dType, tmp);
2133          Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2134          if (info.num_inputs) {
2135             i0->setSrc(0, getSrc(&insn->src[0]));
2136             i1->setSrc(0, tmp);
2137          }
2138          i1->subOp = getSubOp(op);
2139       } else {
2140          Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2141          for (unsigned s = 0u; s < info.num_inputs; ++s) {
2142             i->setSrc(s, getSrc(&insn->src[s]));
2143          }
2144          i->subOp = getSubOp(op);
2145       }
2146       break;
2147    }
2148    case nir_op_ifind_msb:
2149    case nir_op_ufind_msb: {
2150       DEFAULT_CHECKS;
2151       LValues &newDefs = convert(&insn->dest);
           // The instruction is typed after its source, not its destination.
2152       dType = sTypes[0];
2153       mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2154       break;
2155    }
2156    case nir_op_fround_even: {
2157       DEFAULT_CHECKS;
2158       LValues &newDefs = convert(&insn->dest);
2159       mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2160       break;
2161    }
2162    // convert instructions
2163    case nir_op_f2f32:
2164    case nir_op_f2i32:
2165    case nir_op_f2u32:
2166    case nir_op_i2f32:
2167    case nir_op_i2i32:
2168    case nir_op_u2f32:
2169    case nir_op_u2u32:
2170    case nir_op_f2f64:
2171    case nir_op_f2i64:
2172    case nir_op_f2u64:
2173    case nir_op_i2f64:
2174    case nir_op_i2i64:
2175    case nir_op_u2f64:
2176    case nir_op_u2u64: {
2177       DEFAULT_CHECKS;
2178       LValues &newDefs = convert(&insn->dest);
2179       Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
           // Float-to-integer conversions use the ROUND_Z rounding mode.
2180       if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2181          i->rnd = ROUND_Z;
2182       i->sType = sTypes[0];
2183       break;
2184    }
2185    // compare instructions
2186    case nir_op_feq32:
2187    case nir_op_ieq32:
2188    case nir_op_fge32:
2189    case nir_op_ige32:
2190    case nir_op_uge32:
2191    case nir_op_flt32:
2192    case nir_op_ilt32:
2193    case nir_op_ult32:
2194    case nir_op_fne32:
2195    case nir_op_ine32: {
2196       DEFAULT_CHECKS;
2197       LValues &newDefs = convert(&insn->dest);
2198       Instruction *i = mkCmp(getOperation(op),
2199                              getCondCode(op),
2200                              dType,
2201                              newDefs[0],
2202                              dType,
2203                              getSrc(&insn->src[0]),
2204                              getSrc(&insn->src[1]));
2205       if (info.num_inputs == 3)
2206          i->setSrc(2, getSrc(&insn->src[2]));
           // The source type passed to mkCmp above is replaced by the real one.
2207       i->sType = sTypes[0];
2208       break;
2209    }
2210    // those are weird ALU ops and need special handling, because
2211    //   1. they are always component based
2212    //   2. they basically just merge multiple values into one data type
2213    case nir_op_imov:
2214    case nir_op_fmov:
           // Three shapes of move:
           //   - destination is an array register: store components to local memory
           //   - source is an array register: load components from local memory
           //   - otherwise: plain per-component mov
2215       if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
2216          nir_reg_dest& reg = insn->dest.dest.reg;
2217          uint32_t goffset = regToLmemOffset[reg.reg->index];
2218          uint8_t comps = reg.reg->num_components;
2219          uint8_t size = reg.reg->bit_size / 8;
2220          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2221          uint32_t aoffset = csize * reg.base_offset;
2222          Value *indirect = NULL;
2223
2224          if (reg.indirect)
2225             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
2226                               getSrc(reg.indirect, 0), mkImm(csize));
2227
2228          for (uint8_t i = 0u; i < comps; ++i) {
                 // Only components enabled in the write mask are stored.
2229             if (!((1u << i) & insn->dest.write_mask))
2230                continue;
2231
2232             Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
2233             mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
2234          }
2235          break;
2236       } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
2237          LValues &newDefs = convert(&insn->dest);
2238          nir_reg_src& reg = insn->src[0].src.reg;
2239          uint32_t goffset = regToLmemOffset[reg.reg->index];
2240          // uint8_t comps = reg.reg->num_components;
2241          uint8_t size = reg.reg->bit_size / 8;
2242          uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
2243          uint32_t aoffset = csize * reg.base_offset;
2244          Value *indirect = NULL;
2245
2246          if (reg.indirect)
2247             indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
2248
2249          for (uint8_t i = 0u; i < newDefs.size(); ++i)
2250             loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
2251
2252          break;
2253       } else {
2254          LValues &newDefs = convert(&insn->dest);
2255          for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2256             mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
2257          }
2258       }
2259       break;
2260    case nir_op_vec2:
2261    case nir_op_vec3:
2262    case nir_op_vec4: {
           // Destination component c is a mov from source operand c.
2263       LValues &newDefs = convert(&insn->dest);
2264       for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2265          mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2266       }
2267       break;
2268    }
2269    // (un)pack
2270    case nir_op_pack_64_2x32: {
2271       LValues &newDefs = convert(&insn->dest);
2272       Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2273       merge->setSrc(0, getSrc(&insn->src[0], 0));
2274       merge->setSrc(1, getSrc(&insn->src[0], 1));
2275       break;
2276    }
2277    case nir_op_pack_half_2x16_split: {
2278       LValues &newDefs = convert(&insn->dest);
2279       Value *tmpH = getSSA();
2280       Value *tmpL = getSSA();
2281
           // Convert both operands to F16, then combine them with INSBF.
2282       mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2283       mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2284       mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2285       break;
2286    }
2287    case nir_op_unpack_half_2x16_split_x:
2288    case nir_op_unpack_half_2x16_split_y: {
2289       LValues &newDefs = convert(&insn->dest);
2290       Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
           // NOTE(review): subOp = 1 presumably selects the upper half - confirm.
2291       if (op == nir_op_unpack_half_2x16_split_y)
2292          cvt->subOp = 1;
2293       break;
2294    }
2295    case nir_op_unpack_64_2x32: {
2296       LValues &newDefs = convert(&insn->dest);
2297       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2298       break;
2299    }
2300    case nir_op_unpack_64_2x32_split_x: {
           // Only def 0 of the split is kept; def 1 goes to a fresh SSA value.
2301       LValues &newDefs = convert(&insn->dest);
2302       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2303       break;
2304    }
2305    case nir_op_unpack_64_2x32_split_y: {
           // Only def 1 of the split is kept; def 0 goes to a fresh SSA value.
2306       LValues &newDefs = convert(&insn->dest);
2307       mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2308       break;
2309    }
2310    // special instructions
2311    case nir_op_fsign:
2312    case nir_op_isign: {
2313       DEFAULT_CHECKS;
           // The two comparisons always produce 32 bit results, whatever the
           // width of the source.
2314       DataType iType;
2315       if (::isFloatType(dType))
2316          iType = TYPE_F32;
2317       else
2318          iType = TYPE_S32;
2319
2320       LValues &newDefs = convert(&insn->dest);
2321       LValue *val0 = getScratch();
2322       LValue *val1 = getScratch();
           // val0 = (src > 0), val1 = (src < 0); the sign is their difference.
2323       mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2324       mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2325
2326       if (dType == TYPE_F64) {
2327          mkOp2(OP_SUB, iType, val0, val0, val1);
2328          mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2329       } else if (dType == TYPE_S64 || dType == TYPE_U64) {
              // 64 bit integer: val1 becomes val0 shifted right by 31 and the
              // two words are merged.
2330          mkOp2(OP_SUB, iType, val0, val1, val0);
2331          mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2332          mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2333       } else if (::isFloatType(dType))
2334          mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2335       else
2336          mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2337       break;
2338    }
2339    case nir_op_fcsel:
2340    case nir_op_b32csel: {
2341       DEFAULT_CHECKS;
2342       LValues &newDefs = convert(&insn->dest);
           // NIR operand 0 is the condition; it is passed last to OP_SLCT.
2343       mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2344       break;
2345    }
2346    case nir_op_ibitfield_extract:
2347    case nir_op_ubitfield_extract: {
2348       DEFAULT_CHECKS;
2349       Value *tmp = getSSA();
2350       LValues &newDefs = convert(&insn->dest);
           // Combine operands 2 and 1 into one control word, then extract.
2351       mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2352       mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2353       break;
2354    }
2355    case nir_op_bfm: {
2356       DEFAULT_CHECKS;
2357       LValues &newDefs = convert(&insn->dest);
2358       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2359       break;
2360    }
2361    case nir_op_bitfield_insert: {
2362       DEFAULT_CHECKS;
2363       LValues &newDefs = convert(&insn->dest);
2364       LValue *temp = getSSA();
           // Combine operands 3 and 2 into one control word, then insert.
2365       mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2366       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2367       break;
2368    }
2369    case nir_op_bit_count: {
2370       DEFAULT_CHECKS;
2371       LValues &newDefs = convert(&insn->dest);
           // OP_POPCNT is emitted with the same value as both sources.
2372       mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2373       break;
2374    }
2375    case nir_op_bitfield_reverse: {
2376       DEFAULT_CHECKS;
2377       LValues &newDefs = convert(&insn->dest);
2378       mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2379       break;
2380    }
2381    case nir_op_find_lsb: {
2382       DEFAULT_CHECKS;
2383       LValues &newDefs = convert(&insn->dest);
2384       Value *tmp = getSSA();
           // Bit-reverse the source first, then run BFIND on the result.
2385       mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2386       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2387       break;
2388    }
2389    // boolean conversions
2390    case nir_op_b2f32: {
2391       DEFAULT_CHECKS;
2392       LValues &newDefs = convert(&insn->dest);
           // AND with the 1.0f immediate; presumably booleans are 0 / ~0 here
           // (nir_lower_bool_to_int32 runs before conversion) - confirm.
2393       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2394       break;
2395    }
2396    case nir_op_b2f64: {
2397       DEFAULT_CHECKS;
2398       LValues &newDefs = convert(&insn->dest);
2399       Value *tmp = getSSA(4);
           // 0x3ff00000 is the upper 32 bits of the double 1.0; the other
           // word of the merged result is zero.
2400       mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2401       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2402       break;
2403    }
2404    case nir_op_f2b32:
2405    case nir_op_i2b32: {
2406       DEFAULT_CHECKS;
2407       LValues &newDefs = convert(&insn->dest);
           // Compare against zero; 8 byte sources need an 8 byte zero value.
2408       Value *src1;
2409       if (typeSizeof(sTypes[0]) == 8) {
2410          src1 = loadImm(getSSA(8), 0.0);
2411       } else {
2412          src1 = zero;
2413       }
2414       CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2415       mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2416       break;
2417    }
2418    case nir_op_b2i32: {
2419       DEFAULT_CHECKS;
2420       LValues &newDefs = convert(&insn->dest);
2421       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2422       break;
2423    }
2424    case nir_op_b2i64: {
2425       DEFAULT_CHECKS;
2426       LValues &newDefs = convert(&insn->dest);
2427       LValue *def = getScratch();
           // 32 bit result of the AND merged with a zero word.
2428       mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2429       mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2430       break;
2431    }
2432    default:
2433       ERROR("unknown nir_op %s\n", info.name);
2434       return false;
2435    }
2436
        // The block had no exit instruction before the switch: start from its
        // entry instead. The entry may be missing as well, so it is tested
        // before its flags are touched.
2437    if (!oldPos) {
2438       oldPos = this->bb->getEntry();
2439       if (unlikely(!oldPos))
2440          return true;
2441
2442       oldPos->precise = insn->exact;
2443    }
2444
        // Propagate `exact` to every following instruction; the last one also
        // receives the saturate modifier.
2445    while (oldPos->next) {
2446       oldPos = oldPos->next;
2447       oldPos->precise = insn->exact;
2448    }
2449    oldPos->saturate = insn->dest.saturate;
2450
2451    return true;
2452 }
2453 #undef DEFAULT_CHECKS
2454
// Emits an OP_NOP that defines each component of a NIR undef value, so that
// every destination value has a defining instruction. Always returns true.
2455 bool
2456 Converter::visit(nir_ssa_undef_instr *insn)
2457 {
2458    LValues &undefDefs = convert(&insn->def);
2459    const uint8_t total = insn->def.num_components;
2460    for (uint8_t comp = 0u; comp != total; ++comp)
2461       mkOp(OP_NOP, TYPE_NONE, undefDefs[comp]);
2462    return true;
2463 }
2464
// CASE_SAMPLER(ty): expands to the `case GLSL_SAMPLER_DIM_<ty>` label that
// returns the TEX_TARGET_<ty> variant selected by isArray / isShadow
// (plain, _ARRAY, _SHADOW or _ARRAY_SHADOW). The trailing ';' is supplied at
// the point of use.
2465 #define CASE_SAMPLER(ty) \
2466    case GLSL_SAMPLER_DIM_ ## ty : \
2467       if (isShadow) \
2468          return isArray ? TEX_TARGET_ ## ty ## _ARRAY_SHADOW \
2469                         : TEX_TARGET_ ## ty ## _SHADOW; \
2470       return isArray ? TEX_TARGET_ ## ty ## _ARRAY : TEX_TARGET_ ## ty
2471
// Maps a GLSL sampler dimension plus the array / shadow flags to the
// matching nv50 IR texture target.
//
// 3D, BUF and EXTERNAL ignore both flags, MS only looks at isArray and RECT
// only at isShadow. An unknown dimension is logged, trips an assert and
// yields TEX_TARGET_COUNT.
2472 TexTarget
2473 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2474 {
2475    switch (dim) {
2476    CASE_SAMPLER(1D);
2477    CASE_SAMPLER(2D);
2478    CASE_SAMPLER(CUBE);
2479    case GLSL_SAMPLER_DIM_3D:
2480       return TEX_TARGET_3D;
2481    case GLSL_SAMPLER_DIM_MS:
2482       return isArray ? TEX_TARGET_2D_MS_ARRAY : TEX_TARGET_2D_MS;
2483    case GLSL_SAMPLER_DIM_RECT:
2484       return isShadow ? TEX_TARGET_RECT_SHADOW : TEX_TARGET_RECT;
2485    case GLSL_SAMPLER_DIM_BUF:
2486       return TEX_TARGET_BUFFER;
2487    case GLSL_SAMPLER_DIM_EXTERNAL:
2488       return TEX_TARGET_2D;
2489    default:
2490       break;
2491    }
2492
2493    ERROR("unknown glsl_sampler_dim %u\n", dim);
2494    assert(false);
2495    return TEX_TARGET_COUNT;
2496 }
2497 #undef CASE_SAMPLER
2504
// Multiplies `src` by `proj` (F32) into a scratch value and returns the
// result. When `proj` is NULL, `src` is returned unchanged and nothing is
// emitted.
2505 Value*
2506 Converter::applyProjection(Value *src, Value *proj)
2507 {
2508    if (proj != NULL)
2509       return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2510    return src;
2511 }
2512
// Translates a NIR texture instruction into one nv50 IR TexInstruction.
//
// The source list handed to mkTex() is built in this order: coordinates
// (scaled by 1/projector when a projector source exists), padding values up
// to the argument count of the target, then - when present - bias, lod,
// ms index, comparator, texture offset and sampler offset. Texel offsets and
// derivatives are attached to the TexInstruction separately.
//
// Returns false (after logging) for a nir_texop that is not listed in the
// switch, true otherwise.
2513 bool
2514 Converter::visit(nir_tex_instr *insn)
2515 {
2516    switch (insn->op) {
2517    case nir_texop_lod:
2518    case nir_texop_query_levels:
2519    case nir_texop_tex:
2520    case nir_texop_texture_samples:
2521    case nir_texop_tg4:
2522    case nir_texop_txb:
2523    case nir_texop_txd:
2524    case nir_texop_txf:
2525    case nir_texop_txf_ms:
2526    case nir_texop_txl:
2527    case nir_texop_txs: {
2528       LValues &newDefs = convert(&insn->dest);
2529       std::vector<Value*> srcs;
2530       std::vector<Value*> defs;
2531       std::vector<nir_src*> offsets;
2532       uint8_t mask = 0;
2533       bool lz = false;
2534       Value *proj = NULL;
2535       TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2536       operation op = getOperation(insn->op);
2537
           // Index of each optional source in insn->src; the checks below
           // treat -1 as "not present".
2538       int r, s;
2539       int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
2540       int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
2541       int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
2542       int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
2543       int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
2544       int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
2545       int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
2546       int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
2547       int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
2548       int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
2549       int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
2550
           // Projection is applied as a multiply by the reciprocal of the
           // projector (see applyProjection).
2551       if (projIdx != -1)
2552          proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
2553
2554       srcs.resize(insn->coord_components);
2555       for (uint8_t i = 0u; i < insn->coord_components; ++i)
2556          srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
2557
2558       // sometimes we get less args than target.getArgCount, but codegen expects the latter
2559       if (insn->coord_components) {
2560          uint32_t argCount = target.getArgCount();
2561
2562          if (target.isMS())
2563             argCount -= 1;
2564
              // NOTE(review): the subtraction is unsigned and would wrap if
              // coord_components ever exceeded argCount - confirm it cannot.
2565          for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
2566             srcs.push_back(getSSA());
2567       }
2568
2569       if (insn->op == nir_texop_texture_samples)
2570          srcs.push_back(zero);
2571       else if (!insn->num_srcs)
2572          srcs.push_back(loadImm(NULL, 0));
2573       if (biasIdx != -1)
2574          srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
2575       if (lodIdx != -1)
2576          srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
           // A TXF without an explicit lod source sets the level-zero flag.
2577       else if (op == OP_TXF)
2578          lz = true;
2579       if (msIdx != -1)
2580          srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
           // Texel offsets are not pushed into srcs; they are attached to the
           // instruction further down.
2581       if (offsetIdx != -1)
2582          offsets.push_back(&insn->src[offsetIdx].src);
2583       if (compIdx != -1)
2584          srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
           // From here on texOffIdx / sampOffIdx hold positions in srcs, not
           // positions in insn->src.
2585       if (texOffIdx != -1) {
2586          srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
2587          texOffIdx = srcs.size() - 1;
2588       }
2589       if (sampOffIdx != -1) {
2590          srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
2591          sampOffIdx = srcs.size() - 1;
2592       }
2593
2594       r = insn->texture_index;
2595       s = insn->sampler_index;
2596
           // One mask bit per destination component.
2597       defs.resize(newDefs.size());
2598       for (uint8_t d = 0u; d < newDefs.size(); ++d) {
2599          defs[d] = newDefs[d];
2600          mask |= 1 << d;
2601       }
           // Multisample targets, and OP_TEX outside fragment programs, also
           // set the level-zero flag.
2602       if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
2603          lz = true;
2604
2605       TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
2606       texi->tex.levelZero = lz;
2607       texi->tex.mask = mask;
2608
2609       if (texOffIdx != -1)
2610          texi->tex.rIndirectSrc = texOffIdx;
2611       if (sampOffIdx != -1)
2612          texi->tex.sIndirectSrc = sampOffIdx;
2613
           // Per-op tweaks: gather component, query kind and fixed masks.
2614       switch (insn->op) {
2615       case nir_texop_tg4:
2616          if (!target.isShadow())
2617             texi->tex.gatherComp = insn->component;
2618          break;
2619       case nir_texop_txs:
2620          texi->tex.query = TXQ_DIMS;
2621          break;
2622       case nir_texop_texture_samples:
2623          texi->tex.mask = 0x4;
2624          texi->tex.query = TXQ_TYPE;
2625          break;
2626       case nir_texop_query_levels:
2627          texi->tex.mask = 0x8;
2628          texi->tex.query = TXQ_DIMS;
2629          break;
2630       default:
2631          break;
2632       }
2633
2634       texi->tex.useOffsets = offsets.size();
2635       if (texi->tex.useOffsets) {
              // Note: this `s` shadows the sampler index declared above.
2636          for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
2637             for (uint32_t c = 0u; c < 3; ++c) {
                    // All three slots are filled; slots beyond the target's
                    // dimensionality reuse its last component.
2638                uint8_t s2 = std::min(c, target.getDim() - 1);
2639                texi->offset[s][c].set(getSrc(offsets[s], s2));
2640                texi->offset[s][c].setInsn(texi);
2641             }
2642          }
2643       }
2644
           // Derivatives are attached only when both ddx and ddy are present.
2645       if (ddxIdx != -1 && ddyIdx != -1) {
2646          for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
2647             texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
2648             texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
2649          }
2650       }
2651
2652       break;
2653    }
2654    default:
2655       ERROR("unknown nir_texop %u\n", insn->op);
2656       return false;
2657    }
2658    return true;
2659 }
2660
// Drives the whole NIR to nv50 IR conversion for the shader held by this
// Converter.
//
// Steps, in order: optional dump of the incoming NIR, a fixed series of NIR
// lowering passes, an optimization loop that repeats until no pass reports
// progress, the final lowerings that take the shader out of SSA form,
// parseNIR(), assignSlots(), an optional dump of the lowered NIR, and finally
// the per-function visit.
//
// Returns false as soon as parseNIR(), assignSlots() or a function visit
// fails; true otherwise.
2661 bool
2662 Converter::run()
2663 {
2664    bool progress;
2665
2666    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2667       nir_print_shader(nir, stderr);
2668
2669    struct nir_lower_subgroups_options subgroup_options = {
2670       .subgroup_size = 32,
2671       .ballot_bit_size = 32,
2672    };
2673
2674    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2675    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
2676    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2677    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2678    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2679    NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2680    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2681
        // Iterate the optimization passes until none of them makes progress.
2682    do {
2683       progress = false;
2684       NIR_PASS(progress, nir, nir_copy_prop);
2685       NIR_PASS(progress, nir, nir_opt_remove_phis);
2686       NIR_PASS(progress, nir, nir_opt_trivial_continues);
2687       NIR_PASS(progress, nir, nir_opt_cse);
2688       NIR_PASS(progress, nir, nir_opt_algebraic);
2689       NIR_PASS(progress, nir, nir_opt_constant_folding);
2690       NIR_PASS(progress, nir, nir_copy_prop);
2691       NIR_PASS(progress, nir, nir_opt_dce);
2692       NIR_PASS(progress, nir, nir_opt_dead_cf);
2693    } while (progress);
2694
2695    NIR_PASS_V(nir, nir_lower_bool_to_int32);
2696    NIR_PASS_V(nir, nir_lower_locals_to_regs);
2697    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2698    NIR_PASS_V(nir, nir_convert_from_ssa, true);
2699
2700    // Garbage collect dead instructions
2701    nir_sweep(nir);
2702
2703    if (!parseNIR()) {
2704       ERROR("Couldn't parse NIR!\n");
2705       return false;
2706    }
2707
2708    if (!assignSlots()) {
2709       ERROR("Couldn't assign slots!\n");
2710       return false;
2711    }
2712
2713    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2714       nir_print_shader(nir, stderr);
2715
2716    nir_foreach_function(function, nir) {
2717       if (!visit(function))
2718          return false;
2719    }
2720
2721    return true;
2722 }
2723
2724 } // unnamed namespace
2725
2726 namespace nv50_ir {
2727
// Builds this Program from the NIR shader stored in info->bin.source.
//
// Runs the NIR Converter; if it fails, returns false without doing anything
// else. On success the LoweringHelper pass is run over the program and
// tlsSize is taken from info->bin.tlsSpace before returning true.
2728 bool
2729 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2730 {
2731    Converter fromNir(this, (nir_shader*)info->bin.source, info);
2732    if (!fromNir.run())
2733       return false;
2734
2735    LoweringHelper helper;
2736    helper.run(this);
2737    tlsSize = info->bin.tlsSpace;
2738    return true;
2739 }
2740
2741 } // namespace nv50_ir