src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33
  34 #if __cplusplus >= 201103L
  35 #include <unordered_map>
  36 #else
  37 #include <tr1/unordered_map>
  38 #endif
  39 #include <vector>
  40
  41 namespace {
  42
  43 #if __cplusplus >= 201103L
  44 using std::hash;
  45 using std::unordered_map;
  46 #else
  47 using std::tr1::hash;
  48 using std::tr1::unordered_map;
  49 #endif
  50
  51 using namespace nv50_ir;
  52
  53 int
  54 type_size(const struct glsl_type *type)
  55 {
  56    return glsl_count_attribute_slots(type, false);
  57 }
  58
  59 class Converter : public ConverterCommon
  60 {
  61 public:
  62    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  63
  64    bool run();
  65 private:
  66    typedef std::vector<LValue*> LValues;
  67    typedef unordered_map<unsigned, LValues> NirDefMap;
  68    typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
  69
  70    TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
  71    LValues& convert(nir_alu_dest *);
  72    BasicBlock* convert(nir_block *);
  73    LValues& convert(nir_dest *);
  74    SVSemantic convert(nir_intrinsic_op);
  75    LValues& convert(nir_register *);
  76    LValues& convert(nir_ssa_def *);
  77
  78    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  79    Value* getSrc(nir_register *, uint8_t);
  80    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  81    Value* getSrc(nir_ssa_def *, uint8_t);
  82
  83    // returned value is the constant part of the given source (either the
  84    // nir_src or the selected source component of an intrinsic). Even though
  85    // this is mostly an optimization to be able to skip indirects in a few
  86    // cases, sometimes we require immediate values or set some fileds on
  87    // instructions (e.g. tex) in order for codegen to consume those.
  88    // If the found value has not a constant part, the Value gets returned
  89    // through the Value parameter.
  90    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  91    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
  92
  93    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
  94
  95    void setInterpolate(nv50_ir_varying *,
  96                        uint8_t,
  97                        bool centroid,
  98                        unsigned semantics);
  99
 100    Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
 101                          uint8_t c, Value *indirect0 = NULL,
 102                          Value *indirect1 = NULL, bool patch = false);
 103    void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
 104                 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
 105                 Value *indirect1 = NULL);
 106
 107    bool isFloatType(nir_alu_type);
 108    bool isSignedType(nir_alu_type);
 109    bool isResultFloat(nir_op);
 110    bool isResultSigned(nir_op);
 111
 112    DataType getDType(nir_alu_instr *);
 113    DataType getDType(nir_intrinsic_instr *);
 114    DataType getDType(nir_op, uint8_t);
 115
 116    std::vector<DataType> getSTypes(nir_alu_instr *);
 117    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 118
 119    operation getOperation(nir_intrinsic_op);
 120    operation getOperation(nir_op);
 121    operation getOperation(nir_texop);
 122    operation preOperationNeeded(nir_op);
 123
 124    int getSubOp(nir_intrinsic_op);
 125    int getSubOp(nir_op);
 126
 127    CondCode getCondCode(nir_op);
 128
 129    bool assignSlots();
 130    bool parseNIR();
 131
 132    bool visit(nir_alu_instr *);
 133    bool visit(nir_block *);
 134    bool visit(nir_cf_node *);
 135    bool visit(nir_function *);
 136    bool visit(nir_if *);
 137    bool visit(nir_instr *);
 138    bool visit(nir_intrinsic_instr *);
 139    bool visit(nir_jump_instr *);
 140    bool visit(nir_load_const_instr*);
 141    bool visit(nir_loop *);
 142    bool visit(nir_ssa_undef_instr *);
 143    bool visit(nir_tex_instr *);
 144
 145    // tex stuff
 146    Value* applyProjection(Value *src, Value *proj);
 147
 148    nir_shader *nir;
 149
 150    NirDefMap ssaDefs;
 151    NirDefMap regDefs;
 152    NirBlockMap blocks;
 153    unsigned int curLoopDepth;
 154
 155    BasicBlock *exit;
 156    Value *zero;
 157
 158    int clipVertexOutput;
 159
 160    union {
 161       struct {
 162          Value *position;
 163       } fp;
 164    };
 165 };
 166
 167 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 168    : ConverterCommon(prog, info),
 169      nir(nir),
 170      curLoopDepth(0),
 171      clipVertexOutput(-1)
 172 {
 173    zero = mkImm((uint32_t)0);
 174 }
 175
 176 BasicBlock *
 177 Converter::convert(nir_block *block)
 178 {
 179    NirBlockMap::iterator it = blocks.find(block->index);
 180    if (it != blocks.end())
 181       return it->second;
 182
 183    BasicBlock *bb = new BasicBlock(func);
 184    blocks[block->index] = bb;
 185    return bb;
 186 }
 187
 188 bool
 189 Converter::isFloatType(nir_alu_type type)
 190 {
 191    return nir_alu_type_get_base_type(type) == nir_type_float;
 192 }
 193
 194 bool
 195 Converter::isSignedType(nir_alu_type type)
 196 {
 197    return nir_alu_type_get_base_type(type) == nir_type_int;
 198 }
 199
 200 bool
 201 Converter::isResultFloat(nir_op op)
 202 {
 203    const nir_op_info &info = nir_op_infos[op];
 204    if (info.output_type != nir_type_invalid)
 205       return isFloatType(info.output_type);
 206
 207    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 208    assert(false);
 209    return true;
 210 }
 211
 212 bool
 213 Converter::isResultSigned(nir_op op)
 214 {
 215    switch (op) {
 216    // there is no umul and we get wrong results if we treat all muls as signed
 217    case nir_op_imul:
 218    case nir_op_inot:
 219       return false;
 220    default:
 221       const nir_op_info &info = nir_op_infos[op];
 222       if (info.output_type != nir_type_invalid)
 223          return isSignedType(info.output_type);
 224       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 225       assert(false);
 226       return true;
 227    }
 228 }
 229
 230 DataType
 231 Converter::getDType(nir_alu_instr *insn)
 232 {
 233    if (insn->dest.dest.is_ssa)
 234       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 235    else
 236       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 237 }
 238
 239 DataType
 240 Converter::getDType(nir_intrinsic_instr *insn)
 241 {
 242    if (insn->dest.is_ssa)
 243       return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
 244    else
 245       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
 246 }
 247
 248 DataType
 249 Converter::getDType(nir_op op, uint8_t bitSize)
 250 {
 251    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 252    if (ty == TYPE_NONE) {
 253       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 254       assert(false);
 255    }
 256    return ty;
 257 }
 258
 259 std::vector<DataType>
 260 Converter::getSTypes(nir_alu_instr *insn)
 261 {
 262    const nir_op_info &info = nir_op_infos[insn->op];
 263    std::vector<DataType> res(info.num_inputs);
 264
 265    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 266       if (info.input_types[i] != nir_type_invalid) {
 267          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 268       } else {
 269          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 270          assert(false);
 271          res[i] = TYPE_NONE;
 272          break;
 273       }
 274    }
 275
 276    return res;
 277 }
 278
 279 DataType
 280 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 281 {
 282    uint8_t bitSize;
 283    if (src.is_ssa)
 284       bitSize = src.ssa->bit_size;
 285    else
 286       bitSize = src.reg.reg->bit_size;
 287
 288    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 289    if (ty == TYPE_NONE) {
 290       const char *str;
 291       if (isFloat)
 292          str = "float";
 293       else if (isSigned)
 294          str = "int";
 295       else
 296          str = "uint";
 297       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 298       assert(false);
 299    }
 300    return ty;
 301 }
 302
 303 operation
 304 Converter::getOperation(nir_op op)
 305 {
 306    switch (op) {
 307    // basic ops with float and int variants
 308    case nir_op_fabs:
 309    case nir_op_iabs:
 310       return OP_ABS;
 311    case nir_op_fadd:
 312    case nir_op_iadd:
 313       return OP_ADD;
 314    case nir_op_fand:
 315    case nir_op_iand:
 316       return OP_AND;
 317    case nir_op_ifind_msb:
 318    case nir_op_ufind_msb:
 319       return OP_BFIND;
 320    case nir_op_fceil:
 321       return OP_CEIL;
 322    case nir_op_fcos:
 323       return OP_COS;
 324    case nir_op_f2f32:
 325    case nir_op_f2f64:
 326    case nir_op_f2i32:
 327    case nir_op_f2i64:
 328    case nir_op_f2u32:
 329    case nir_op_f2u64:
 330    case nir_op_i2f32:
 331    case nir_op_i2f64:
 332    case nir_op_i2i32:
 333    case nir_op_i2i64:
 334    case nir_op_u2f32:
 335    case nir_op_u2f64:
 336    case nir_op_u2u32:
 337    case nir_op_u2u64:
 338       return OP_CVT;
 339    case nir_op_fddx:
 340    case nir_op_fddx_coarse:
 341    case nir_op_fddx_fine:
 342       return OP_DFDX;
 343    case nir_op_fddy:
 344    case nir_op_fddy_coarse:
 345    case nir_op_fddy_fine:
 346       return OP_DFDY;
 347    case nir_op_fdiv:
 348    case nir_op_idiv:
 349    case nir_op_udiv:
 350       return OP_DIV;
 351    case nir_op_fexp2:
 352       return OP_EX2;
 353    case nir_op_ffloor:
 354       return OP_FLOOR;
 355    case nir_op_ffma:
 356       return OP_FMA;
 357    case nir_op_flog2:
 358       return OP_LG2;
 359    case nir_op_fmax:
 360    case nir_op_imax:
 361    case nir_op_umax:
 362       return OP_MAX;
 363    case nir_op_pack_64_2x32_split:
 364       return OP_MERGE;
 365    case nir_op_fmin:
 366    case nir_op_imin:
 367    case nir_op_umin:
 368       return OP_MIN;
 369    case nir_op_fmod:
 370    case nir_op_imod:
 371    case nir_op_umod:
 372    case nir_op_frem:
 373    case nir_op_irem:
 374       return OP_MOD;
 375    case nir_op_fmul:
 376    case nir_op_imul:
 377    case nir_op_imul_high:
 378    case nir_op_umul_high:
 379       return OP_MUL;
 380    case nir_op_fneg:
 381    case nir_op_ineg:
 382       return OP_NEG;
 383    case nir_op_fnot:
 384    case nir_op_inot:
 385       return OP_NOT;
 386    case nir_op_for:
 387    case nir_op_ior:
 388       return OP_OR;
 389    case nir_op_fpow:
 390       return OP_POW;
 391    case nir_op_frcp:
 392       return OP_RCP;
 393    case nir_op_frsq:
 394       return OP_RSQ;
 395    case nir_op_fsat:
 396       return OP_SAT;
 397    case nir_op_feq32:
 398    case nir_op_ieq32:
 399    case nir_op_fge32:
 400    case nir_op_ige32:
 401    case nir_op_uge32:
 402    case nir_op_flt32:
 403    case nir_op_ilt32:
 404    case nir_op_ult32:
 405    case nir_op_fne32:
 406    case nir_op_ine32:
 407       return OP_SET;
 408    case nir_op_ishl:
 409       return OP_SHL;
 410    case nir_op_ishr:
 411    case nir_op_ushr:
 412       return OP_SHR;
 413    case nir_op_fsin:
 414       return OP_SIN;
 415    case nir_op_fsqrt:
 416       return OP_SQRT;
 417    case nir_op_fsub:
 418    case nir_op_isub:
 419       return OP_SUB;
 420    case nir_op_ftrunc:
 421       return OP_TRUNC;
 422    case nir_op_fxor:
 423    case nir_op_ixor:
 424       return OP_XOR;
 425    default:
 426       ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
 427       assert(false);
 428       return OP_NOP;
 429    }
 430 }
 431
 432 operation
 433 Converter::getOperation(nir_texop op)
 434 {
 435    switch (op) {
 436    case nir_texop_tex:
 437       return OP_TEX;
 438    case nir_texop_lod:
 439       return OP_TXLQ;
 440    case nir_texop_txb:
 441       return OP_TXB;
 442    case nir_texop_txd:
 443       return OP_TXD;
 444    case nir_texop_txf:
 445    case nir_texop_txf_ms:
 446       return OP_TXF;
 447    case nir_texop_tg4:
 448       return OP_TXG;
 449    case nir_texop_txl:
 450       return OP_TXL;
 451    case nir_texop_query_levels:
 452    case nir_texop_texture_samples:
 453    case nir_texop_txs:
 454       return OP_TXQ;
 455    default:
 456       ERROR("couldn't get operation for nir_texop %u\n", op);
 457       assert(false);
 458       return OP_NOP;
 459    }
 460 }
 461
 462 operation
 463 Converter::getOperation(nir_intrinsic_op op)
 464 {
 465    switch (op) {
 466    default:
 467       ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
 468       assert(false);
 469       return OP_NOP;
 470    }
 471 }
 472
 473 operation
 474 Converter::preOperationNeeded(nir_op op)
 475 {
 476    switch (op) {
 477    case nir_op_fcos:
 478    case nir_op_fsin:
 479       return OP_PRESIN;
 480    default:
 481       return OP_NOP;
 482    }
 483 }
 484
 485 int
 486 Converter::getSubOp(nir_op op)
 487 {
 488    switch (op) {
 489    case nir_op_imul_high:
 490    case nir_op_umul_high:
 491       return NV50_IR_SUBOP_MUL_HIGH;
 492    default:
 493       return 0;
 494    }
 495 }
 496
 497 int
 498 Converter::getSubOp(nir_intrinsic_op op)
 499 {
 500    switch (op) {
 501    case nir_intrinsic_vote_all:
 502       return NV50_IR_SUBOP_VOTE_ALL;
 503    case nir_intrinsic_vote_any:
 504       return NV50_IR_SUBOP_VOTE_ANY;
 505    case nir_intrinsic_vote_ieq:
 506       return NV50_IR_SUBOP_VOTE_UNI;
 507    default:
 508       return 0;
 509    }
 510 }
 511
 512 CondCode
 513 Converter::getCondCode(nir_op op)
 514 {
 515    switch (op) {
 516    case nir_op_feq32:
 517    case nir_op_ieq32:
 518       return CC_EQ;
 519    case nir_op_fge32:
 520    case nir_op_ige32:
 521    case nir_op_uge32:
 522       return CC_GE;
 523    case nir_op_flt32:
 524    case nir_op_ilt32:
 525    case nir_op_ult32:
 526       return CC_LT;
 527    case nir_op_fne32:
 528       return CC_NEU;
 529    case nir_op_ine32:
 530       return CC_NE;
 531    default:
 532       ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
 533       assert(false);
 534       return CC_FL;
 535    }
 536 }
 537
 538 Converter::LValues&
 539 Converter::convert(nir_alu_dest *dest)
 540 {
 541    return convert(&dest->dest);
 542 }
 543
 544 Converter::LValues&
 545 Converter::convert(nir_dest *dest)
 546 {
 547    if (dest->is_ssa)
 548       return convert(&dest->ssa);
 549    if (dest->reg.indirect) {
 550       ERROR("no support for indirects.");
 551       assert(false);
 552    }
 553    return convert(dest->reg.reg);
 554 }
 555
 556 Converter::LValues&
 557 Converter::convert(nir_register *reg)
 558 {
 559    NirDefMap::iterator it = regDefs.find(reg->index);
 560    if (it != regDefs.end())
 561       return it->second;
 562
 563    LValues newDef(reg->num_components);
 564    for (uint8_t i = 0; i < reg->num_components; i++)
 565       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 566    return regDefs[reg->index] = newDef;
 567 }
 568
 569 Converter::LValues&
 570 Converter::convert(nir_ssa_def *def)
 571 {
 572    NirDefMap::iterator it = ssaDefs.find(def->index);
 573    if (it != ssaDefs.end())
 574       return it->second;
 575
 576    LValues newDef(def->num_components);
 577    for (uint8_t i = 0; i < def->num_components; i++)
 578       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 579    return ssaDefs[def->index] = newDef;
 580 }
 581
 582 Value*
 583 Converter::getSrc(nir_alu_src *src, uint8_t component)
 584 {
 585    if (src->abs || src->negate) {
 586       ERROR("modifiers currently not supported on nir_alu_src\n");
 587       assert(false);
 588    }
 589    return getSrc(&src->src, src->swizzle[component]);
 590 }
 591
 592 Value*
 593 Converter::getSrc(nir_register *reg, uint8_t idx)
 594 {
 595    NirDefMap::iterator it = regDefs.find(reg->index);
 596    if (it == regDefs.end())
 597       return convert(reg)[idx];
 598    return it->second[idx];
 599 }
 600
 601 Value*
 602 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 603 {
 604    if (src->is_ssa)
 605       return getSrc(src->ssa, idx);
 606
 607    if (src->reg.indirect) {
 608       if (indirect)
 609          return getSrc(src->reg.indirect, idx);
 610       ERROR("no support for indirects.");
 611       assert(false);
 612       return NULL;
 613    }
 614
 615    return getSrc(src->reg.reg, idx);
 616 }
 617
 618 Value*
 619 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 620 {
 621    NirDefMap::iterator it = ssaDefs.find(src->index);
 622    if (it == ssaDefs.end()) {
 623       ERROR("SSA value %u not found\n", src->index);
 624       assert(false);
 625       return NULL;
 626    }
 627    return it->second[idx];
 628 }
 629
 630 uint32_t
 631 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 632 {
 633    nir_const_value *offset = nir_src_as_const_value(*src);
 634
 635    if (offset) {
 636       indirect = NULL;
 637       return offset->u32[0];
 638    }
 639
 640    indirect = getSrc(src, idx, true);
 641    return 0;
 642 }
 643
 644 uint32_t
 645 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
 646 {
 647    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 648    if (indirect)
 649       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 650    return idx;
 651 }
 652
 653 static void
 654 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 655 {
 656    assert(name && index);
 657
 658    if (slot >= VERT_ATTRIB_MAX) {
 659       ERROR("invalid varying slot %u\n", slot);
 660       assert(false);
 661       return;
 662    }
 663
 664    if (slot >= VERT_ATTRIB_GENERIC0 &&
 665        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 666       *name = TGSI_SEMANTIC_GENERIC;
 667       *index = slot - VERT_ATTRIB_GENERIC0;
 668       return;
 669    }
 670
 671    if (slot >= VERT_ATTRIB_TEX0 &&
 672        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 673       *name = TGSI_SEMANTIC_TEXCOORD;
 674       *index = slot - VERT_ATTRIB_TEX0;
 675       return;
 676    }
 677
 678    switch (slot) {
 679    case VERT_ATTRIB_COLOR0:
 680       *name = TGSI_SEMANTIC_COLOR;
 681       *index = 0;
 682       break;
 683    case VERT_ATTRIB_COLOR1:
 684       *name = TGSI_SEMANTIC_COLOR;
 685       *index = 1;
 686       break;
 687    case VERT_ATTRIB_EDGEFLAG:
 688       *name = TGSI_SEMANTIC_EDGEFLAG;
 689       *index = 0;
 690       break;
 691    case VERT_ATTRIB_FOG:
 692       *name = TGSI_SEMANTIC_FOG;
 693       *index = 0;
 694       break;
 695    case VERT_ATTRIB_NORMAL:
 696       *name = TGSI_SEMANTIC_NORMAL;
 697       *index = 0;
 698       break;
 699    case VERT_ATTRIB_POS:
 700       *name = TGSI_SEMANTIC_POSITION;
 701       *index = 0;
 702       break;
 703    case VERT_ATTRIB_POINT_SIZE:
 704       *name = TGSI_SEMANTIC_PSIZE;
 705       *index = 0;
 706       break;
 707    default:
 708       ERROR("unknown vert attrib slot %u\n", slot);
 709       assert(false);
 710       break;
 711    }
 712 }
 713
 714 static void
 715 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
 716 {
 717    assert(name && index);
 718
 719    if (slot >= VARYING_SLOT_TESS_MAX) {
 720       ERROR("invalid varying slot %u\n", slot);
 721       assert(false);
 722       return;
 723    }
 724
 725    if (slot >= VARYING_SLOT_PATCH0) {
 726       *name = TGSI_SEMANTIC_PATCH;
 727       *index = slot - VARYING_SLOT_PATCH0;
 728       return;
 729    }
 730
 731    if (slot >= VARYING_SLOT_VAR0) {
 732       *name = TGSI_SEMANTIC_GENERIC;
 733       *index = slot - VARYING_SLOT_VAR0;
 734       return;
 735    }
 736
 737    if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
 738       *name = TGSI_SEMANTIC_TEXCOORD;
 739       *index = slot - VARYING_SLOT_TEX0;
 740       return;
 741    }
 742
 743    switch (slot) {
 744    case VARYING_SLOT_BFC0:
 745       *name = TGSI_SEMANTIC_BCOLOR;
 746       *index = 0;
 747       break;
 748    case VARYING_SLOT_BFC1:
 749       *name = TGSI_SEMANTIC_BCOLOR;
 750       *index = 1;
 751       break;
 752    case VARYING_SLOT_CLIP_DIST0:
 753       *name = TGSI_SEMANTIC_CLIPDIST;
 754       *index = 0;
 755       break;
 756    case VARYING_SLOT_CLIP_DIST1:
 757       *name = TGSI_SEMANTIC_CLIPDIST;
 758       *index = 1;
 759       break;
 760    case VARYING_SLOT_CLIP_VERTEX:
 761       *name = TGSI_SEMANTIC_CLIPVERTEX;
 762       *index = 0;
 763       break;
 764    case VARYING_SLOT_COL0:
 765       *name = TGSI_SEMANTIC_COLOR;
 766       *index = 0;
 767       break;
 768    case VARYING_SLOT_COL1:
 769       *name = TGSI_SEMANTIC_COLOR;
 770       *index = 1;
 771       break;
 772    case VARYING_SLOT_EDGE:
 773       *name = TGSI_SEMANTIC_EDGEFLAG;
 774       *index = 0;
 775       break;
 776    case VARYING_SLOT_FACE:
 777       *name = TGSI_SEMANTIC_FACE;
 778       *index = 0;
 779       break;
 780    case VARYING_SLOT_FOGC:
 781       *name = TGSI_SEMANTIC_FOG;
 782       *index = 0;
 783       break;
 784    case VARYING_SLOT_LAYER:
 785       *name = TGSI_SEMANTIC_LAYER;
 786       *index = 0;
 787       break;
 788    case VARYING_SLOT_PNTC:
 789       *name = TGSI_SEMANTIC_PCOORD;
 790       *index = 0;
 791       break;
 792    case VARYING_SLOT_POS:
 793       *name = TGSI_SEMANTIC_POSITION;
 794       *index = 0;
 795       break;
 796    case VARYING_SLOT_PRIMITIVE_ID:
 797       *name = TGSI_SEMANTIC_PRIMID;
 798       *index = 0;
 799       break;
 800    case VARYING_SLOT_PSIZ:
 801       *name = TGSI_SEMANTIC_PSIZE;
 802       *index = 0;
 803       break;
 804    case VARYING_SLOT_TESS_LEVEL_INNER:
 805       *name = TGSI_SEMANTIC_TESSINNER;
 806       *index = 0;
 807       break;
 808    case VARYING_SLOT_TESS_LEVEL_OUTER:
 809       *name = TGSI_SEMANTIC_TESSOUTER;
 810       *index = 0;
 811       break;
 812    case VARYING_SLOT_VIEWPORT:
 813       *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
 814       *index = 0;
 815       break;
 816    default:
 817       ERROR("unknown varying slot %u\n", slot);
 818       assert(false);
 819       break;
 820    }
 821 }
 822
 823 static void
 824 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
 825 {
 826    if (slot >= FRAG_RESULT_DATA0) {
 827       *name = TGSI_SEMANTIC_COLOR;
 828       *index = slot - FRAG_RESULT_COLOR - 2; // intentional
 829       return;
 830    }
 831
 832    switch (slot) {
 833    case FRAG_RESULT_COLOR:
 834       *name = TGSI_SEMANTIC_COLOR;
 835       *index = 0;
 836       break;
 837    case FRAG_RESULT_DEPTH:
 838       *name = TGSI_SEMANTIC_POSITION;
 839       *index = 0;
 840       break;
 841    case FRAG_RESULT_SAMPLE_MASK:
 842       *name = TGSI_SEMANTIC_SAMPLEMASK;
 843       *index = 0;
 844       break;
 845    default:
 846       ERROR("unknown frag result slot %u\n", slot);
 847       assert(false);
 848       break;
 849    }
 850 }
 851
 852 // copy of _mesa_sysval_to_semantic
 853 static void
 854 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
 855 {
 856    *index = 0;
 857    switch (val) {
 858    // Vertex shader
 859    case SYSTEM_VALUE_VERTEX_ID:
 860       *name = TGSI_SEMANTIC_VERTEXID;
 861       break;
 862    case SYSTEM_VALUE_INSTANCE_ID:
 863       *name = TGSI_SEMANTIC_INSTANCEID;
 864       break;
 865    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
 866       *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
 867       break;
 868    case SYSTEM_VALUE_BASE_VERTEX:
 869       *name = TGSI_SEMANTIC_BASEVERTEX;
 870       break;
 871    case SYSTEM_VALUE_BASE_INSTANCE:
 872       *name = TGSI_SEMANTIC_BASEINSTANCE;
 873       break;
 874    case SYSTEM_VALUE_DRAW_ID:
 875       *name = TGSI_SEMANTIC_DRAWID;
 876       break;
 877
 878    // Geometry shader
 879    case SYSTEM_VALUE_INVOCATION_ID:
 880       *name = TGSI_SEMANTIC_INVOCATIONID;
 881       break;
 882
 883    // Fragment shader
 884    case SYSTEM_VALUE_FRAG_COORD:
 885       *name = TGSI_SEMANTIC_POSITION;
 886       break;
 887    case SYSTEM_VALUE_FRONT_FACE:
 888       *name = TGSI_SEMANTIC_FACE;
 889       break;
 890    case SYSTEM_VALUE_SAMPLE_ID:
 891       *name = TGSI_SEMANTIC_SAMPLEID;
 892       break;
 893    case SYSTEM_VALUE_SAMPLE_POS:
 894       *name = TGSI_SEMANTIC_SAMPLEPOS;
 895       break;
 896    case SYSTEM_VALUE_SAMPLE_MASK_IN:
 897       *name = TGSI_SEMANTIC_SAMPLEMASK;
 898       break;
 899    case SYSTEM_VALUE_HELPER_INVOCATION:
 900       *name = TGSI_SEMANTIC_HELPER_INVOCATION;
 901       break;
 902
 903    // Tessellation shader
 904    case SYSTEM_VALUE_TESS_COORD:
 905       *name = TGSI_SEMANTIC_TESSCOORD;
 906       break;
 907    case SYSTEM_VALUE_VERTICES_IN:
 908       *name = TGSI_SEMANTIC_VERTICESIN;
 909       break;
 910    case SYSTEM_VALUE_PRIMITIVE_ID:
 911       *name = TGSI_SEMANTIC_PRIMID;
 912       break;
 913    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 914       *name = TGSI_SEMANTIC_TESSOUTER;
 915       break;
 916    case SYSTEM_VALUE_TESS_LEVEL_INNER:
 917       *name = TGSI_SEMANTIC_TESSINNER;
 918       break;
 919
 920    // Compute shader
 921    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
 922       *name = TGSI_SEMANTIC_THREAD_ID;
 923       break;
 924    case SYSTEM_VALUE_WORK_GROUP_ID:
 925       *name = TGSI_SEMANTIC_BLOCK_ID;
 926       break;
 927    case SYSTEM_VALUE_NUM_WORK_GROUPS:
 928       *name = TGSI_SEMANTIC_GRID_SIZE;
 929       break;
 930    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
 931       *name = TGSI_SEMANTIC_BLOCK_SIZE;
 932       break;
 933
 934    // ARB_shader_ballot
 935    case SYSTEM_VALUE_SUBGROUP_SIZE:
 936       *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
 937       break;
 938    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
 939       *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
 940       break;
 941    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
 942       *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
 943       break;
 944    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
 945       *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
 946       break;
 947    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
 948       *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
 949       break;
 950    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
 951       *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
 952       break;
 953    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
 954       *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
 955       break;
 956
 957    default:
 958       ERROR("unknown system value %u\n", val);
 959       assert(false);
 960       break;
 961    }
 962 }
 963
 964 void
 965 Converter::setInterpolate(nv50_ir_varying *var,
 966                           uint8_t mode,
 967                           bool centroid,
 968                           unsigned semantic)
 969 {
 970    switch (mode) {
 971    case INTERP_MODE_FLAT:
 972       var->flat = 1;
 973       break;
 974    case INTERP_MODE_NONE:
 975       if (semantic == TGSI_SEMANTIC_COLOR)
 976          var->sc = 1;
 977       else if (semantic == TGSI_SEMANTIC_POSITION)
 978          var->linear = 1;
 979       break;
 980    case INTERP_MODE_NOPERSPECTIVE:
 981       var->linear = 1;
 982       break;
 983    case INTERP_MODE_SMOOTH:
 984       break;
 985    }
 986    var->centroid = centroid;
 987 }
 988
 989 static uint16_t
 990 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
 991           bool input, const nir_variable *var)
 992 {
 993    if (!type->is_array())
 994       return type->count_attribute_slots(false);
 995
 996    uint16_t slots;
 997    switch (stage) {
 998    case Program::TYPE_GEOMETRY:
 999       slots = type->uniform_locations();
1000       if (input)
1001          slots /= info.gs.vertices_in;
1002       break;
1003    case Program::TYPE_TESSELLATION_CONTROL:
1004    case Program::TYPE_TESSELLATION_EVAL:
1005       // remove first dimension
1006       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
1007          slots = type->uniform_locations();
1008       else
1009          slots = type->fields.array->uniform_locations();
1010       break;
1011    default:
1012       slots = type->count_attribute_slots(false);
1013       break;
1014    }
1015
1016    return slots;
1017 }
1018
1019 bool Converter::assignSlots() {
1020    unsigned name;
1021    unsigned index;
1022
1023    info->io.viewportId = -1;
1024    info->numInputs = 0;
1025
1026    // we have to fixup the uniform locations for arrays
1027    unsigned numImages = 0;
1028    nir_foreach_variable(var, &nir->uniforms) {
1029       const glsl_type *type = var->type;
1030       if (!type->without_array()->is_image())
1031          continue;
1032       var->data.driver_location = numImages;
1033       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
1034    }
1035
1036    nir_foreach_variable(var, &nir->inputs) {
1037       const glsl_type *type = var->type;
1038       int slot = var->data.location;
1039       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
1040       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1041                                        : type->component_slots();
1042       uint32_t frac = var->data.location_frac;
1043       uint32_t vary = var->data.driver_location;
1044
1045       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1046          if (comp > 2)
1047             slots *= 2;
1048       }
1049
1050       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
1051
1052       switch(prog->getType()) {
1053       case Program::TYPE_FRAGMENT:
1054          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1055          for (uint16_t i = 0; i < slots; ++i) {
1056             setInterpolate(&info->in[vary + i], var->data.interpolation,
1057                            var->data.centroid | var->data.sample, name);
1058          }
1059          break;
1060       case Program::TYPE_GEOMETRY:
1061          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1062          break;
1063       case Program::TYPE_TESSELLATION_CONTROL:
1064       case Program::TYPE_TESSELLATION_EVAL:
1065          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1066          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1067             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1068          break;
1069       case Program::TYPE_VERTEX:
1070          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1071          switch (name) {
1072          case TGSI_SEMANTIC_EDGEFLAG:
1073             info->io.edgeFlagIn = vary;
1074             break;
1075          default:
1076             break;
1077          }
1078          break;
1079       default:
1080          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1081          return false;
1082       }
1083
1084       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1085          info->in[vary].id = vary;
1086          info->in[vary].patch = var->data.patch;
1087          info->in[vary].sn = name;
1088          info->in[vary].si = index + i;
1089          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1090             if (i & 0x1)
1091                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1092             else
1093                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1094          else
1095             info->in[vary].mask |= ((1 << comp) - 1) << frac;
1096       }
1097       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1098    }
1099
1100    info->numOutputs = 0;
1101    nir_foreach_variable(var, &nir->outputs) {
1102       const glsl_type *type = var->type;
1103       int slot = var->data.location;
1104       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1105       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1106                                        : type->component_slots();
1107       uint32_t frac = var->data.location_frac;
1108       uint32_t vary = var->data.driver_location;
1109
1110       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1111          if (comp > 2)
1112             slots *= 2;
1113       }
1114
1115       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1116
1117       switch(prog->getType()) {
1118       case Program::TYPE_FRAGMENT:
1119          frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1120          switch (name) {
1121          case TGSI_SEMANTIC_COLOR:
1122             if (!var->data.fb_fetch_output)
1123                info->prop.fp.numColourResults++;
1124             info->prop.fp.separateFragData = true;
1125             // sometimes we get FRAG_RESULT_DATAX with data.index 0
1126             // sometimes we get FRAG_RESULT_DATA0 with data.index X
1127             index = index == 0 ? var->data.index : index;
1128             break;
1129          case TGSI_SEMANTIC_POSITION:
1130             info->io.fragDepth = vary;
1131             info->prop.fp.writesDepth = true;
1132             break;
1133          case TGSI_SEMANTIC_SAMPLEMASK:
1134             info->io.sampleMask = vary;
1135             break;
1136          default:
1137             break;
1138          }
1139          break;
1140       case Program::TYPE_GEOMETRY:
1141       case Program::TYPE_TESSELLATION_CONTROL:
1142       case Program::TYPE_TESSELLATION_EVAL:
1143       case Program::TYPE_VERTEX:
1144          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1145
1146          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1147              name != TGSI_SEMANTIC_TESSOUTER)
1148             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1149
1150          switch (name) {
1151          case TGSI_SEMANTIC_CLIPDIST:
1152             info->io.genUserClip = -1;
1153             break;
1154          case TGSI_SEMANTIC_CLIPVERTEX:
1155             clipVertexOutput = vary;
1156             break;
1157          case TGSI_SEMANTIC_EDGEFLAG:
1158             info->io.edgeFlagOut = vary;
1159             break;
1160          case TGSI_SEMANTIC_POSITION:
1161             if (clipVertexOutput < 0)
1162                clipVertexOutput = vary;
1163             break;
1164          default:
1165             break;
1166          }
1167          break;
1168       default:
1169          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1170          return false;
1171       }
1172
1173       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1174          info->out[vary].id = vary;
1175          info->out[vary].patch = var->data.patch;
1176          info->out[vary].sn = name;
1177          info->out[vary].si = index + i;
1178          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1179             if (i & 0x1)
1180                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1181             else
1182                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1183          else
1184             info->out[vary].mask |= ((1 << comp) - 1) << frac;
1185
1186          if (nir->info.outputs_read & 1ll << slot)
1187             info->out[vary].oread = 1;
1188       }
1189       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1190    }
1191
1192    info->numSysVals = 0;
1193    for (uint8_t i = 0; i < 64; ++i) {
1194       if (!(nir->info.system_values_read & 1ll << i))
1195          continue;
1196
1197       system_val_to_tgsi_semantic(i, &name, &index);
1198       info->sv[info->numSysVals].sn = name;
1199       info->sv[info->numSysVals].si = index;
1200       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1201
1202       switch (i) {
1203       case SYSTEM_VALUE_INSTANCE_ID:
1204          info->io.instanceId = info->numSysVals;
1205          break;
1206       case SYSTEM_VALUE_TESS_LEVEL_INNER:
1207       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1208          info->sv[info->numSysVals].patch = 1;
1209          break;
1210       case SYSTEM_VALUE_VERTEX_ID:
1211          info->io.vertexId = info->numSysVals;
1212          break;
1213       default:
1214          break;
1215       }
1216
1217       info->numSysVals += 1;
1218    }
1219
1220    if (info->io.genUserClip > 0) {
1221       info->io.clipDistances = info->io.genUserClip;
1222
1223       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1224
1225       for (unsigned int n = 0; n < nOut; ++n) {
1226          unsigned int i = info->numOutputs++;
1227          info->out[i].id = i;
1228          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1229          info->out[i].si = n;
1230          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1231       }
1232    }
1233
1234    return info->assignSlots(info) == 0;
1235 }
1236
1237 uint32_t
1238 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1239 {
1240    DataType ty;
1241    int offset = nir_intrinsic_component(insn);
1242    bool input;
1243
1244    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1245       ty = getDType(insn);
1246    else
1247       ty = getSType(insn->src[0], false, false);
1248
1249    switch (insn->intrinsic) {
1250    case nir_intrinsic_load_input:
1251    case nir_intrinsic_load_interpolated_input:
1252    case nir_intrinsic_load_per_vertex_input:
1253       input = true;
1254       break;
1255    case nir_intrinsic_load_output:
1256    case nir_intrinsic_load_per_vertex_output:
1257    case nir_intrinsic_store_output:
1258    case nir_intrinsic_store_per_vertex_output:
1259       input = false;
1260       break;
1261    default:
1262       ERROR("unknown intrinsic in getSlotAddress %s",
1263             nir_intrinsic_infos[insn->intrinsic].name);
1264       input = false;
1265       assert(false);
1266       break;
1267    }
1268
1269    if (typeSizeof(ty) == 8) {
1270       slot *= 2;
1271       slot += offset;
1272       if (slot >= 4) {
1273          idx += 1;
1274          slot -= 4;
1275       }
1276    } else {
1277       slot += offset;
1278    }
1279
1280    assert(slot < 4);
1281    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1282    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1283
1284    const nv50_ir_varying *vary = input ? info->in : info->out;
1285    return vary[idx].slot[slot] * 4;
1286 }
1287
1288 Instruction *
1289 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1290                     uint32_t base, uint8_t c, Value *indirect0,
1291                     Value *indirect1, bool patch)
1292 {
1293    unsigned int tySize = typeSizeof(ty);
1294
1295    if (tySize == 8 &&
1296        (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1297       Value *lo = getSSA();
1298       Value *hi = getSSA();
1299
1300       Instruction *loi =
1301          mkLoad(TYPE_U32, lo,
1302                 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1303                 indirect0);
1304       loi->setIndirect(0, 1, indirect1);
1305       loi->perPatch = patch;
1306
1307       Instruction *hii =
1308          mkLoad(TYPE_U32, hi,
1309                 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1310                 indirect0);
1311       hii->setIndirect(0, 1, indirect1);
1312       hii->perPatch = patch;
1313
1314       return mkOp2(OP_MERGE, ty, def, lo, hi);
1315    } else {
1316       Instruction *ld =
1317          mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1318       ld->setIndirect(0, 1, indirect1);
1319       ld->perPatch = patch;
1320       return ld;
1321    }
1322 }
1323
1324 void
1325 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1326                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1327                    Value *indirect0, Value *indirect1)
1328 {
1329    uint8_t size = typeSizeof(ty);
1330    uint32_t address = getSlotAddress(insn, idx, c);
1331
1332    if (size == 8 && indirect0) {
1333       Value *split[2];
1334       mkSplit(split, 4, src);
1335
1336       if (op == OP_EXPORT) {
1337          split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1338          split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1339       }
1340
1341       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1342               split[0])->perPatch = info->out[idx].patch;
1343       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1344               split[1])->perPatch = info->out[idx].patch;
1345    } else {
1346       if (op == OP_EXPORT)
1347          src = mkMov(getSSA(size), src, ty)->getDef(0);
1348       mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1349               src)->perPatch = info->out[idx].patch;
1350    }
1351 }
1352
1353 bool
1354 Converter::parseNIR()
1355 {
1356    info->io.clipDistances = nir->info.clip_distance_array_size;
1357    info->io.cullDistances = nir->info.cull_distance_array_size;
1358
1359    switch(prog->getType()) {
1360    case Program::TYPE_COMPUTE:
1361       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1362       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1363       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1364       info->bin.smemSize = nir->info.cs.shared_size;
1365       break;
1366    case Program::TYPE_FRAGMENT:
1367       info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1368       info->prop.fp.persampleInvocation =
1369          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1370          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1371       info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1372       info->prop.fp.readsSampleLocations =
1373          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1374       info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1375       info->prop.fp.usesSampleMaskIn =
1376          !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1377       break;
1378    case Program::TYPE_GEOMETRY:
1379       info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1380       info->prop.gp.instanceCount = nir->info.gs.invocations;
1381       info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1382       info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1383       break;
1384    case Program::TYPE_TESSELLATION_CONTROL:
1385    case Program::TYPE_TESSELLATION_EVAL:
1386       if (nir->info.tess.primitive_mode == GL_ISOLINES)
1387          info->prop.tp.domain = GL_LINES;
1388       else
1389          info->prop.tp.domain = nir->info.tess.primitive_mode;
1390       info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1391       info->prop.tp.outputPrim =
1392          nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1393       info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1394       info->prop.tp.winding = !nir->info.tess.ccw;
1395       break;
1396    case Program::TYPE_VERTEX:
1397       info->prop.vp.usesDrawParameters =
1398          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1399          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1400          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1401       break;
1402    default:
1403       break;
1404    }
1405
1406    return true;
1407 }
1408
1409 bool
1410 Converter::visit(nir_function *function)
1411 {
1412    // we only support emiting the main function for now
1413    assert(!strcmp(function->name, "main"));
1414    assert(function->impl);
1415
1416    // usually the blocks will set everything up, but main is special
1417    BasicBlock *entry = new BasicBlock(prog->main);
1418    exit = new BasicBlock(prog->main);
1419    blocks[nir_start_block(function->impl)->index] = entry;
1420    prog->main->setEntry(entry);
1421    prog->main->setExit(exit);
1422
1423    setPosition(entry, true);
1424
1425    if (info->io.genUserClip > 0) {
1426       for (int c = 0; c < 4; ++c)
1427          clipVtx[c] = getScratch();
1428    }
1429
1430    switch (prog->getType()) {
1431    case Program::TYPE_TESSELLATION_CONTROL:
1432       outBase = mkOp2v(
1433          OP_SUB, TYPE_U32, getSSA(),
1434          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1435          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1436       break;
1437    case Program::TYPE_FRAGMENT: {
1438       Symbol *sv = mkSysVal(SV_POSITION, 3);
1439       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1440       fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1441       break;
1442    }
1443    default:
1444       break;
1445    }
1446
1447    nir_index_ssa_defs(function->impl);
1448    foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1449       if (!visit(node))
1450          return false;
1451    }
1452
1453    bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1454    setPosition(exit, true);
1455
1456    if (info->io.genUserClip > 0)
1457       handleUserClipPlanes();
1458
1459    // TODO: for non main function this needs to be a OP_RETURN
1460    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1461    return true;
1462 }
1463
1464 bool
1465 Converter::visit(nir_cf_node *node)
1466 {
1467    switch (node->type) {
1468    case nir_cf_node_block:
1469       return visit(nir_cf_node_as_block(node));
1470    case nir_cf_node_if:
1471       return visit(nir_cf_node_as_if(node));
1472    case nir_cf_node_loop:
1473       return visit(nir_cf_node_as_loop(node));
1474    default:
1475       ERROR("unknown nir_cf_node type %u\n", node->type);
1476       return false;
1477    }
1478 }
1479
1480 bool
1481 Converter::visit(nir_block *block)
1482 {
1483    if (!block->predecessors->entries && block->instr_list.is_empty())
1484       return true;
1485
1486    BasicBlock *bb = convert(block);
1487
1488    setPosition(bb, true);
1489    nir_foreach_instr(insn, block) {
1490       if (!visit(insn))
1491          return false;
1492    }
1493    return true;
1494 }
1495
1496 bool
1497 Converter::visit(nir_if *nif)
1498 {
1499    DataType sType = getSType(nif->condition, false, false);
1500    Value *src = getSrc(&nif->condition, 0);
1501
1502    nir_block *lastThen = nir_if_last_then_block(nif);
1503    nir_block *lastElse = nir_if_last_else_block(nif);
1504
1505    assert(!lastThen->successors[1]);
1506    assert(!lastElse->successors[1]);
1507
1508    BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1509    BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1510
1511    bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1512    bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1513
1514    // we only insert joinats, if both nodes end up at the end of the if again.
1515    // the reason for this to not happens are breaks/continues/ret/... which
1516    // have their own handling
1517    if (lastThen->successors[0] == lastElse->successors[0])
1518       bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1519                           CC_ALWAYS, NULL);
1520
1521    mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1522
1523    foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1524       if (!visit(node))
1525          return false;
1526    }
1527    setPosition(convert(lastThen), true);
1528    if (!bb->getExit() ||
1529        !bb->getExit()->asFlow() ||
1530         bb->getExit()->asFlow()->op == OP_JOIN) {
1531       BasicBlock *tailBB = convert(lastThen->successors[0]);
1532       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1533       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1534    }
1535
1536    foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1537       if (!visit(node))
1538          return false;
1539    }
1540    setPosition(convert(lastElse), true);
1541    if (!bb->getExit() ||
1542        !bb->getExit()->asFlow() ||
1543         bb->getExit()->asFlow()->op == OP_JOIN) {
1544       BasicBlock *tailBB = convert(lastElse->successors[0]);
1545       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1546       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1547    }
1548
1549    if (lastThen->successors[0] == lastElse->successors[0]) {
1550       setPosition(convert(lastThen->successors[0]), true);
1551       mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1552    }
1553
1554    return true;
1555 }
1556
1557 bool
1558 Converter::visit(nir_loop *loop)
1559 {
1560    curLoopDepth += 1;
1561    func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1562
1563    BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1564    BasicBlock *tailBB =
1565       convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1566    bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1567
1568    mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1569    setPosition(loopBB, false);
1570    mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1571
1572    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1573       if (!visit(node))
1574          return false;
1575    }
1576    Instruction *insn = bb->getExit();
1577    if (bb->cfg.incidentCount() != 0) {
1578       if (!insn || !insn->asFlow()) {
1579          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1580          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1581       } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1582                  tailBB->cfg.incidentCount() == 0) {
1583          // RA doesn't like having blocks around with no incident edge,
1584          // so we create a fake one to make it happy
1585          bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1586       }
1587    }
1588
1589    curLoopDepth -= 1;
1590
1591    return true;
1592 }
1593
1594 bool
1595 Converter::visit(nir_instr *insn)
1596 {
1597    switch (insn->type) {
1598    case nir_instr_type_alu:
1599       return visit(nir_instr_as_alu(insn));
1600    case nir_instr_type_intrinsic:
1601       return visit(nir_instr_as_intrinsic(insn));
1602    case nir_instr_type_jump:
1603       return visit(nir_instr_as_jump(insn));
1604    case nir_instr_type_load_const:
1605       return visit(nir_instr_as_load_const(insn));
1606    case nir_instr_type_ssa_undef:
1607       return visit(nir_instr_as_ssa_undef(insn));
1608    case nir_instr_type_tex:
1609       return visit(nir_instr_as_tex(insn));
1610    default:
1611       ERROR("unknown nir_instr type %u\n", insn->type);
1612       return false;
1613    }
1614    return true;
1615 }
1616
1617 SVSemantic
1618 Converter::convert(nir_intrinsic_op intr)
1619 {
1620    switch (intr) {
1621    case nir_intrinsic_load_base_vertex:
1622       return SV_BASEVERTEX;
1623    case nir_intrinsic_load_base_instance:
1624       return SV_BASEINSTANCE;
1625    case nir_intrinsic_load_draw_id:
1626       return SV_DRAWID;
1627    case nir_intrinsic_load_front_face:
1628       return SV_FACE;
1629    case nir_intrinsic_load_helper_invocation:
1630       return SV_THREAD_KILL;
1631    case nir_intrinsic_load_instance_id:
1632       return SV_INSTANCE_ID;
1633    case nir_intrinsic_load_invocation_id:
1634       return SV_INVOCATION_ID;
1635    case nir_intrinsic_load_local_group_size:
1636       return SV_NTID;
1637    case nir_intrinsic_load_local_invocation_id:
1638       return SV_TID;
1639    case nir_intrinsic_load_num_work_groups:
1640       return SV_NCTAID;
1641    case nir_intrinsic_load_patch_vertices_in:
1642       return SV_VERTEX_COUNT;
1643    case nir_intrinsic_load_primitive_id:
1644       return SV_PRIMITIVE_ID;
1645    case nir_intrinsic_load_sample_id:
1646       return SV_SAMPLE_INDEX;
1647    case nir_intrinsic_load_sample_mask_in:
1648       return SV_SAMPLE_MASK;
1649    case nir_intrinsic_load_sample_pos:
1650       return SV_SAMPLE_POS;
1651    case nir_intrinsic_load_subgroup_eq_mask:
1652       return SV_LANEMASK_EQ;
1653    case nir_intrinsic_load_subgroup_ge_mask:
1654       return SV_LANEMASK_GE;
1655    case nir_intrinsic_load_subgroup_gt_mask:
1656       return SV_LANEMASK_GT;
1657    case nir_intrinsic_load_subgroup_le_mask:
1658       return SV_LANEMASK_LE;
1659    case nir_intrinsic_load_subgroup_lt_mask:
1660       return SV_LANEMASK_LT;
1661    case nir_intrinsic_load_subgroup_invocation:
1662       return SV_LANEID;
1663    case nir_intrinsic_load_tess_coord:
1664       return SV_TESS_COORD;
1665    case nir_intrinsic_load_tess_level_inner:
1666       return SV_TESS_INNER;
1667    case nir_intrinsic_load_tess_level_outer:
1668       return SV_TESS_OUTER;
1669    case nir_intrinsic_load_vertex_id:
1670       return SV_VERTEX_ID;
1671    case nir_intrinsic_load_work_group_id:
1672       return SV_CTAID;
1673    default:
1674       ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1675             nir_intrinsic_infos[intr].name);
1676       assert(false);
1677       return SV_LAST;
1678    }
1679 }
1680
1681 bool
1682 Converter::visit(nir_intrinsic_instr *insn)
1683 {
1684    nir_intrinsic_op op = insn->intrinsic;
1685
1686    switch (op) {
1687    case nir_intrinsic_load_uniform: {
1688       LValues &newDefs = convert(&insn->dest);
1689       const DataType dType = getDType(insn);
1690       Value *indirect;
1691       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1692       for (uint8_t i = 0; i < insn->num_components; ++i) {
1693          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1694       }
1695       break;
1696    }
1697    case nir_intrinsic_store_output:
1698    case nir_intrinsic_store_per_vertex_output: {
1699       Value *indirect;
1700       DataType dType = getSType(insn->src[0], false, false);
1701       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1702
1703       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1704          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1705             continue;
1706
1707          uint8_t offset = 0;
1708          Value *src = getSrc(&insn->src[0], i);
1709          switch (prog->getType()) {
1710          case Program::TYPE_FRAGMENT: {
1711             if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1712                // TGSI uses a different interface than NIR, TGSI stores that
1713                // value in the z component, NIR in X
1714                offset += 2;
1715                src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1716             }
1717             break;
1718          }
1719          case Program::TYPE_VERTEX: {
1720             if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1721                mkMov(clipVtx[i], src);
1722                src = clipVtx[i];
1723             }
1724             break;
1725          }
1726          default:
1727             break;
1728          }
1729
1730          storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1731       }
1732       break;
1733    }
1734    case nir_intrinsic_load_input:
1735    case nir_intrinsic_load_interpolated_input:
1736    case nir_intrinsic_load_output: {
1737       LValues &newDefs = convert(&insn->dest);
1738
1739       // FBFetch
1740       if (prog->getType() == Program::TYPE_FRAGMENT &&
1741           op == nir_intrinsic_load_output) {
1742          std::vector<Value*> defs, srcs;
1743          uint8_t mask = 0;
1744
1745          srcs.push_back(getSSA());
1746          srcs.push_back(getSSA());
1747          Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1748          Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1749          mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1750          mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1751
1752          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1753          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1754
1755          for (uint8_t i = 0u; i < insn->num_components; ++i) {
1756             defs.push_back(newDefs[i]);
1757             mask |= 1 << i;
1758          }
1759
1760          TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1761          texi->tex.levelZero = 1;
1762          texi->tex.mask = mask;
1763          texi->tex.useOffsets = 0;
1764          texi->tex.r = 0xffff;
1765          texi->tex.s = 0xffff;
1766
1767          info->prop.fp.readsFramebuffer = true;
1768          break;
1769       }
1770
1771       const DataType dType = getDType(insn);
1772       Value *indirect;
1773       bool input = op != nir_intrinsic_load_output;
1774       operation nvirOp;
1775       uint32_t mode = 0;
1776
1777       uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1778       nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1779
1780       // see load_barycentric_* handling
1781       if (prog->getType() == Program::TYPE_FRAGMENT) {
1782          mode = translateInterpMode(&vary, nvirOp);
1783          if (op == nir_intrinsic_load_interpolated_input) {
1784             ImmediateValue immMode;
1785             if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1786                mode |= immMode.reg.data.u32;
1787          }
1788       }
1789
1790       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1791          uint32_t address = getSlotAddress(insn, idx, i);
1792          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1793          if (prog->getType() == Program::TYPE_FRAGMENT) {
1794             int s = 1;
1795             if (typeSizeof(dType) == 8) {
1796                Value *lo = getSSA();
1797                Value *hi = getSSA();
1798                Instruction *interp;
1799
1800                interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1801                if (nvirOp == OP_PINTERP)
1802                   interp->setSrc(s++, fp.position);
1803                if (mode & NV50_IR_INTERP_OFFSET)
1804                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1805                interp->setInterpolate(mode);
1806                interp->setIndirect(0, 0, indirect);
1807
1808                Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1809                interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1810                if (nvirOp == OP_PINTERP)
1811                   interp->setSrc(s++, fp.position);
1812                if (mode & NV50_IR_INTERP_OFFSET)
1813                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1814                interp->setInterpolate(mode);
1815                interp->setIndirect(0, 0, indirect);
1816
1817                mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1818             } else {
1819                Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1820                if (nvirOp == OP_PINTERP)
1821                   interp->setSrc(s++, fp.position);
1822                if (mode & NV50_IR_INTERP_OFFSET)
1823                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1824                interp->setInterpolate(mode);
1825                interp->setIndirect(0, 0, indirect);
1826             }
1827          } else {
1828             mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1829          }
1830       }
1831       break;
1832    }
1833    case nir_intrinsic_load_barycentric_at_offset:
1834    case nir_intrinsic_load_barycentric_at_sample:
1835    case nir_intrinsic_load_barycentric_centroid:
1836    case nir_intrinsic_load_barycentric_pixel:
1837    case nir_intrinsic_load_barycentric_sample: {
1838       LValues &newDefs = convert(&insn->dest);
1839       uint32_t mode;
1840
1841       if (op == nir_intrinsic_load_barycentric_centroid ||
1842           op == nir_intrinsic_load_barycentric_sample) {
1843          mode = NV50_IR_INTERP_CENTROID;
1844       } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1845          Value *offs[2];
1846          for (uint8_t c = 0; c < 2; c++) {
1847             offs[c] = getScratch();
1848             mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1849             mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1850             mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1851             mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1852          }
1853          mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1854
1855          mode = NV50_IR_INTERP_OFFSET;
1856       } else if (op == nir_intrinsic_load_barycentric_pixel) {
1857          mode = NV50_IR_INTERP_DEFAULT;
1858       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1859          info->prop.fp.readsSampleLocations = true;
1860          mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1861          mode = NV50_IR_INTERP_OFFSET;
1862       } else {
1863          unreachable("all intrinsics already handled above");
1864       }
1865
1866       loadImm(newDefs[1], mode);
1867       break;
1868    }
1869    case nir_intrinsic_discard:
1870       mkOp(OP_DISCARD, TYPE_NONE, NULL);
1871       break;
1872    case nir_intrinsic_discard_if: {
1873       Value *pred = getSSA(1, FILE_PREDICATE);
1874       if (insn->num_components > 1) {
1875          ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1876          assert(false);
1877          return false;
1878       }
1879       mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1880       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1881       break;
1882    }
1883    case nir_intrinsic_load_base_vertex:
1884    case nir_intrinsic_load_base_instance:
1885    case nir_intrinsic_load_draw_id:
1886    case nir_intrinsic_load_front_face:
1887    case nir_intrinsic_load_helper_invocation:
1888    case nir_intrinsic_load_instance_id:
1889    case nir_intrinsic_load_invocation_id:
1890    case nir_intrinsic_load_local_group_size:
1891    case nir_intrinsic_load_local_invocation_id:
1892    case nir_intrinsic_load_num_work_groups:
1893    case nir_intrinsic_load_patch_vertices_in:
1894    case nir_intrinsic_load_primitive_id:
1895    case nir_intrinsic_load_sample_id:
1896    case nir_intrinsic_load_sample_mask_in:
1897    case nir_intrinsic_load_sample_pos:
1898    case nir_intrinsic_load_subgroup_eq_mask:
1899    case nir_intrinsic_load_subgroup_ge_mask:
1900    case nir_intrinsic_load_subgroup_gt_mask:
1901    case nir_intrinsic_load_subgroup_le_mask:
1902    case nir_intrinsic_load_subgroup_lt_mask:
1903    case nir_intrinsic_load_subgroup_invocation:
1904    case nir_intrinsic_load_tess_coord:
1905    case nir_intrinsic_load_tess_level_inner:
1906    case nir_intrinsic_load_tess_level_outer:
1907    case nir_intrinsic_load_vertex_id:
1908    case nir_intrinsic_load_work_group_id: {
1909       const DataType dType = getDType(insn);
1910       SVSemantic sv = convert(op);
1911       LValues &newDefs = convert(&insn->dest);
1912
1913       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1914          Value *def;
1915          if (typeSizeof(dType) == 8)
1916             def = getSSA();
1917          else
1918             def = newDefs[i];
1919
1920          if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1921             loadImm(def, 0u);
1922          } else {
1923             Symbol *sym = mkSysVal(sv, i);
1924             Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1925             if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1926                rdsv->perPatch = 1;
1927          }
1928
1929          if (typeSizeof(dType) == 8)
1930             mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1931       }
1932       break;
1933    }
1934    // constants
1935    case nir_intrinsic_load_subgroup_size: {
1936       LValues &newDefs = convert(&insn->dest);
1937       loadImm(newDefs[0], 32u);
1938       break;
1939    }
1940    case nir_intrinsic_vote_all:
1941    case nir_intrinsic_vote_any:
1942    case nir_intrinsic_vote_ieq: {
1943       LValues &newDefs = convert(&insn->dest);
1944       Value *pred = getScratch(1, FILE_PREDICATE);
1945       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1946       mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
1947       mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
1948       break;
1949    }
1950    case nir_intrinsic_ballot: {
1951       LValues &newDefs = convert(&insn->dest);
1952       Value *pred = getSSA(1, FILE_PREDICATE);
1953       mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1954       mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;
1955       break;
1956    }
1957    case nir_intrinsic_read_first_invocation:
1958    case nir_intrinsic_read_invocation: {
1959       LValues &newDefs = convert(&insn->dest);
1960       const DataType dType = getDType(insn);
1961       Value *tmp = getScratch();
1962
1963       if (op == nir_intrinsic_read_first_invocation) {
1964          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
1965          mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1966          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1967       } else
1968          tmp = getSrc(&insn->src[1], 0);
1969
1970       for (uint8_t i = 0; i < insn->num_components; ++i) {
1971          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
1972             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
1973       }
1974       break;
1975    }
1976    default:
1977       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
1978       return false;
1979    }
1980
1981    return true;
1982 }
1983
1984 bool
1985 Converter::visit(nir_jump_instr *insn)
1986 {
1987    switch (insn->type) {
1988    case nir_jump_return:
1989       // TODO: this only works in the main function
1990       mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
1991       bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
1992       break;
1993    case nir_jump_break:
1994    case nir_jump_continue: {
1995       bool isBreak = insn->type == nir_jump_break;
1996       nir_block *block = insn->instr.block;
1997       assert(!block->successors[1]);
1998       BasicBlock *target = convert(block->successors[0]);
1999       mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
2000       bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
2001       break;
2002    }
2003    default:
2004       ERROR("unknown nir_jump_type %u\n", insn->type);
2005       return false;
2006    }
2007
2008    return true;
2009 }
2010
2011 bool
2012 Converter::visit(nir_load_const_instr *insn)
2013 {
2014    assert(insn->def.bit_size <= 64);
2015
2016    LValues &newDefs = convert(&insn->def);
2017    for (int i = 0; i < insn->def.num_components; i++) {
2018       switch (insn->def.bit_size) {
2019       case 64:
2020          loadImm(newDefs[i], insn->value.u64[i]);
2021          break;
2022       case 32:
2023          loadImm(newDefs[i], insn->value.u32[i]);
2024          break;
2025       case 16:
2026          loadImm(newDefs[i], insn->value.u16[i]);
2027          break;
2028       case 8:
2029          loadImm(newDefs[i], insn->value.u8[i]);
2030          break;
2031       }
2032    }
2033    return true;
2034 }
2035
2036 #define DEFAULT_CHECKS \
2037       if (insn->dest.dest.ssa.num_components > 1) { \
2038          ERROR("nir_alu_instr only supported with 1 component!\n"); \
2039          return false; \
2040       } \
2041       if (insn->dest.write_mask != 1) { \
2042          ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
2043          return false; \
2044       }
2045 bool
2046 Converter::visit(nir_alu_instr *insn)
2047 {
2048    const nir_op op = insn->op;
2049    const nir_op_info &info = nir_op_infos[op];
2050    DataType dType = getDType(insn);
2051    const std::vector<DataType> sTypes = getSTypes(insn);
2052
2053    Instruction *oldPos = this->bb->getExit();
2054
2055    switch (op) {
2056    case nir_op_fabs:
2057    case nir_op_iabs:
2058    case nir_op_fadd:
2059    case nir_op_iadd:
2060    case nir_op_fand:
2061    case nir_op_iand:
2062    case nir_op_fceil:
2063    case nir_op_fcos:
2064    case nir_op_fddx:
2065    case nir_op_fddx_coarse:
2066    case nir_op_fddx_fine:
2067    case nir_op_fddy:
2068    case nir_op_fddy_coarse:
2069    case nir_op_fddy_fine:
2070    case nir_op_fdiv:
2071    case nir_op_idiv:
2072    case nir_op_udiv:
2073    case nir_op_fexp2:
2074    case nir_op_ffloor:
2075    case nir_op_ffma:
2076    case nir_op_flog2:
2077    case nir_op_fmax:
2078    case nir_op_imax:
2079    case nir_op_umax:
2080    case nir_op_fmin:
2081    case nir_op_imin:
2082    case nir_op_umin:
2083    case nir_op_fmod:
2084    case nir_op_imod:
2085    case nir_op_umod:
2086    case nir_op_fmul:
2087    case nir_op_imul:
2088    case nir_op_imul_high:
2089    case nir_op_umul_high:
2090    case nir_op_fneg:
2091    case nir_op_ineg:
2092    case nir_op_fnot:
2093    case nir_op_inot:
2094    case nir_op_for:
2095    case nir_op_ior:
2096    case nir_op_pack_64_2x32_split:
2097    case nir_op_fpow:
2098    case nir_op_frcp:
2099    case nir_op_frem:
2100    case nir_op_irem:
2101    case nir_op_frsq:
2102    case nir_op_fsat:
2103    case nir_op_ishr:
2104    case nir_op_ushr:
2105    case nir_op_fsin:
2106    case nir_op_fsqrt:
2107    case nir_op_fsub:
2108    case nir_op_isub:
2109    case nir_op_ftrunc:
2110    case nir_op_ishl:
2111    case nir_op_fxor:
2112    case nir_op_ixor: {
2113       DEFAULT_CHECKS;
2114       LValues &newDefs = convert(&insn->dest);
2115       operation preOp = preOperationNeeded(op);
2116       if (preOp != OP_NOP) {
2117          assert(info.num_inputs < 2);
2118          Value *tmp = getSSA(typeSizeof(dType));
2119          Instruction *i0 = mkOp(preOp, dType, tmp);
2120          Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2121          if (info.num_inputs) {
2122             i0->setSrc(0, getSrc(&insn->src[0]));
2123             i1->setSrc(0, tmp);
2124          }
2125          i1->subOp = getSubOp(op);
2126       } else {
2127          Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2128          for (unsigned s = 0u; s < info.num_inputs; ++s) {
2129             i->setSrc(s, getSrc(&insn->src[s]));
2130          }
2131          i->subOp = getSubOp(op);
2132       }
2133       break;
2134    }
2135    case nir_op_ifind_msb:
2136    case nir_op_ufind_msb: {
2137       DEFAULT_CHECKS;
2138       LValues &newDefs = convert(&insn->dest);
2139       dType = sTypes[0];
2140       mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2141       break;
2142    }
2143    case nir_op_fround_even: {
2144       DEFAULT_CHECKS;
2145       LValues &newDefs = convert(&insn->dest);
2146       mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2147       break;
2148    }
2149    // convert instructions
2150    case nir_op_f2f32:
2151    case nir_op_f2i32:
2152    case nir_op_f2u32:
2153    case nir_op_i2f32:
2154    case nir_op_i2i32:
2155    case nir_op_u2f32:
2156    case nir_op_u2u32:
2157    case nir_op_f2f64:
2158    case nir_op_f2i64:
2159    case nir_op_f2u64:
2160    case nir_op_i2f64:
2161    case nir_op_i2i64:
2162    case nir_op_u2f64:
2163    case nir_op_u2u64: {
2164       DEFAULT_CHECKS;
2165       LValues &newDefs = convert(&insn->dest);
2166       Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2167       if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2168          i->rnd = ROUND_Z;
2169       i->sType = sTypes[0];
2170       break;
2171    }
2172    // compare instructions
2173    case nir_op_feq32:
2174    case nir_op_ieq32:
2175    case nir_op_fge32:
2176    case nir_op_ige32:
2177    case nir_op_uge32:
2178    case nir_op_flt32:
2179    case nir_op_ilt32:
2180    case nir_op_ult32:
2181    case nir_op_fne32:
2182    case nir_op_ine32: {
2183       DEFAULT_CHECKS;
2184       LValues &newDefs = convert(&insn->dest);
2185       Instruction *i = mkCmp(getOperation(op),
2186                              getCondCode(op),
2187                              dType,
2188                              newDefs[0],
2189                              dType,
2190                              getSrc(&insn->src[0]),
2191                              getSrc(&insn->src[1]));
2192       if (info.num_inputs == 3)
2193          i->setSrc(2, getSrc(&insn->src[2]));
2194       i->sType = sTypes[0];
2195       break;
2196    }
2197    // those are weird ALU ops and need special handling, because
2198    //   1. they are always componend based
2199    //   2. they basically just merge multiple values into one data type
2200    case nir_op_imov:
2201    case nir_op_fmov:
2202    case nir_op_vec2:
2203    case nir_op_vec3:
2204    case nir_op_vec4: {
2205       LValues &newDefs = convert(&insn->dest);
2206       for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2207          mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2208       }
2209       break;
2210    }
2211    // (un)pack
2212    case nir_op_pack_64_2x32: {
2213       LValues &newDefs = convert(&insn->dest);
2214       Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2215       merge->setSrc(0, getSrc(&insn->src[0], 0));
2216       merge->setSrc(1, getSrc(&insn->src[0], 1));
2217       break;
2218    }
2219    case nir_op_pack_half_2x16_split: {
2220       LValues &newDefs = convert(&insn->dest);
2221       Value *tmpH = getSSA();
2222       Value *tmpL = getSSA();
2223
2224       mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2225       mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2226       mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2227       break;
2228    }
2229    case nir_op_unpack_half_2x16_split_x:
2230    case nir_op_unpack_half_2x16_split_y: {
2231       LValues &newDefs = convert(&insn->dest);
2232       Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2233       if (op == nir_op_unpack_half_2x16_split_y)
2234          cvt->subOp = 1;
2235       break;
2236    }
2237    case nir_op_unpack_64_2x32: {
2238       LValues &newDefs = convert(&insn->dest);
2239       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2240       break;
2241    }
2242    case nir_op_unpack_64_2x32_split_x: {
2243       LValues &newDefs = convert(&insn->dest);
2244       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2245       break;
2246    }
2247    case nir_op_unpack_64_2x32_split_y: {
2248       LValues &newDefs = convert(&insn->dest);
2249       mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2250       break;
2251    }
2252    // special instructions
2253    case nir_op_fsign:
2254    case nir_op_isign: {
2255       DEFAULT_CHECKS;
2256       DataType iType;
2257       if (::isFloatType(dType))
2258          iType = TYPE_F32;
2259       else
2260          iType = TYPE_S32;
2261
2262       LValues &newDefs = convert(&insn->dest);
2263       LValue *val0 = getScratch();
2264       LValue *val1 = getScratch();
2265       mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2266       mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2267
2268       if (dType == TYPE_F64) {
2269          mkOp2(OP_SUB, iType, val0, val0, val1);
2270          mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2271       } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2272          mkOp2(OP_SUB, iType, val0, val1, val0);
2273          mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2274          mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2275       } else if (::isFloatType(dType))
2276          mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2277       else
2278          mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2279       break;
2280    }
2281    case nir_op_fcsel:
2282    case nir_op_b32csel: {
2283       DEFAULT_CHECKS;
2284       LValues &newDefs = convert(&insn->dest);
2285       mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2286       break;
2287    }
2288    case nir_op_ibitfield_extract:
2289    case nir_op_ubitfield_extract: {
2290       DEFAULT_CHECKS;
2291       Value *tmp = getSSA();
2292       LValues &newDefs = convert(&insn->dest);
2293       mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2294       mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2295       break;
2296    }
2297    case nir_op_bfm: {
2298       DEFAULT_CHECKS;
2299       LValues &newDefs = convert(&insn->dest);
2300       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2301       break;
2302    }
2303    case nir_op_bitfield_insert: {
2304       DEFAULT_CHECKS;
2305       LValues &newDefs = convert(&insn->dest);
2306       LValue *temp = getSSA();
2307       mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2308       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2309       break;
2310    }
2311    case nir_op_bit_count: {
2312       DEFAULT_CHECKS;
2313       LValues &newDefs = convert(&insn->dest);
2314       mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2315       break;
2316    }
2317    case nir_op_bitfield_reverse: {
2318       DEFAULT_CHECKS;
2319       LValues &newDefs = convert(&insn->dest);
2320       mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2321       break;
2322    }
2323    case nir_op_find_lsb: {
2324       DEFAULT_CHECKS;
2325       LValues &newDefs = convert(&insn->dest);
2326       Value *tmp = getSSA();
2327       mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2328       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2329       break;
2330    }
2331    // boolean conversions
2332    case nir_op_b2f32: {
2333       DEFAULT_CHECKS;
2334       LValues &newDefs = convert(&insn->dest);
2335       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2336       break;
2337    }
2338    case nir_op_b2f64: {
2339       DEFAULT_CHECKS;
2340       LValues &newDefs = convert(&insn->dest);
2341       Value *tmp = getSSA(4);
2342       mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2343       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2344       break;
2345    }
2346    case nir_op_f2b32:
2347    case nir_op_i2b32: {
2348       DEFAULT_CHECKS;
2349       LValues &newDefs = convert(&insn->dest);
2350       Value *src1;
2351       if (typeSizeof(sTypes[0]) == 8) {
2352          src1 = loadImm(getSSA(8), 0.0);
2353       } else {
2354          src1 = zero;
2355       }
2356       CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2357       mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2358       break;
2359    }
2360    case nir_op_b2i32: {
2361       DEFAULT_CHECKS;
2362       LValues &newDefs = convert(&insn->dest);
2363       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2364       break;
2365    }
2366    case nir_op_b2i64: {
2367       DEFAULT_CHECKS;
2368       LValues &newDefs = convert(&insn->dest);
2369       LValue *def = getScratch();
2370       mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2371       mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2372       break;
2373    }
2374    default:
2375       ERROR("unknown nir_op %s\n", info.name);
2376       return false;
2377    }
2378
2379    if (!oldPos) {
2380       oldPos = this->bb->getEntry();
2381       oldPos->precise = insn->exact;
2382    }
2383
2384    if (unlikely(!oldPos))
2385       return true;
2386
2387    while (oldPos->next) {
2388       oldPos = oldPos->next;
2389       oldPos->precise = insn->exact;
2390    }
2391    oldPos->saturate = insn->dest.saturate;
2392
2393    return true;
2394 }
2395 #undef DEFAULT_CHECKS
2396
2397 bool
2398 Converter::visit(nir_ssa_undef_instr *insn)
2399 {
2400    LValues &newDefs = convert(&insn->def);
2401    for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2402       mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2403    }
2404    return true;
2405 }
2406
2407 #define CASE_SAMPLER(ty) \
2408    case GLSL_SAMPLER_DIM_ ## ty : \
2409       if (isArray && !isShadow) \
2410          return TEX_TARGET_ ## ty ## _ARRAY; \
2411       else if (!isArray && isShadow) \
2412          return TEX_TARGET_## ty ## _SHADOW; \
2413       else if (isArray && isShadow) \
2414          return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
2415       else \
2416          return TEX_TARGET_ ## ty
2417
2418 TexTarget
2419 Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
2420 {
2421    switch (dim) {
2422    CASE_SAMPLER(1D);
2423    CASE_SAMPLER(2D);
2424    CASE_SAMPLER(CUBE);
2425    case GLSL_SAMPLER_DIM_3D:
2426       return TEX_TARGET_3D;
2427    case GLSL_SAMPLER_DIM_MS:
2428       if (isArray)
2429          return TEX_TARGET_2D_MS_ARRAY;
2430       return TEX_TARGET_2D_MS;
2431    case GLSL_SAMPLER_DIM_RECT:
2432       if (isShadow)
2433          return TEX_TARGET_RECT_SHADOW;
2434       return TEX_TARGET_RECT;
2435    case GLSL_SAMPLER_DIM_BUF:
2436       return TEX_TARGET_BUFFER;
2437    case GLSL_SAMPLER_DIM_EXTERNAL:
2438       return TEX_TARGET_2D;
2439    default:
2440       ERROR("unknown glsl_sampler_dim %u\n", dim);
2441       assert(false);
2442       return TEX_TARGET_COUNT;
2443    }
2444 }
2445 #undef CASE_SAMPLER
2446
2447 Value*
2448 Converter::applyProjection(Value *src, Value *proj)
2449 {
2450    if (!proj)
2451       return src;
2452    return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
2453 }
2454
2455 bool
2456 Converter::visit(nir_tex_instr *insn)
2457 {
2458    switch (insn->op) {
2459    case nir_texop_lod:
2460    case nir_texop_query_levels:
2461    case nir_texop_tex:
2462    case nir_texop_texture_samples:
2463    case nir_texop_tg4:
2464    case nir_texop_txb:
2465    case nir_texop_txd:
2466    case nir_texop_txf:
2467    case nir_texop_txf_ms:
2468    case nir_texop_txl:
2469    case nir_texop_txs: {
2470       LValues &newDefs = convert(&insn->dest);
2471       std::vector<Value*> srcs;
2472       std::vector<Value*> defs;
2473       std::vector<nir_src*> offsets;
2474       uint8_t mask = 0;
2475       bool lz = false;
2476       Value *proj = NULL;
2477       TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);
2478       operation op = getOperation(insn->op);
2479
2480       int r, s;
2481       int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
2482       int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
2483       int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
2484       int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
2485       int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
2486       int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
2487       int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);
2488       int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);
2489       int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);
2490       int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);
2491       int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);
2492
2493       if (projIdx != -1)
2494          proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));
2495
2496       srcs.resize(insn->coord_components);
2497       for (uint8_t i = 0u; i < insn->coord_components; ++i)
2498          srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);
2499
2500       // sometimes we get less args than target.getArgCount, but codegen expects the latter
2501       if (insn->coord_components) {
2502          uint32_t argCount = target.getArgCount();
2503
2504          if (target.isMS())
2505             argCount -= 1;
2506
2507          for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)
2508             srcs.push_back(getSSA());
2509       }
2510
2511       if (insn->op == nir_texop_texture_samples)
2512          srcs.push_back(zero);
2513       else if (!insn->num_srcs)
2514          srcs.push_back(loadImm(NULL, 0));
2515       if (biasIdx != -1)
2516          srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));
2517       if (lodIdx != -1)
2518          srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));
2519       else if (op == OP_TXF)
2520          lz = true;
2521       if (msIdx != -1)
2522          srcs.push_back(getSrc(&insn->src[msIdx].src, 0));
2523       if (offsetIdx != -1)
2524          offsets.push_back(&insn->src[offsetIdx].src);
2525       if (compIdx != -1)
2526          srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));
2527       if (texOffIdx != -1) {
2528          srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));
2529          texOffIdx = srcs.size() - 1;
2530       }
2531       if (sampOffIdx != -1) {
2532          srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));
2533          sampOffIdx = srcs.size() - 1;
2534       }
2535
2536       r = insn->texture_index;
2537       s = insn->sampler_index;
2538
2539       defs.resize(newDefs.size());
2540       for (uint8_t d = 0u; d < newDefs.size(); ++d) {
2541          defs[d] = newDefs[d];
2542          mask |= 1 << d;
2543       }
2544       if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))
2545          lz = true;
2546
2547       TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);
2548       texi->tex.levelZero = lz;
2549       texi->tex.mask = mask;
2550
2551       if (texOffIdx != -1)
2552          texi->tex.rIndirectSrc = texOffIdx;
2553       if (sampOffIdx != -1)
2554          texi->tex.sIndirectSrc = sampOffIdx;
2555
2556       switch (insn->op) {
2557       case nir_texop_tg4:
2558          if (!target.isShadow())
2559             texi->tex.gatherComp = insn->component;
2560          break;
2561       case nir_texop_txs:
2562          texi->tex.query = TXQ_DIMS;
2563          break;
2564       case nir_texop_texture_samples:
2565          texi->tex.mask = 0x4;
2566          texi->tex.query = TXQ_TYPE;
2567          break;
2568       case nir_texop_query_levels:
2569          texi->tex.mask = 0x8;
2570          texi->tex.query = TXQ_DIMS;
2571          break;
2572       default:
2573          break;
2574       }
2575
2576       texi->tex.useOffsets = offsets.size();
2577       if (texi->tex.useOffsets) {
2578          for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {
2579             for (uint32_t c = 0u; c < 3; ++c) {
2580                uint8_t s2 = std::min(c, target.getDim() - 1);
2581                texi->offset[s][c].set(getSrc(offsets[s], s2));
2582                texi->offset[s][c].setInsn(texi);
2583             }
2584          }
2585       }
2586
2587       if (ddxIdx != -1 && ddyIdx != -1) {
2588          for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {
2589             texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));
2590             texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));
2591          }
2592       }
2593
2594       break;
2595    }
2596    default:
2597       ERROR("unknown nir_texop %u\n", insn->op);
2598       return false;
2599    }
2600    return true;
2601 }
2602
2603 bool
2604 Converter::run()
2605 {
2606    bool progress;
2607
2608    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2609       nir_print_shader(nir, stderr);
2610
2611    struct nir_lower_subgroups_options subgroup_options = {
2612       .subgroup_size = 32,
2613       .ballot_bit_size = 32,
2614    };
2615
2616    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2617    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
2618    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2619    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2620    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2621    NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2622    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2623
2624    do {
2625       progress = false;
2626       NIR_PASS(progress, nir, nir_copy_prop);
2627       NIR_PASS(progress, nir, nir_opt_remove_phis);
2628       NIR_PASS(progress, nir, nir_opt_trivial_continues);
2629       NIR_PASS(progress, nir, nir_opt_cse);
2630       NIR_PASS(progress, nir, nir_opt_algebraic);
2631       NIR_PASS(progress, nir, nir_opt_constant_folding);
2632       NIR_PASS(progress, nir, nir_copy_prop);
2633       NIR_PASS(progress, nir, nir_opt_dce);
2634       NIR_PASS(progress, nir, nir_opt_dead_cf);
2635    } while (progress);
2636
2637    NIR_PASS_V(nir, nir_lower_bool_to_int32);
2638    NIR_PASS_V(nir, nir_lower_locals_to_regs);
2639    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2640    NIR_PASS_V(nir, nir_convert_from_ssa, true);
2641
2642    // Garbage collect dead instructions
2643    nir_sweep(nir);
2644
2645    if (!parseNIR()) {
2646       ERROR("Couldn't prase NIR!\n");
2647       return false;
2648    }
2649
2650    if (!assignSlots()) {
2651       ERROR("Couldn't assign slots!\n");
2652       return false;
2653    }
2654
2655    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2656       nir_print_shader(nir, stderr);
2657
2658    nir_foreach_function(function, nir) {
2659       if (!visit(function))
2660          return false;
2661    }
2662
2663    return true;
2664 }
2665
2666 } // unnamed namespace
2667
2668 namespace nv50_ir {
2669
2670 bool
2671 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2672 {
2673    nir_shader *nir = (nir_shader*)info->bin.source;
2674    Converter converter(this, nir, info);
2675    bool result = converter.run();
2676    if (!result)
2677       return result;
2678    LoweringHelper lowering;
2679    lowering.run(this);
2680    tlsSize = info->bin.tlsSpace;
2681    return result;
2682 }
2683
2684 } // namespace nv50_ir