nv50/ir/nir: implement loading system values
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
// Number of attribute slots a GLSL type occupies, counting arrays per
// element and without doubling rows for vertex-shader 64-bit inputs
// (second argument is false).
int
type_size(const struct glsl_type *type)
{
   return glsl_count_attribute_slots(type, false);
}
58
// Translates a NIR shader into nv50 IR. One instance per shader; run()
// drives parsing, slot assignment and per-instruction emission.
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   typedef std::vector<LValue*> LValues;
   // Maps a NIR ssa/register index to its per-component LValues.
   typedef unordered_map<unsigned, LValues> NirDefMap;
   // Maps a nir_block index to the BasicBlock emitted for it.
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   SVSemantic convert(nir_intrinsic_op);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   // Byte address of an input/output varying slot for load/store emission.
   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   // Emit a (possibly 64-bit split) load from the given file; stores go
   // through storeTo, which mirrors the 64-bit splitting.
   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   operation getOperation(nir_op);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   bool assignSlots();
   bool parseNIR();

   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   NirBlockMap blocks;
   unsigned int curLoopDepth;   // nesting depth while visiting nir_loops

   BasicBlock *exit;            // shader epilogue block
   Value *zero;                 // cached immediate 0 for reuse

   int clipVertexOutput;        // output index used for clip-vertex lowering, -1 if none

   // per-stage scratch state; only the members for the current stage are used
   union {
      struct {
         Value *position;
      } fp;
   };
};
157
158 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
159 : ConverterCommon(prog, info),
160 nir(nir),
161 curLoopDepth(0),
162 clipVertexOutput(-1)
163 {
164 zero = mkImm((uint32_t)0);
165 }
166
167 BasicBlock *
168 Converter::convert(nir_block *block)
169 {
170 NirBlockMap::iterator it = blocks.find(block->index);
171 if (it != blocks.end())
172 return it->second;
173
174 BasicBlock *bb = new BasicBlock(func);
175 blocks[block->index] = bb;
176 return bb;
177 }
178
179 bool
180 Converter::isFloatType(nir_alu_type type)
181 {
182 return nir_alu_type_get_base_type(type) == nir_type_float;
183 }
184
185 bool
186 Converter::isSignedType(nir_alu_type type)
187 {
188 return nir_alu_type_get_base_type(type) == nir_type_int;
189 }
190
191 bool
192 Converter::isResultFloat(nir_op op)
193 {
194 const nir_op_info &info = nir_op_infos[op];
195 if (info.output_type != nir_type_invalid)
196 return isFloatType(info.output_type);
197
198 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
199 assert(false);
200 return true;
201 }
202
203 bool
204 Converter::isResultSigned(nir_op op)
205 {
206 switch (op) {
207 // there is no umul and we get wrong results if we treat all muls as signed
208 case nir_op_imul:
209 case nir_op_inot:
210 return false;
211 default:
212 const nir_op_info &info = nir_op_infos[op];
213 if (info.output_type != nir_type_invalid)
214 return isSignedType(info.output_type);
215 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
216 assert(false);
217 return true;
218 }
219 }
220
221 DataType
222 Converter::getDType(nir_alu_instr *insn)
223 {
224 if (insn->dest.dest.is_ssa)
225 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
226 else
227 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
228 }
229
230 DataType
231 Converter::getDType(nir_intrinsic_instr *insn)
232 {
233 if (insn->dest.is_ssa)
234 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
235 else
236 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
237 }
238
239 DataType
240 Converter::getDType(nir_op op, uint8_t bitSize)
241 {
242 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
243 if (ty == TYPE_NONE) {
244 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
245 assert(false);
246 }
247 return ty;
248 }
249
250 std::vector<DataType>
251 Converter::getSTypes(nir_alu_instr *insn)
252 {
253 const nir_op_info &info = nir_op_infos[insn->op];
254 std::vector<DataType> res(info.num_inputs);
255
256 for (uint8_t i = 0; i < info.num_inputs; ++i) {
257 if (info.input_types[i] != nir_type_invalid) {
258 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
259 } else {
260 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
261 assert(false);
262 res[i] = TYPE_NONE;
263 break;
264 }
265 }
266
267 return res;
268 }
269
270 DataType
271 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
272 {
273 uint8_t bitSize;
274 if (src.is_ssa)
275 bitSize = src.ssa->bit_size;
276 else
277 bitSize = src.reg.reg->bit_size;
278
279 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
280 if (ty == TYPE_NONE) {
281 const char *str;
282 if (isFloat)
283 str = "float";
284 else if (isSigned)
285 str = "int";
286 else
287 str = "uint";
288 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
289 assert(false);
290 }
291 return ty;
292 }
293
// Map a nir_op onto the nv50 IR operation that implements it. Type
// distinctions (float vs int variants) are carried by the operand
// DataTypes, so several nir ops collapse onto one IR op here; sub-op
// details (e.g. mul_high) are handled separately in getSubOp().
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_fand:
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   // all numeric conversions become a single CVT; source/dest DataTypes
   // select the concrete conversion
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   // coarse/fine derivative variants are distinguished later via subops
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   // 64-bit value built from two 32-bit halves
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_fnot:
   case nir_op_inot:
      return OP_NOT;
   case nir_op_for:
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   // all comparisons are OP_SET; the condition code comes from getCondCode()
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_fxor:
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}
422
423 operation
424 Converter::preOperationNeeded(nir_op op)
425 {
426 switch (op) {
427 case nir_op_fcos:
428 case nir_op_fsin:
429 return OP_PRESIN;
430 default:
431 return OP_NOP;
432 }
433 }
434
435 int
436 Converter::getSubOp(nir_op op)
437 {
438 switch (op) {
439 case nir_op_imul_high:
440 case nir_op_umul_high:
441 return NV50_IR_SUBOP_MUL_HIGH;
442 default:
443 return 0;
444 }
445 }
446
// Condition code for a nir comparison op (all of which become OP_SET,
// see getOperation). Float "not equal" uses the unordered CC_NEU so NaN
// compares as not-equal; integer uses plain CC_NE.
CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}
472
// An ALU destination just wraps a nir_dest; forward to that overload.
Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}
478
479 Converter::LValues&
480 Converter::convert(nir_dest *dest)
481 {
482 if (dest->is_ssa)
483 return convert(&dest->ssa);
484 if (dest->reg.indirect) {
485 ERROR("no support for indirects.");
486 assert(false);
487 }
488 return convert(dest->reg.reg);
489 }
490
491 Converter::LValues&
492 Converter::convert(nir_register *reg)
493 {
494 NirDefMap::iterator it = regDefs.find(reg->index);
495 if (it != regDefs.end())
496 return it->second;
497
498 LValues newDef(reg->num_components);
499 for (uint8_t i = 0; i < reg->num_components; i++)
500 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
501 return regDefs[reg->index] = newDef;
502 }
503
504 Converter::LValues&
505 Converter::convert(nir_ssa_def *def)
506 {
507 NirDefMap::iterator it = ssaDefs.find(def->index);
508 if (it != ssaDefs.end())
509 return it->second;
510
511 LValues newDef(def->num_components);
512 for (uint8_t i = 0; i < def->num_components; i++)
513 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
514 return ssaDefs[def->index] = newDef;
515 }
516
517 Value*
518 Converter::getSrc(nir_alu_src *src, uint8_t component)
519 {
520 if (src->abs || src->negate) {
521 ERROR("modifiers currently not supported on nir_alu_src\n");
522 assert(false);
523 }
524 return getSrc(&src->src, src->swizzle[component]);
525 }
526
527 Value*
528 Converter::getSrc(nir_register *reg, uint8_t idx)
529 {
530 NirDefMap::iterator it = regDefs.find(reg->index);
531 if (it == regDefs.end())
532 return convert(reg)[idx];
533 return it->second[idx];
534 }
535
536 Value*
537 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
538 {
539 if (src->is_ssa)
540 return getSrc(src->ssa, idx);
541
542 if (src->reg.indirect) {
543 if (indirect)
544 return getSrc(src->reg.indirect, idx);
545 ERROR("no support for indirects.");
546 assert(false);
547 return NULL;
548 }
549
550 return getSrc(src->reg.reg, idx);
551 }
552
553 Value*
554 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
555 {
556 NirDefMap::iterator it = ssaDefs.find(src->index);
557 if (it == ssaDefs.end()) {
558 ERROR("SSA value %u not found\n", src->index);
559 assert(false);
560 return NULL;
561 }
562 return it->second[idx];
563 }
564
565 uint32_t
566 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
567 {
568 nir_const_value *offset = nir_src_as_const_value(*src);
569
570 if (offset) {
571 indirect = NULL;
572 return offset->u32[0];
573 }
574
575 indirect = getSrc(src, idx, true);
576 return 0;
577 }
578
// As above, but for an intrinsic source: folds in the intrinsic's base
// and scales any dynamic part by 16 (shl 4) to convert a slot index
// into a byte address, placing it in the address register file.
uint32_t
Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
{
   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
   if (indirect)
      indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
   return idx;
}
587
// Translate a vertex-attribute slot into a TGSI semantic name/index
// pair. Generic and texcoord ranges are handled by offset; the rest go
// through an explicit mapping.
static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}
648
// Translate a varying slot into a TGSI semantic name/index pair. The
// patch/generic/texcoord ranges are checked in decreasing slot order so
// each range test only needs a lower bound; the rest use an explicit
// mapping.
static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}
757
// Translate a fragment-shader result slot into a TGSI semantic
// name/index pair.
static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // intentional: "- FRAG_RESULT_COLOR - 2" presumably rebases DATAn
      // slots to a zero-based color index (i.e. slot - FRAG_RESULT_DATA0)
      // — confirm against gl_frag_result enum values.
      *index = slot - FRAG_RESULT_COLOR - 2; // intentional
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}
786
// copy of _mesa_sysval_to_semantic
// Translate a gl_system_value into a TGSI semantic name; the index is
// always 0 for system values.
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}
898
899 void
900 Converter::setInterpolate(nv50_ir_varying *var,
901 uint8_t mode,
902 bool centroid,
903 unsigned semantic)
904 {
905 switch (mode) {
906 case INTERP_MODE_FLAT:
907 var->flat = 1;
908 break;
909 case INTERP_MODE_NONE:
910 if (semantic == TGSI_SEMANTIC_COLOR)
911 var->sc = 1;
912 else if (semantic == TGSI_SEMANTIC_POSITION)
913 var->linear = 1;
914 break;
915 case INTERP_MODE_NOPERSPECTIVE:
916 var->linear = 1;
917 break;
918 case INTERP_MODE_SMOOTH:
919 break;
920 }
921 var->centroid = centroid;
922 }
923
// Number of varying slots a variable occupies in the given stage.
// Arrayed per-vertex IO (GS inputs, most tess IO) has its outer array
// dimension stripped, since that dimension indexes vertices rather
// than slots.
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      // GS inputs are arrayed over the incoming vertices; divide that
      // dimension out.
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}
953
// Populate info->in/out/sv from the NIR shader's variable lists and
// system-value mask, translating locations to TGSI semantics and
// building per-slot component masks, then let the driver finalize the
// layout via info->assignSlots.
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;

   // we have to fixup the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // 64-bit types with more than two components spill into a second slot
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         // 64-bit values use two mask bits per component; odd slots take
         // the upper half of the doubled mask
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   info->numOutputs = 0;
   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            clipVertexOutput = vary;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         case TGSI_SEMANTIC_POSITION:
            // fall back to position for clip-vertex lowering if no
            // explicit clip-vertex output was seen
            if (clipVertexOutput < 0)
               clipVertexOutput = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ll << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   info->numSysVals = 0;
   for (uint8_t i = 0; i < 64; ++i) {
      if (!(nir->info.system_values_read & 1ll << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   // append generated clip-distance outputs, 4 distances per vec4 slot
   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}
1171
// Byte address of one component of an IO slot. 'idx' selects the
// varying, 'slot' the component; 64-bit types consume two component
// slots each, overflowing into the next varying when needed.
uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   // Loads size by their dest, stores by the value being written.
   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   if (typeSizeof(ty) == 8) {
      // 64-bit: two component slots per value; carry into the next
      // varying index past component 3
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   // slot[] holds 32-bit word offsets; scale to bytes
   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}
1222
1223 Instruction *
1224 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1225 uint32_t base, uint8_t c, Value *indirect0,
1226 Value *indirect1, bool patch)
1227 {
1228 unsigned int tySize = typeSizeof(ty);
1229
1230 if (tySize == 8 &&
1231 (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1232 Value *lo = getSSA();
1233 Value *hi = getSSA();
1234
1235 Instruction *loi =
1236 mkLoad(TYPE_U32, lo,
1237 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1238 indirect0);
1239 loi->setIndirect(0, 1, indirect1);
1240 loi->perPatch = patch;
1241
1242 Instruction *hii =
1243 mkLoad(TYPE_U32, hi,
1244 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1245 indirect0);
1246 hii->setIndirect(0, 1, indirect1);
1247 hii->perPatch = patch;
1248
1249 return mkOp2(OP_MERGE, ty, def, lo, hi);
1250 } else {
1251 Instruction *ld =
1252 mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1253 ld->setIndirect(0, 1, indirect1);
1254 ld->perPatch = patch;
1255 return ld;
1256 }
1257 }
1258
1259 void
1260 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1261 DataType ty, Value *src, uint8_t idx, uint8_t c,
1262 Value *indirect0, Value *indirect1)
1263 {
1264 uint8_t size = typeSizeof(ty);
1265 uint32_t address = getSlotAddress(insn, idx, c);
1266
1267 if (size == 8 && indirect0) {
1268 Value *split[2];
1269 mkSplit(split, 4, src);
1270
1271 if (op == OP_EXPORT) {
1272 split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1273 split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1274 }
1275
1276 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1277 split[0])->perPatch = info->out[idx].patch;
1278 mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1279 split[1])->perPatch = info->out[idx].patch;
1280 } else {
1281 if (op == OP_EXPORT)
1282 src = mkMov(getSSA(size), src, ty)->getDef(0);
1283 mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1284 src)->perPatch = info->out[idx].patch;
1285 }
1286 }
1287
1288 bool
1289 Converter::parseNIR()
1290 {
1291 info->io.clipDistances = nir->info.clip_distance_array_size;
1292 info->io.cullDistances = nir->info.cull_distance_array_size;
1293
1294 switch(prog->getType()) {
1295 case Program::TYPE_COMPUTE:
1296 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1297 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1298 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1299 info->bin.smemSize = nir->info.cs.shared_size;
1300 break;
1301 case Program::TYPE_FRAGMENT:
1302 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1303 info->prop.fp.persampleInvocation =
1304 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1305 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1306 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1307 info->prop.fp.readsSampleLocations =
1308 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1309 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1310 info->prop.fp.usesSampleMaskIn =
1311 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1312 break;
1313 case Program::TYPE_GEOMETRY:
1314 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1315 info->prop.gp.instanceCount = nir->info.gs.invocations;
1316 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1317 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1318 break;
1319 case Program::TYPE_TESSELLATION_CONTROL:
1320 case Program::TYPE_TESSELLATION_EVAL:
1321 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1322 info->prop.tp.domain = GL_LINES;
1323 else
1324 info->prop.tp.domain = nir->info.tess.primitive_mode;
1325 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1326 info->prop.tp.outputPrim =
1327 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1328 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1329 info->prop.tp.winding = !nir->info.tess.ccw;
1330 break;
1331 case Program::TYPE_VERTEX:
1332 info->prop.vp.usesDrawParameters =
1333 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1334 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1335 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1336 break;
1337 default:
1338 break;
1339 }
1340
1341 return true;
1342 }
1343
// Convert the NIR "main" function: set up the program's entry/exit blocks,
// emit per-stage prologue code, walk the function body, and terminate with
// OP_EXIT. Only the main function is supported.
bool
Converter::visit(nir_function *function)
{
   // we only support emiting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   // scratch registers for the user-clip-plane epilogue; filled by the
   // store_output handling and consumed by handleUserClipPlanes()
   if (info->io.genUserClip > 0) {
      for (int c = 0; c < 4; ++c)
         clipVtx[c] = getScratch();
   }

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      // outBase = laneid - invocation_id (presumably the first lane of this
      // patch's invocation group — TODO confirm against output addressing)
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      // read position.w and keep 1/w around for perspective interpolation
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   // fall through from the last emitted block into the exit block
   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   if (info->io.genUserClip > 0)
      handleUserClipPlanes();

   // TODO: for non main function this needs to be a OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}
1398
1399 bool
1400 Converter::visit(nir_cf_node *node)
1401 {
1402 switch (node->type) {
1403 case nir_cf_node_block:
1404 return visit(nir_cf_node_as_block(node));
1405 case nir_cf_node_if:
1406 return visit(nir_cf_node_as_if(node));
1407 case nir_cf_node_loop:
1408 return visit(nir_cf_node_as_loop(node));
1409 default:
1410 ERROR("unknown nir_cf_node type %u\n", node->type);
1411 return false;
1412 }
1413 }
1414
1415 bool
1416 Converter::visit(nir_block *block)
1417 {
1418 if (!block->predecessors->entries && block->instr_list.is_empty())
1419 return true;
1420
1421 BasicBlock *bb = convert(block);
1422
1423 setPosition(bb, true);
1424 nir_foreach_instr(insn, block) {
1425 if (!visit(insn))
1426 return false;
1427 }
1428 return true;
1429 }
1430
// Convert a NIR if: branch to the else arm on a false (== 0) condition,
// emit both arms, and rejoin control flow when both arms fall through to
// the same successor block.
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats, if both nodes end up at the end of the if again.
   // the reason for this to not happens are breaks/continues/ret/... which
   // have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   // conditional branch: taken (condition == 0) goes to the else arm
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastThen), true);
   // if the then arm does not already end in control flow, branch over the
   // else arm to the common tail block
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   setPosition(convert(lastElse), true);
   // same fall-through handling for the else arm
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   // both arms reconverge: emit the matching OP_JOIN in the tail block
   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}
1491
// Convert a NIR loop: bracket the body with PREBREAK/PRECONT markers,
// emit the body, and close the back edge (or a fake edge for RA) as needed.
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   // the block right after the loop node is the break target
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      // last body block is reachable: close the loop with an explicit
      // continue unless it already ends in control flow
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}
1528
1529 bool
1530 Converter::visit(nir_instr *insn)
1531 {
1532 switch (insn->type) {
1533 case nir_instr_type_alu:
1534 return visit(nir_instr_as_alu(insn));
1535 case nir_instr_type_intrinsic:
1536 return visit(nir_instr_as_intrinsic(insn));
1537 case nir_instr_type_jump:
1538 return visit(nir_instr_as_jump(insn));
1539 case nir_instr_type_load_const:
1540 return visit(nir_instr_as_load_const(insn));
1541 default:
1542 ERROR("unknown nir_instr type %u\n", insn->type);
1543 return false;
1544 }
1545 return true;
1546 }
1547
1548 SVSemantic
1549 Converter::convert(nir_intrinsic_op intr)
1550 {
1551 switch (intr) {
1552 case nir_intrinsic_load_base_vertex:
1553 return SV_BASEVERTEX;
1554 case nir_intrinsic_load_base_instance:
1555 return SV_BASEINSTANCE;
1556 case nir_intrinsic_load_draw_id:
1557 return SV_DRAWID;
1558 case nir_intrinsic_load_front_face:
1559 return SV_FACE;
1560 case nir_intrinsic_load_helper_invocation:
1561 return SV_THREAD_KILL;
1562 case nir_intrinsic_load_instance_id:
1563 return SV_INSTANCE_ID;
1564 case nir_intrinsic_load_invocation_id:
1565 return SV_INVOCATION_ID;
1566 case nir_intrinsic_load_local_group_size:
1567 return SV_NTID;
1568 case nir_intrinsic_load_local_invocation_id:
1569 return SV_TID;
1570 case nir_intrinsic_load_num_work_groups:
1571 return SV_NCTAID;
1572 case nir_intrinsic_load_patch_vertices_in:
1573 return SV_VERTEX_COUNT;
1574 case nir_intrinsic_load_primitive_id:
1575 return SV_PRIMITIVE_ID;
1576 case nir_intrinsic_load_sample_id:
1577 return SV_SAMPLE_INDEX;
1578 case nir_intrinsic_load_sample_mask_in:
1579 return SV_SAMPLE_MASK;
1580 case nir_intrinsic_load_sample_pos:
1581 return SV_SAMPLE_POS;
1582 case nir_intrinsic_load_subgroup_eq_mask:
1583 return SV_LANEMASK_EQ;
1584 case nir_intrinsic_load_subgroup_ge_mask:
1585 return SV_LANEMASK_GE;
1586 case nir_intrinsic_load_subgroup_gt_mask:
1587 return SV_LANEMASK_GT;
1588 case nir_intrinsic_load_subgroup_le_mask:
1589 return SV_LANEMASK_LE;
1590 case nir_intrinsic_load_subgroup_lt_mask:
1591 return SV_LANEMASK_LT;
1592 case nir_intrinsic_load_subgroup_invocation:
1593 return SV_LANEID;
1594 case nir_intrinsic_load_tess_coord:
1595 return SV_TESS_COORD;
1596 case nir_intrinsic_load_tess_level_inner:
1597 return SV_TESS_INNER;
1598 case nir_intrinsic_load_tess_level_outer:
1599 return SV_TESS_OUTER;
1600 case nir_intrinsic_load_vertex_id:
1601 return SV_VERTEX_ID;
1602 case nir_intrinsic_load_work_group_id:
1603 return SV_CTAID;
1604 default:
1605 ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1606 nir_intrinsic_infos[intr].name);
1607 assert(false);
1608 return SV_LAST;
1609 }
1610 }
1611
// Translate one NIR intrinsic into nv50 IR: uniform loads, shader I/O
// loads/stores (including FBFetch via load_output in fragment shaders),
// barycentric interpolation modes, discard, system-value reads, and a few
// compile-time constants. Unknown intrinsics fail the conversion.
bool
Converter::visit(nir_intrinsic_instr *insn)
{
   nir_intrinsic_op op = insn->intrinsic;

   switch (op) {
   case nir_intrinsic_load_uniform: {
      LValues &newDefs = convert(&insn->dest);
      const DataType dType = getDType(insn);
      Value *indirect;
      uint32_t coffset = getIndirect(insn, 0, 0, indirect);
      for (uint8_t i = 0; i < insn->num_components; ++i) {
         // uniforms are read from constant memory; each slot is 16 bytes
         loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
      }
      break;
   }
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      Value *indirect;
      DataType dType = getSType(insn->src[0], false, false);
      // the offset source index differs between the two store intrinsics
      uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         // only store components selected by the write mask
         if (!((1u << i) & nir_intrinsic_write_mask(insn)))
            continue;

         uint8_t offset = 0;
         Value *src = getSrc(&insn->src[0], i);
         switch (prog->getType()) {
         case Program::TYPE_FRAGMENT: {
            if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
               // TGSI uses a different interface than NIR, TGSI stores that
               // value in the z component, NIR in X
               offset += 2;
               src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
            }
            break;
         }
         case Program::TYPE_VERTEX: {
            // keep a copy of the clip-vertex output for the user-clip
            // epilogue emitted in handleUserClipPlanes()
            if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
               mkMov(clipVtx[i], src);
               src = clipVtx[i];
            }
            break;
         }
         default:
            break;
         }

         storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
      }
      break;
   }
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_output: {
      LValues &newDefs = convert(&insn->dest);

      // FBFetch
      if (prog->getType() == Program::TYPE_FRAGMENT &&
          op == nir_intrinsic_load_output) {
         std::vector<Value*> defs, srcs;
         uint8_t mask = 0;

         // texel fetch at (x, y, layer, sample) from the bound framebuffer
         srcs.push_back(getSSA());
         srcs.push_back(getSSA());
         Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
         Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
         mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
         mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;

         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
         srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));

         for (uint8_t i = 0u; i < insn->num_components; ++i) {
            defs.push_back(newDefs[i]);
            mask |= 1 << i;
         }

         TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
         texi->tex.levelZero = 1;
         texi->tex.mask = mask;
         texi->tex.useOffsets = 0;
         texi->tex.r = 0xffff;
         texi->tex.s = 0xffff;

         info->prop.fp.readsFramebuffer = true;
         break;
      }

      const DataType dType = getDType(insn);
      Value *indirect;
      bool input = op != nir_intrinsic_load_output;
      operation nvirOp;
      uint32_t mode = 0;

      uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
      nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];

      // see load_barycentric_* handling
      if (prog->getType() == Program::TYPE_FRAGMENT) {
         mode = translateInterpMode(&vary, nvirOp);
         if (op == nir_intrinsic_load_interpolated_input) {
            // merge in the interpolation mode produced by the barycentric
            // intrinsic (second component of src[0])
            ImmediateValue immMode;
            if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
               mode |= immMode.reg.data.u32;
         }
      }

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         uint32_t address = getSlotAddress(insn, idx, i);
         Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
         if (prog->getType() == Program::TYPE_FRAGMENT) {
            int s = 1;
            if (typeSizeof(dType) == 8) {
               // 64-bit inputs are interpolated as two 32-bit halves
               Value *lo = getSSA();
               Value *hi = getSSA();
               Instruction *interp;

               interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
               interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);

               mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
            } else {
               Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
               if (nvirOp == OP_PINTERP)
                  interp->setSrc(s++, fp.position);
               if (mode & NV50_IR_INTERP_OFFSET)
                  interp->setSrc(s++, getSrc(&insn->src[0], 0));
               interp->setInterpolate(mode);
               interp->setIndirect(0, 0, indirect);
            }
         } else {
            // non-fragment stages read inputs/outputs with plain loads
            mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
         }
      }
      break;
   }
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_sample: {
      LValues &newDefs = convert(&insn->dest);
      uint32_t mode;

      if (op == nir_intrinsic_load_barycentric_centroid ||
          op == nir_intrinsic_load_barycentric_sample) {
         mode = NV50_IR_INTERP_CENTROID;
      } else if (op == nir_intrinsic_load_barycentric_at_offset) {
         // clamp the offset to [-0.5, 0.4375] and convert to the fixed-point
         // form packed into newDefs[0] (two 16-bit fields via INSBF)
         Value *offs[2];
         for (uint8_t c = 0; c < 2; c++) {
            offs[c] = getScratch();
            mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
            mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
            mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
            mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
         }
         mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);

         mode = NV50_IR_INTERP_OFFSET;
      } else if (op == nir_intrinsic_load_barycentric_pixel) {
         mode = NV50_IR_INTERP_DEFAULT;
      } else if (op == nir_intrinsic_load_barycentric_at_sample) {
         info->prop.fp.readsSampleLocations = true;
         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
         mode = NV50_IR_INTERP_OFFSET;
      } else {
         unreachable("all intrinsics already handled above");
      }

      // second component carries the interpolation mode, consumed by
      // load_interpolated_input above
      loadImm(newDefs[1], mode);
      break;
   }
   case nir_intrinsic_discard:
      mkOp(OP_DISCARD, TYPE_NONE, NULL);
      break;
   case nir_intrinsic_discard_if: {
      Value *pred = getSSA(1, FILE_PREDICATE);
      if (insn->num_components > 1) {
         ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
         assert(false);
         return false;
      }
      // discard predicated on condition != 0
      mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
      break;
   }
   // system values, read via OP_RDSV
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_work_group_id: {
      const DataType dType = getDType(insn);
      SVSemantic sv = convert(op);
      LValues &newDefs = convert(&insn->dest);

      for (uint8_t i = 0u; i < insn->num_components; ++i) {
         Value *def;
         if (typeSizeof(dType) == 8)
            def = getSSA();
         else
            def = newDefs[i];

         if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
            // thread id in a dimension of size 1 is always 0
            loadImm(def, 0u);
         } else {
            Symbol *sym = mkSysVal(sv, i);
            Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
            if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
               rdsv->perPatch = 1;
         }

         // 64-bit destinations get the value zero-extended into the high half
         if (typeSizeof(dType) == 8)
            mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
      }
      break;
   }
   // constants
   case nir_intrinsic_load_subgroup_size: {
      LValues &newDefs = convert(&insn->dest);
      loadImm(newDefs[0], 32u);
      break;
   }
   default:
      ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
      return false;
   }

   return true;
}
1878
1879 bool
1880 Converter::visit(nir_jump_instr *insn)
1881 {
1882 switch (insn->type) {
1883 case nir_jump_return:
1884 // TODO: this only works in the main function
1885 mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
1886 bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
1887 break;
1888 case nir_jump_break:
1889 case nir_jump_continue: {
1890 bool isBreak = insn->type == nir_jump_break;
1891 nir_block *block = insn->instr.block;
1892 assert(!block->successors[1]);
1893 BasicBlock *target = convert(block->successors[0]);
1894 mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
1895 bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
1896 break;
1897 }
1898 default:
1899 ERROR("unknown nir_jump_type %u\n", insn->type);
1900 return false;
1901 }
1902
1903 return true;
1904 }
1905
1906 bool
1907 Converter::visit(nir_load_const_instr *insn)
1908 {
1909 assert(insn->def.bit_size <= 64);
1910
1911 LValues &newDefs = convert(&insn->def);
1912 for (int i = 0; i < insn->def.num_components; i++) {
1913 switch (insn->def.bit_size) {
1914 case 64:
1915 loadImm(newDefs[i], insn->value.u64[i]);
1916 break;
1917 case 32:
1918 loadImm(newDefs[i], insn->value.u32[i]);
1919 break;
1920 case 16:
1921 loadImm(newDefs[i], insn->value.u16[i]);
1922 break;
1923 case 8:
1924 loadImm(newDefs[i], insn->value.u8[i]);
1925 break;
1926 }
1927 }
1928 return true;
1929 }
1930
// Shared precondition guard for the nir_alu_instr handlers below: they only
// support a scalar (single-component) destination with write_mask == 1,
// i.e. the NIR is expected to have been scalarized beforehand. Expanded at
// the top of each case; bails out of the enclosing visit() on violation.
#define DEFAULT_CHECKS \
      if (insn->dest.dest.ssa.num_components > 1) { \
         ERROR("nir_alu_instr only supported with 1 component!\n"); \
         return false; \
      } \
      if (insn->dest.write_mask != 1) { \
         ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
         return false; \
      }
1940 bool
1941 Converter::visit(nir_alu_instr *insn)
1942 {
1943 const nir_op op = insn->op;
1944 const nir_op_info &info = nir_op_infos[op];
1945 DataType dType = getDType(insn);
1946 const std::vector<DataType> sTypes = getSTypes(insn);
1947
1948 Instruction *oldPos = this->bb->getExit();
1949
1950 switch (op) {
1951 case nir_op_fabs:
1952 case nir_op_iabs:
1953 case nir_op_fadd:
1954 case nir_op_iadd:
1955 case nir_op_fand:
1956 case nir_op_iand:
1957 case nir_op_fceil:
1958 case nir_op_fcos:
1959 case nir_op_fddx:
1960 case nir_op_fddx_coarse:
1961 case nir_op_fddx_fine:
1962 case nir_op_fddy:
1963 case nir_op_fddy_coarse:
1964 case nir_op_fddy_fine:
1965 case nir_op_fdiv:
1966 case nir_op_idiv:
1967 case nir_op_udiv:
1968 case nir_op_fexp2:
1969 case nir_op_ffloor:
1970 case nir_op_ffma:
1971 case nir_op_flog2:
1972 case nir_op_fmax:
1973 case nir_op_imax:
1974 case nir_op_umax:
1975 case nir_op_fmin:
1976 case nir_op_imin:
1977 case nir_op_umin:
1978 case nir_op_fmod:
1979 case nir_op_imod:
1980 case nir_op_umod:
1981 case nir_op_fmul:
1982 case nir_op_imul:
1983 case nir_op_imul_high:
1984 case nir_op_umul_high:
1985 case nir_op_fneg:
1986 case nir_op_ineg:
1987 case nir_op_fnot:
1988 case nir_op_inot:
1989 case nir_op_for:
1990 case nir_op_ior:
1991 case nir_op_pack_64_2x32_split:
1992 case nir_op_fpow:
1993 case nir_op_frcp:
1994 case nir_op_frem:
1995 case nir_op_irem:
1996 case nir_op_frsq:
1997 case nir_op_fsat:
1998 case nir_op_ishr:
1999 case nir_op_ushr:
2000 case nir_op_fsin:
2001 case nir_op_fsqrt:
2002 case nir_op_fsub:
2003 case nir_op_isub:
2004 case nir_op_ftrunc:
2005 case nir_op_ishl:
2006 case nir_op_fxor:
2007 case nir_op_ixor: {
2008 DEFAULT_CHECKS;
2009 LValues &newDefs = convert(&insn->dest);
2010 operation preOp = preOperationNeeded(op);
2011 if (preOp != OP_NOP) {
2012 assert(info.num_inputs < 2);
2013 Value *tmp = getSSA(typeSizeof(dType));
2014 Instruction *i0 = mkOp(preOp, dType, tmp);
2015 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2016 if (info.num_inputs) {
2017 i0->setSrc(0, getSrc(&insn->src[0]));
2018 i1->setSrc(0, tmp);
2019 }
2020 i1->subOp = getSubOp(op);
2021 } else {
2022 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2023 for (unsigned s = 0u; s < info.num_inputs; ++s) {
2024 i->setSrc(s, getSrc(&insn->src[s]));
2025 }
2026 i->subOp = getSubOp(op);
2027 }
2028 break;
2029 }
2030 case nir_op_ifind_msb:
2031 case nir_op_ufind_msb: {
2032 DEFAULT_CHECKS;
2033 LValues &newDefs = convert(&insn->dest);
2034 dType = sTypes[0];
2035 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2036 break;
2037 }
2038 case nir_op_fround_even: {
2039 DEFAULT_CHECKS;
2040 LValues &newDefs = convert(&insn->dest);
2041 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2042 break;
2043 }
2044 // convert instructions
2045 case nir_op_f2f32:
2046 case nir_op_f2i32:
2047 case nir_op_f2u32:
2048 case nir_op_i2f32:
2049 case nir_op_i2i32:
2050 case nir_op_u2f32:
2051 case nir_op_u2u32:
2052 case nir_op_f2f64:
2053 case nir_op_f2i64:
2054 case nir_op_f2u64:
2055 case nir_op_i2f64:
2056 case nir_op_i2i64:
2057 case nir_op_u2f64:
2058 case nir_op_u2u64: {
2059 DEFAULT_CHECKS;
2060 LValues &newDefs = convert(&insn->dest);
2061 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2062 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2063 i->rnd = ROUND_Z;
2064 i->sType = sTypes[0];
2065 break;
2066 }
2067 // compare instructions
2068 case nir_op_feq32:
2069 case nir_op_ieq32:
2070 case nir_op_fge32:
2071 case nir_op_ige32:
2072 case nir_op_uge32:
2073 case nir_op_flt32:
2074 case nir_op_ilt32:
2075 case nir_op_ult32:
2076 case nir_op_fne32:
2077 case nir_op_ine32: {
2078 DEFAULT_CHECKS;
2079 LValues &newDefs = convert(&insn->dest);
2080 Instruction *i = mkCmp(getOperation(op),
2081 getCondCode(op),
2082 dType,
2083 newDefs[0],
2084 dType,
2085 getSrc(&insn->src[0]),
2086 getSrc(&insn->src[1]));
2087 if (info.num_inputs == 3)
2088 i->setSrc(2, getSrc(&insn->src[2]));
2089 i->sType = sTypes[0];
2090 break;
2091 }
2092 // those are weird ALU ops and need special handling, because
2093 // 1. they are always componend based
2094 // 2. they basically just merge multiple values into one data type
2095 case nir_op_imov:
2096 case nir_op_fmov:
2097 case nir_op_vec2:
2098 case nir_op_vec3:
2099 case nir_op_vec4: {
2100 LValues &newDefs = convert(&insn->dest);
2101 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2102 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2103 }
2104 break;
2105 }
2106 // (un)pack
2107 case nir_op_pack_64_2x32: {
2108 LValues &newDefs = convert(&insn->dest);
2109 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2110 merge->setSrc(0, getSrc(&insn->src[0], 0));
2111 merge->setSrc(1, getSrc(&insn->src[0], 1));
2112 break;
2113 }
2114 case nir_op_pack_half_2x16_split: {
2115 LValues &newDefs = convert(&insn->dest);
2116 Value *tmpH = getSSA();
2117 Value *tmpL = getSSA();
2118
2119 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2120 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2121 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2122 break;
2123 }
2124 case nir_op_unpack_half_2x16_split_x:
2125 case nir_op_unpack_half_2x16_split_y: {
2126 LValues &newDefs = convert(&insn->dest);
2127 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2128 if (op == nir_op_unpack_half_2x16_split_y)
2129 cvt->subOp = 1;
2130 break;
2131 }
2132 case nir_op_unpack_64_2x32: {
2133 LValues &newDefs = convert(&insn->dest);
2134 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2135 break;
2136 }
2137 case nir_op_unpack_64_2x32_split_x: {
2138 LValues &newDefs = convert(&insn->dest);
2139 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2140 break;
2141 }
2142 case nir_op_unpack_64_2x32_split_y: {
2143 LValues &newDefs = convert(&insn->dest);
2144 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2145 break;
2146 }
2147 // special instructions
2148 case nir_op_fsign:
2149 case nir_op_isign: {
2150 DEFAULT_CHECKS;
2151 DataType iType;
2152 if (::isFloatType(dType))
2153 iType = TYPE_F32;
2154 else
2155 iType = TYPE_S32;
2156
2157 LValues &newDefs = convert(&insn->dest);
2158 LValue *val0 = getScratch();
2159 LValue *val1 = getScratch();
2160 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2161 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2162
2163 if (dType == TYPE_F64) {
2164 mkOp2(OP_SUB, iType, val0, val0, val1);
2165 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2166 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2167 mkOp2(OP_SUB, iType, val0, val1, val0);
2168 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2169 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2170 } else if (::isFloatType(dType))
2171 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2172 else
2173 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2174 break;
2175 }
2176 case nir_op_fcsel:
2177 case nir_op_b32csel: {
2178 DEFAULT_CHECKS;
2179 LValues &newDefs = convert(&insn->dest);
2180 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2181 break;
2182 }
2183 case nir_op_ibitfield_extract:
2184 case nir_op_ubitfield_extract: {
2185 DEFAULT_CHECKS;
2186 Value *tmp = getSSA();
2187 LValues &newDefs = convert(&insn->dest);
2188 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2189 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2190 break;
2191 }
2192 case nir_op_bfm: {
2193 DEFAULT_CHECKS;
2194 LValues &newDefs = convert(&insn->dest);
2195 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2196 break;
2197 }
2198 case nir_op_bitfield_insert: {
2199 DEFAULT_CHECKS;
2200 LValues &newDefs = convert(&insn->dest);
2201 LValue *temp = getSSA();
2202 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2203 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2204 break;
2205 }
2206 case nir_op_bit_count: {
2207 DEFAULT_CHECKS;
2208 LValues &newDefs = convert(&insn->dest);
2209 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2210 break;
2211 }
2212 case nir_op_bitfield_reverse: {
2213 DEFAULT_CHECKS;
2214 LValues &newDefs = convert(&insn->dest);
2215 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2216 break;
2217 }
2218 case nir_op_find_lsb: {
2219 DEFAULT_CHECKS;
2220 LValues &newDefs = convert(&insn->dest);
2221 Value *tmp = getSSA();
2222 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2223 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2224 break;
2225 }
2226 // boolean conversions
2227 case nir_op_b2f32: {
2228 DEFAULT_CHECKS;
2229 LValues &newDefs = convert(&insn->dest);
2230 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2231 break;
2232 }
2233 case nir_op_b2f64: {
2234 DEFAULT_CHECKS;
2235 LValues &newDefs = convert(&insn->dest);
2236 Value *tmp = getSSA(4);
2237 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2238 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2239 break;
2240 }
2241 case nir_op_f2b32:
2242 case nir_op_i2b32: {
2243 DEFAULT_CHECKS;
2244 LValues &newDefs = convert(&insn->dest);
2245 Value *src1;
2246 if (typeSizeof(sTypes[0]) == 8) {
2247 src1 = loadImm(getSSA(8), 0.0);
2248 } else {
2249 src1 = zero;
2250 }
2251 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2252 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2253 break;
2254 }
2255 case nir_op_b2i32: {
2256 DEFAULT_CHECKS;
2257 LValues &newDefs = convert(&insn->dest);
2258 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2259 break;
2260 }
2261 case nir_op_b2i64: {
2262 DEFAULT_CHECKS;
2263 LValues &newDefs = convert(&insn->dest);
2264 LValue *def = getScratch();
2265 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2266 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2267 break;
2268 }
2269 default:
2270 ERROR("unknown nir_op %s\n", info.name);
2271 return false;
2272 }
2273
2274 if (!oldPos) {
2275 oldPos = this->bb->getEntry();
2276 oldPos->precise = insn->exact;
2277 }
2278
2279 if (unlikely(!oldPos))
2280 return true;
2281
2282 while (oldPos->next) {
2283 oldPos = oldPos->next;
2284 oldPos->precise = insn->exact;
2285 }
2286 oldPos->saturate = insn->dest.saturate;
2287
2288 return true;
2289 }
2290 #undef DEFAULT_CHECKS
2291
2292 bool
2293 Converter::run()
2294 {
2295 bool progress;
2296
2297 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2298 nir_print_shader(nir, stderr);
2299
2300 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2301 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2302 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2303 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2304 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2305 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2306
2307 do {
2308 progress = false;
2309 NIR_PASS(progress, nir, nir_copy_prop);
2310 NIR_PASS(progress, nir, nir_opt_remove_phis);
2311 NIR_PASS(progress, nir, nir_opt_trivial_continues);
2312 NIR_PASS(progress, nir, nir_opt_cse);
2313 NIR_PASS(progress, nir, nir_opt_algebraic);
2314 NIR_PASS(progress, nir, nir_opt_constant_folding);
2315 NIR_PASS(progress, nir, nir_copy_prop);
2316 NIR_PASS(progress, nir, nir_opt_dce);
2317 NIR_PASS(progress, nir, nir_opt_dead_cf);
2318 } while (progress);
2319
2320 NIR_PASS_V(nir, nir_lower_bool_to_int32);
2321 NIR_PASS_V(nir, nir_lower_locals_to_regs);
2322 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2323 NIR_PASS_V(nir, nir_convert_from_ssa, true);
2324
2325 // Garbage collect dead instructions
2326 nir_sweep(nir);
2327
2328 if (!parseNIR()) {
2329 ERROR("Couldn't prase NIR!\n");
2330 return false;
2331 }
2332
2333 if (!assignSlots()) {
2334 ERROR("Couldn't assign slots!\n");
2335 return false;
2336 }
2337
2338 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2339 nir_print_shader(nir, stderr);
2340
2341 nir_foreach_function(function, nir) {
2342 if (!visit(function))
2343 return false;
2344 }
2345
2346 return true;
2347 }
2348
2349 } // unnamed namespace
2350
2351 namespace nv50_ir {
2352
2353 bool
2354 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2355 {
2356 nir_shader *nir = (nir_shader*)info->bin.source;
2357 Converter converter(this, nir, info);
2358 bool result = converter.run();
2359 if (!result)
2360 return result;
2361 LoweringHelper lowering;
2362 lowering.run(this);
2363 tlsSize = info->bin.tlsSpace;
2364 return result;
2365 }
2366
2367 } // namespace nv50_ir