src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33
  34 #if __cplusplus >= 201103L
  35 #include <unordered_map>
  36 #else
  37 #include <tr1/unordered_map>
  38 #endif
  39 #include <vector>
  40
  41 namespace {
  42
  43 #if __cplusplus >= 201103L
  44 using std::hash;
  45 using std::unordered_map;
  46 #else
  47 using std::tr1::hash;
  48 using std::tr1::unordered_map;
  49 #endif
  50
  51 using namespace nv50_ir;
  52
  53 int
  54 type_size(const struct glsl_type *type)
  55 {
  56    return glsl_count_attribute_slots(type, false);
  57 }
  58
  59 class Converter : public ConverterCommon
  60 {
  61 public:
  62    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  63
  64    bool run();
  65 private:
  66    typedef std::vector<LValue*> LValues;
  67    typedef unordered_map<unsigned, LValues> NirDefMap;
  68    typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
  69
  70    LValues& convert(nir_alu_dest *);
  71    BasicBlock* convert(nir_block *);
  72    LValues& convert(nir_dest *);
  73    SVSemantic convert(nir_intrinsic_op);
  74    LValues& convert(nir_register *);
  75    LValues& convert(nir_ssa_def *);
  76
  77    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  78    Value* getSrc(nir_register *, uint8_t);
  79    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  80    Value* getSrc(nir_ssa_def *, uint8_t);
  81
  82    // returned value is the constant part of the given source (either the
  83    // nir_src or the selected source component of an intrinsic). Even though
  84    // this is mostly an optimization to be able to skip indirects in a few
  85    // cases, sometimes we require immediate values or set some fileds on
  86    // instructions (e.g. tex) in order for codegen to consume those.
  87    // If the found value has not a constant part, the Value gets returned
  88    // through the Value parameter.
  89    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  90    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
  91
  92    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
  93
  94    void setInterpolate(nv50_ir_varying *,
  95                        uint8_t,
  96                        bool centroid,
  97                        unsigned semantics);
  98
  99    Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
 100                          uint8_t c, Value *indirect0 = NULL,
 101                          Value *indirect1 = NULL, bool patch = false);
 102    void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
 103                 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
 104                 Value *indirect1 = NULL);
 105
 106    bool isFloatType(nir_alu_type);
 107    bool isSignedType(nir_alu_type);
 108    bool isResultFloat(nir_op);
 109    bool isResultSigned(nir_op);
 110
 111    DataType getDType(nir_alu_instr *);
 112    DataType getDType(nir_intrinsic_instr *);
 113    DataType getDType(nir_op, uint8_t);
 114
 115    std::vector<DataType> getSTypes(nir_alu_instr *);
 116    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 117
 118    operation getOperation(nir_op);
 119    operation preOperationNeeded(nir_op);
 120
 121    int getSubOp(nir_op);
 122
 123    CondCode getCondCode(nir_op);
 124
 125    bool assignSlots();
 126    bool parseNIR();
 127
 128    bool visit(nir_alu_instr *);
 129    bool visit(nir_block *);
 130    bool visit(nir_cf_node *);
 131    bool visit(nir_function *);
 132    bool visit(nir_if *);
 133    bool visit(nir_instr *);
 134    bool visit(nir_intrinsic_instr *);
 135    bool visit(nir_jump_instr *);
 136    bool visit(nir_load_const_instr*);
 137    bool visit(nir_loop *);
 138    bool visit(nir_ssa_undef_instr *);
 139
 140    nir_shader *nir;
 141
 142    NirDefMap ssaDefs;
 143    NirDefMap regDefs;
 144    NirBlockMap blocks;
 145    unsigned int curLoopDepth;
 146
 147    BasicBlock *exit;
 148    Value *zero;
 149
 150    int clipVertexOutput;
 151
 152    union {
 153       struct {
 154          Value *position;
 155       } fp;
 156    };
 157 };
 158
 159 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 160    : ConverterCommon(prog, info),
 161      nir(nir),
 162      curLoopDepth(0),
 163      clipVertexOutput(-1)
 164 {
 165    zero = mkImm((uint32_t)0);
 166 }
 167
 168 BasicBlock *
 169 Converter::convert(nir_block *block)
 170 {
 171    NirBlockMap::iterator it = blocks.find(block->index);
 172    if (it != blocks.end())
 173       return it->second;
 174
 175    BasicBlock *bb = new BasicBlock(func);
 176    blocks[block->index] = bb;
 177    return bb;
 178 }
 179
 180 bool
 181 Converter::isFloatType(nir_alu_type type)
 182 {
 183    return nir_alu_type_get_base_type(type) == nir_type_float;
 184 }
 185
 186 bool
 187 Converter::isSignedType(nir_alu_type type)
 188 {
 189    return nir_alu_type_get_base_type(type) == nir_type_int;
 190 }
 191
 192 bool
 193 Converter::isResultFloat(nir_op op)
 194 {
 195    const nir_op_info &info = nir_op_infos[op];
 196    if (info.output_type != nir_type_invalid)
 197       return isFloatType(info.output_type);
 198
 199    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 200    assert(false);
 201    return true;
 202 }
 203
 204 bool
 205 Converter::isResultSigned(nir_op op)
 206 {
 207    switch (op) {
 208    // there is no umul and we get wrong results if we treat all muls as signed
 209    case nir_op_imul:
 210    case nir_op_inot:
 211       return false;
 212    default:
 213       const nir_op_info &info = nir_op_infos[op];
 214       if (info.output_type != nir_type_invalid)
 215          return isSignedType(info.output_type);
 216       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 217       assert(false);
 218       return true;
 219    }
 220 }
 221
 222 DataType
 223 Converter::getDType(nir_alu_instr *insn)
 224 {
 225    if (insn->dest.dest.is_ssa)
 226       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 227    else
 228       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 229 }
 230
 231 DataType
 232 Converter::getDType(nir_intrinsic_instr *insn)
 233 {
 234    if (insn->dest.is_ssa)
 235       return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
 236    else
 237       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
 238 }
 239
 240 DataType
 241 Converter::getDType(nir_op op, uint8_t bitSize)
 242 {
 243    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 244    if (ty == TYPE_NONE) {
 245       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 246       assert(false);
 247    }
 248    return ty;
 249 }
 250
 251 std::vector<DataType>
 252 Converter::getSTypes(nir_alu_instr *insn)
 253 {
 254    const nir_op_info &info = nir_op_infos[insn->op];
 255    std::vector<DataType> res(info.num_inputs);
 256
 257    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 258       if (info.input_types[i] != nir_type_invalid) {
 259          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 260       } else {
 261          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 262          assert(false);
 263          res[i] = TYPE_NONE;
 264          break;
 265       }
 266    }
 267
 268    return res;
 269 }
 270
 271 DataType
 272 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 273 {
 274    uint8_t bitSize;
 275    if (src.is_ssa)
 276       bitSize = src.ssa->bit_size;
 277    else
 278       bitSize = src.reg.reg->bit_size;
 279
 280    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 281    if (ty == TYPE_NONE) {
 282       const char *str;
 283       if (isFloat)
 284          str = "float";
 285       else if (isSigned)
 286          str = "int";
 287       else
 288          str = "uint";
 289       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 290       assert(false);
 291    }
 292    return ty;
 293 }
 294
 295 operation
 296 Converter::getOperation(nir_op op)
 297 {
 298    switch (op) {
 299    // basic ops with float and int variants
 300    case nir_op_fabs:
 301    case nir_op_iabs:
 302       return OP_ABS;
 303    case nir_op_fadd:
 304    case nir_op_iadd:
 305       return OP_ADD;
 306    case nir_op_fand:
 307    case nir_op_iand:
 308       return OP_AND;
 309    case nir_op_ifind_msb:
 310    case nir_op_ufind_msb:
 311       return OP_BFIND;
 312    case nir_op_fceil:
 313       return OP_CEIL;
 314    case nir_op_fcos:
 315       return OP_COS;
 316    case nir_op_f2f32:
 317    case nir_op_f2f64:
 318    case nir_op_f2i32:
 319    case nir_op_f2i64:
 320    case nir_op_f2u32:
 321    case nir_op_f2u64:
 322    case nir_op_i2f32:
 323    case nir_op_i2f64:
 324    case nir_op_i2i32:
 325    case nir_op_i2i64:
 326    case nir_op_u2f32:
 327    case nir_op_u2f64:
 328    case nir_op_u2u32:
 329    case nir_op_u2u64:
 330       return OP_CVT;
 331    case nir_op_fddx:
 332    case nir_op_fddx_coarse:
 333    case nir_op_fddx_fine:
 334       return OP_DFDX;
 335    case nir_op_fddy:
 336    case nir_op_fddy_coarse:
 337    case nir_op_fddy_fine:
 338       return OP_DFDY;
 339    case nir_op_fdiv:
 340    case nir_op_idiv:
 341    case nir_op_udiv:
 342       return OP_DIV;
 343    case nir_op_fexp2:
 344       return OP_EX2;
 345    case nir_op_ffloor:
 346       return OP_FLOOR;
 347    case nir_op_ffma:
 348       return OP_FMA;
 349    case nir_op_flog2:
 350       return OP_LG2;
 351    case nir_op_fmax:
 352    case nir_op_imax:
 353    case nir_op_umax:
 354       return OP_MAX;
 355    case nir_op_pack_64_2x32_split:
 356       return OP_MERGE;
 357    case nir_op_fmin:
 358    case nir_op_imin:
 359    case nir_op_umin:
 360       return OP_MIN;
 361    case nir_op_fmod:
 362    case nir_op_imod:
 363    case nir_op_umod:
 364    case nir_op_frem:
 365    case nir_op_irem:
 366       return OP_MOD;
 367    case nir_op_fmul:
 368    case nir_op_imul:
 369    case nir_op_imul_high:
 370    case nir_op_umul_high:
 371       return OP_MUL;
 372    case nir_op_fneg:
 373    case nir_op_ineg:
 374       return OP_NEG;
 375    case nir_op_fnot:
 376    case nir_op_inot:
 377       return OP_NOT;
 378    case nir_op_for:
 379    case nir_op_ior:
 380       return OP_OR;
 381    case nir_op_fpow:
 382       return OP_POW;
 383    case nir_op_frcp:
 384       return OP_RCP;
 385    case nir_op_frsq:
 386       return OP_RSQ;
 387    case nir_op_fsat:
 388       return OP_SAT;
 389    case nir_op_feq32:
 390    case nir_op_ieq32:
 391    case nir_op_fge32:
 392    case nir_op_ige32:
 393    case nir_op_uge32:
 394    case nir_op_flt32:
 395    case nir_op_ilt32:
 396    case nir_op_ult32:
 397    case nir_op_fne32:
 398    case nir_op_ine32:
 399       return OP_SET;
 400    case nir_op_ishl:
 401       return OP_SHL;
 402    case nir_op_ishr:
 403    case nir_op_ushr:
 404       return OP_SHR;
 405    case nir_op_fsin:
 406       return OP_SIN;
 407    case nir_op_fsqrt:
 408       return OP_SQRT;
 409    case nir_op_fsub:
 410    case nir_op_isub:
 411       return OP_SUB;
 412    case nir_op_ftrunc:
 413       return OP_TRUNC;
 414    case nir_op_fxor:
 415    case nir_op_ixor:
 416       return OP_XOR;
 417    default:
 418       ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
 419       assert(false);
 420       return OP_NOP;
 421    }
 422 }
 423
 424 operation
 425 Converter::preOperationNeeded(nir_op op)
 426 {
 427    switch (op) {
 428    case nir_op_fcos:
 429    case nir_op_fsin:
 430       return OP_PRESIN;
 431    default:
 432       return OP_NOP;
 433    }
 434 }
 435
 436 int
 437 Converter::getSubOp(nir_op op)
 438 {
 439    switch (op) {
 440    case nir_op_imul_high:
 441    case nir_op_umul_high:
 442       return NV50_IR_SUBOP_MUL_HIGH;
 443    default:
 444       return 0;
 445    }
 446 }
 447
 448 CondCode
 449 Converter::getCondCode(nir_op op)
 450 {
 451    switch (op) {
 452    case nir_op_feq32:
 453    case nir_op_ieq32:
 454       return CC_EQ;
 455    case nir_op_fge32:
 456    case nir_op_ige32:
 457    case nir_op_uge32:
 458       return CC_GE;
 459    case nir_op_flt32:
 460    case nir_op_ilt32:
 461    case nir_op_ult32:
 462       return CC_LT;
 463    case nir_op_fne32:
 464       return CC_NEU;
 465    case nir_op_ine32:
 466       return CC_NE;
 467    default:
 468       ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
 469       assert(false);
 470       return CC_FL;
 471    }
 472 }
 473
 474 Converter::LValues&
 475 Converter::convert(nir_alu_dest *dest)
 476 {
 477    return convert(&dest->dest);
 478 }
 479
 480 Converter::LValues&
 481 Converter::convert(nir_dest *dest)
 482 {
 483    if (dest->is_ssa)
 484       return convert(&dest->ssa);
 485    if (dest->reg.indirect) {
 486       ERROR("no support for indirects.");
 487       assert(false);
 488    }
 489    return convert(dest->reg.reg);
 490 }
 491
 492 Converter::LValues&
 493 Converter::convert(nir_register *reg)
 494 {
 495    NirDefMap::iterator it = regDefs.find(reg->index);
 496    if (it != regDefs.end())
 497       return it->second;
 498
 499    LValues newDef(reg->num_components);
 500    for (uint8_t i = 0; i < reg->num_components; i++)
 501       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 502    return regDefs[reg->index] = newDef;
 503 }
 504
 505 Converter::LValues&
 506 Converter::convert(nir_ssa_def *def)
 507 {
 508    NirDefMap::iterator it = ssaDefs.find(def->index);
 509    if (it != ssaDefs.end())
 510       return it->second;
 511
 512    LValues newDef(def->num_components);
 513    for (uint8_t i = 0; i < def->num_components; i++)
 514       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 515    return ssaDefs[def->index] = newDef;
 516 }
 517
 518 Value*
 519 Converter::getSrc(nir_alu_src *src, uint8_t component)
 520 {
 521    if (src->abs || src->negate) {
 522       ERROR("modifiers currently not supported on nir_alu_src\n");
 523       assert(false);
 524    }
 525    return getSrc(&src->src, src->swizzle[component]);
 526 }
 527
 528 Value*
 529 Converter::getSrc(nir_register *reg, uint8_t idx)
 530 {
 531    NirDefMap::iterator it = regDefs.find(reg->index);
 532    if (it == regDefs.end())
 533       return convert(reg)[idx];
 534    return it->second[idx];
 535 }
 536
 537 Value*
 538 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 539 {
 540    if (src->is_ssa)
 541       return getSrc(src->ssa, idx);
 542
 543    if (src->reg.indirect) {
 544       if (indirect)
 545          return getSrc(src->reg.indirect, idx);
 546       ERROR("no support for indirects.");
 547       assert(false);
 548       return NULL;
 549    }
 550
 551    return getSrc(src->reg.reg, idx);
 552 }
 553
 554 Value*
 555 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 556 {
 557    NirDefMap::iterator it = ssaDefs.find(src->index);
 558    if (it == ssaDefs.end()) {
 559       ERROR("SSA value %u not found\n", src->index);
 560       assert(false);
 561       return NULL;
 562    }
 563    return it->second[idx];
 564 }
 565
 566 uint32_t
 567 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 568 {
 569    nir_const_value *offset = nir_src_as_const_value(*src);
 570
 571    if (offset) {
 572       indirect = NULL;
 573       return offset->u32[0];
 574    }
 575
 576    indirect = getSrc(src, idx, true);
 577    return 0;
 578 }
 579
 580 uint32_t
 581 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
 582 {
 583    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 584    if (indirect)
 585       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 586    return idx;
 587 }
 588
 589 static void
 590 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 591 {
 592    assert(name && index);
 593
 594    if (slot >= VERT_ATTRIB_MAX) {
 595       ERROR("invalid varying slot %u\n", slot);
 596       assert(false);
 597       return;
 598    }
 599
 600    if (slot >= VERT_ATTRIB_GENERIC0 &&
 601        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 602       *name = TGSI_SEMANTIC_GENERIC;
 603       *index = slot - VERT_ATTRIB_GENERIC0;
 604       return;
 605    }
 606
 607    if (slot >= VERT_ATTRIB_TEX0 &&
 608        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 609       *name = TGSI_SEMANTIC_TEXCOORD;
 610       *index = slot - VERT_ATTRIB_TEX0;
 611       return;
 612    }
 613
 614    switch (slot) {
 615    case VERT_ATTRIB_COLOR0:
 616       *name = TGSI_SEMANTIC_COLOR;
 617       *index = 0;
 618       break;
 619    case VERT_ATTRIB_COLOR1:
 620       *name = TGSI_SEMANTIC_COLOR;
 621       *index = 1;
 622       break;
 623    case VERT_ATTRIB_EDGEFLAG:
 624       *name = TGSI_SEMANTIC_EDGEFLAG;
 625       *index = 0;
 626       break;
 627    case VERT_ATTRIB_FOG:
 628       *name = TGSI_SEMANTIC_FOG;
 629       *index = 0;
 630       break;
 631    case VERT_ATTRIB_NORMAL:
 632       *name = TGSI_SEMANTIC_NORMAL;
 633       *index = 0;
 634       break;
 635    case VERT_ATTRIB_POS:
 636       *name = TGSI_SEMANTIC_POSITION;
 637       *index = 0;
 638       break;
 639    case VERT_ATTRIB_POINT_SIZE:
 640       *name = TGSI_SEMANTIC_PSIZE;
 641       *index = 0;
 642       break;
 643    default:
 644       ERROR("unknown vert attrib slot %u\n", slot);
 645       assert(false);
 646       break;
 647    }
 648 }
 649
 650 static void
 651 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
 652 {
 653    assert(name && index);
 654
 655    if (slot >= VARYING_SLOT_TESS_MAX) {
 656       ERROR("invalid varying slot %u\n", slot);
 657       assert(false);
 658       return;
 659    }
 660
 661    if (slot >= VARYING_SLOT_PATCH0) {
 662       *name = TGSI_SEMANTIC_PATCH;
 663       *index = slot - VARYING_SLOT_PATCH0;
 664       return;
 665    }
 666
 667    if (slot >= VARYING_SLOT_VAR0) {
 668       *name = TGSI_SEMANTIC_GENERIC;
 669       *index = slot - VARYING_SLOT_VAR0;
 670       return;
 671    }
 672
 673    if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
 674       *name = TGSI_SEMANTIC_TEXCOORD;
 675       *index = slot - VARYING_SLOT_TEX0;
 676       return;
 677    }
 678
 679    switch (slot) {
 680    case VARYING_SLOT_BFC0:
 681       *name = TGSI_SEMANTIC_BCOLOR;
 682       *index = 0;
 683       break;
 684    case VARYING_SLOT_BFC1:
 685       *name = TGSI_SEMANTIC_BCOLOR;
 686       *index = 1;
 687       break;
 688    case VARYING_SLOT_CLIP_DIST0:
 689       *name = TGSI_SEMANTIC_CLIPDIST;
 690       *index = 0;
 691       break;
 692    case VARYING_SLOT_CLIP_DIST1:
 693       *name = TGSI_SEMANTIC_CLIPDIST;
 694       *index = 1;
 695       break;
 696    case VARYING_SLOT_CLIP_VERTEX:
 697       *name = TGSI_SEMANTIC_CLIPVERTEX;
 698       *index = 0;
 699       break;
 700    case VARYING_SLOT_COL0:
 701       *name = TGSI_SEMANTIC_COLOR;
 702       *index = 0;
 703       break;
 704    case VARYING_SLOT_COL1:
 705       *name = TGSI_SEMANTIC_COLOR;
 706       *index = 1;
 707       break;
 708    case VARYING_SLOT_EDGE:
 709       *name = TGSI_SEMANTIC_EDGEFLAG;
 710       *index = 0;
 711       break;
 712    case VARYING_SLOT_FACE:
 713       *name = TGSI_SEMANTIC_FACE;
 714       *index = 0;
 715       break;
 716    case VARYING_SLOT_FOGC:
 717       *name = TGSI_SEMANTIC_FOG;
 718       *index = 0;
 719       break;
 720    case VARYING_SLOT_LAYER:
 721       *name = TGSI_SEMANTIC_LAYER;
 722       *index = 0;
 723       break;
 724    case VARYING_SLOT_PNTC:
 725       *name = TGSI_SEMANTIC_PCOORD;
 726       *index = 0;
 727       break;
 728    case VARYING_SLOT_POS:
 729       *name = TGSI_SEMANTIC_POSITION;
 730       *index = 0;
 731       break;
 732    case VARYING_SLOT_PRIMITIVE_ID:
 733       *name = TGSI_SEMANTIC_PRIMID;
 734       *index = 0;
 735       break;
 736    case VARYING_SLOT_PSIZ:
 737       *name = TGSI_SEMANTIC_PSIZE;
 738       *index = 0;
 739       break;
 740    case VARYING_SLOT_TESS_LEVEL_INNER:
 741       *name = TGSI_SEMANTIC_TESSINNER;
 742       *index = 0;
 743       break;
 744    case VARYING_SLOT_TESS_LEVEL_OUTER:
 745       *name = TGSI_SEMANTIC_TESSOUTER;
 746       *index = 0;
 747       break;
 748    case VARYING_SLOT_VIEWPORT:
 749       *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
 750       *index = 0;
 751       break;
 752    default:
 753       ERROR("unknown varying slot %u\n", slot);
 754       assert(false);
 755       break;
 756    }
 757 }
 758
 759 static void
 760 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
 761 {
 762    if (slot >= FRAG_RESULT_DATA0) {
 763       *name = TGSI_SEMANTIC_COLOR;
 764       *index = slot - FRAG_RESULT_COLOR - 2; // intentional
 765       return;
 766    }
 767
 768    switch (slot) {
 769    case FRAG_RESULT_COLOR:
 770       *name = TGSI_SEMANTIC_COLOR;
 771       *index = 0;
 772       break;
 773    case FRAG_RESULT_DEPTH:
 774       *name = TGSI_SEMANTIC_POSITION;
 775       *index = 0;
 776       break;
 777    case FRAG_RESULT_SAMPLE_MASK:
 778       *name = TGSI_SEMANTIC_SAMPLEMASK;
 779       *index = 0;
 780       break;
 781    default:
 782       ERROR("unknown frag result slot %u\n", slot);
 783       assert(false);
 784       break;
 785    }
 786 }
 787
 788 // copy of _mesa_sysval_to_semantic
 789 static void
 790 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
 791 {
 792    *index = 0;
 793    switch (val) {
 794    // Vertex shader
 795    case SYSTEM_VALUE_VERTEX_ID:
 796       *name = TGSI_SEMANTIC_VERTEXID;
 797       break;
 798    case SYSTEM_VALUE_INSTANCE_ID:
 799       *name = TGSI_SEMANTIC_INSTANCEID;
 800       break;
 801    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
 802       *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
 803       break;
 804    case SYSTEM_VALUE_BASE_VERTEX:
 805       *name = TGSI_SEMANTIC_BASEVERTEX;
 806       break;
 807    case SYSTEM_VALUE_BASE_INSTANCE:
 808       *name = TGSI_SEMANTIC_BASEINSTANCE;
 809       break;
 810    case SYSTEM_VALUE_DRAW_ID:
 811       *name = TGSI_SEMANTIC_DRAWID;
 812       break;
 813
 814    // Geometry shader
 815    case SYSTEM_VALUE_INVOCATION_ID:
 816       *name = TGSI_SEMANTIC_INVOCATIONID;
 817       break;
 818
 819    // Fragment shader
 820    case SYSTEM_VALUE_FRAG_COORD:
 821       *name = TGSI_SEMANTIC_POSITION;
 822       break;
 823    case SYSTEM_VALUE_FRONT_FACE:
 824       *name = TGSI_SEMANTIC_FACE;
 825       break;
 826    case SYSTEM_VALUE_SAMPLE_ID:
 827       *name = TGSI_SEMANTIC_SAMPLEID;
 828       break;
 829    case SYSTEM_VALUE_SAMPLE_POS:
 830       *name = TGSI_SEMANTIC_SAMPLEPOS;
 831       break;
 832    case SYSTEM_VALUE_SAMPLE_MASK_IN:
 833       *name = TGSI_SEMANTIC_SAMPLEMASK;
 834       break;
 835    case SYSTEM_VALUE_HELPER_INVOCATION:
 836       *name = TGSI_SEMANTIC_HELPER_INVOCATION;
 837       break;
 838
 839    // Tessellation shader
 840    case SYSTEM_VALUE_TESS_COORD:
 841       *name = TGSI_SEMANTIC_TESSCOORD;
 842       break;
 843    case SYSTEM_VALUE_VERTICES_IN:
 844       *name = TGSI_SEMANTIC_VERTICESIN;
 845       break;
 846    case SYSTEM_VALUE_PRIMITIVE_ID:
 847       *name = TGSI_SEMANTIC_PRIMID;
 848       break;
 849    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 850       *name = TGSI_SEMANTIC_TESSOUTER;
 851       break;
 852    case SYSTEM_VALUE_TESS_LEVEL_INNER:
 853       *name = TGSI_SEMANTIC_TESSINNER;
 854       break;
 855
 856    // Compute shader
 857    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
 858       *name = TGSI_SEMANTIC_THREAD_ID;
 859       break;
 860    case SYSTEM_VALUE_WORK_GROUP_ID:
 861       *name = TGSI_SEMANTIC_BLOCK_ID;
 862       break;
 863    case SYSTEM_VALUE_NUM_WORK_GROUPS:
 864       *name = TGSI_SEMANTIC_GRID_SIZE;
 865       break;
 866    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
 867       *name = TGSI_SEMANTIC_BLOCK_SIZE;
 868       break;
 869
 870    // ARB_shader_ballot
 871    case SYSTEM_VALUE_SUBGROUP_SIZE:
 872       *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
 873       break;
 874    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
 875       *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
 876       break;
 877    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
 878       *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
 879       break;
 880    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
 881       *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
 882       break;
 883    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
 884       *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
 885       break;
 886    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
 887       *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
 888       break;
 889    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
 890       *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
 891       break;
 892
 893    default:
 894       ERROR("unknown system value %u\n", val);
 895       assert(false);
 896       break;
 897    }
 898 }
 899
 900 void
 901 Converter::setInterpolate(nv50_ir_varying *var,
 902                           uint8_t mode,
 903                           bool centroid,
 904                           unsigned semantic)
 905 {
 906    switch (mode) {
 907    case INTERP_MODE_FLAT:
 908       var->flat = 1;
 909       break;
 910    case INTERP_MODE_NONE:
 911       if (semantic == TGSI_SEMANTIC_COLOR)
 912          var->sc = 1;
 913       else if (semantic == TGSI_SEMANTIC_POSITION)
 914          var->linear = 1;
 915       break;
 916    case INTERP_MODE_NOPERSPECTIVE:
 917       var->linear = 1;
 918       break;
 919    case INTERP_MODE_SMOOTH:
 920       break;
 921    }
 922    var->centroid = centroid;
 923 }
 924
 925 static uint16_t
 926 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
 927           bool input, const nir_variable *var)
 928 {
 929    if (!type->is_array())
 930       return type->count_attribute_slots(false);
 931
 932    uint16_t slots;
 933    switch (stage) {
 934    case Program::TYPE_GEOMETRY:
 935       slots = type->uniform_locations();
 936       if (input)
 937          slots /= info.gs.vertices_in;
 938       break;
 939    case Program::TYPE_TESSELLATION_CONTROL:
 940    case Program::TYPE_TESSELLATION_EVAL:
 941       // remove first dimension
 942       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
 943          slots = type->uniform_locations();
 944       else
 945          slots = type->fields.array->uniform_locations();
 946       break;
 947    default:
 948       slots = type->count_attribute_slots(false);
 949       break;
 950    }
 951
 952    return slots;
 953 }
 954
 955 bool Converter::assignSlots() {
 956    unsigned name;
 957    unsigned index;
 958
 959    info->io.viewportId = -1;
 960    info->numInputs = 0;
 961
 962    // we have to fixup the uniform locations for arrays
 963    unsigned numImages = 0;
 964    nir_foreach_variable(var, &nir->uniforms) {
 965       const glsl_type *type = var->type;
 966       if (!type->without_array()->is_image())
 967          continue;
 968       var->data.driver_location = numImages;
 969       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
 970    }
 971
 972    nir_foreach_variable(var, &nir->inputs) {
 973       const glsl_type *type = var->type;
 974       int slot = var->data.location;
 975       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
 976       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
 977                                        : type->component_slots();
 978       uint32_t frac = var->data.location_frac;
 979       uint32_t vary = var->data.driver_location;
 980
 981       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
 982          if (comp > 2)
 983             slots *= 2;
 984       }
 985
 986       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
 987
 988       switch(prog->getType()) {
 989       case Program::TYPE_FRAGMENT:
 990          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 991          for (uint16_t i = 0; i < slots; ++i) {
 992             setInterpolate(&info->in[vary + i], var->data.interpolation,
 993                            var->data.centroid | var->data.sample, name);
 994          }
 995          break;
 996       case Program::TYPE_GEOMETRY:
 997          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 998          break;
 999       case Program::TYPE_TESSELLATION_CONTROL:
1000       case Program::TYPE_TESSELLATION_EVAL:
1001          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1002          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
1003             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1004          break;
1005       case Program::TYPE_VERTEX:
1006          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
1007          switch (name) {
1008          case TGSI_SEMANTIC_EDGEFLAG:
1009             info->io.edgeFlagIn = vary;
1010             break;
1011          default:
1012             break;
1013          }
1014          break;
1015       default:
1016          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1017          return false;
1018       }
1019
1020       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1021          info->in[vary].id = vary;
1022          info->in[vary].patch = var->data.patch;
1023          info->in[vary].sn = name;
1024          info->in[vary].si = index + i;
1025          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1026             if (i & 0x1)
1027                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1028             else
1029                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1030          else
1031             info->in[vary].mask |= ((1 << comp) - 1) << frac;
1032       }
1033       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
1034    }
1035
1036    info->numOutputs = 0;
1037    nir_foreach_variable(var, &nir->outputs) {
1038       const glsl_type *type = var->type;
1039       int slot = var->data.location;
1040       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
1041       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
1042                                        : type->component_slots();
1043       uint32_t frac = var->data.location_frac;
1044       uint32_t vary = var->data.driver_location;
1045
1046       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
1047          if (comp > 2)
1048             slots *= 2;
1049       }
1050
1051       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
1052
1053       switch(prog->getType()) {
1054       case Program::TYPE_FRAGMENT:
1055          frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
1056          switch (name) {
1057          case TGSI_SEMANTIC_COLOR:
1058             if (!var->data.fb_fetch_output)
1059                info->prop.fp.numColourResults++;
1060             info->prop.fp.separateFragData = true;
1061             // sometimes we get FRAG_RESULT_DATAX with data.index 0
1062             // sometimes we get FRAG_RESULT_DATA0 with data.index X
1063             index = index == 0 ? var->data.index : index;
1064             break;
1065          case TGSI_SEMANTIC_POSITION:
1066             info->io.fragDepth = vary;
1067             info->prop.fp.writesDepth = true;
1068             break;
1069          case TGSI_SEMANTIC_SAMPLEMASK:
1070             info->io.sampleMask = vary;
1071             break;
1072          default:
1073             break;
1074          }
1075          break;
1076       case Program::TYPE_GEOMETRY:
1077       case Program::TYPE_TESSELLATION_CONTROL:
1078       case Program::TYPE_TESSELLATION_EVAL:
1079       case Program::TYPE_VERTEX:
1080          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
1081
1082          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
1083              name != TGSI_SEMANTIC_TESSOUTER)
1084             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
1085
1086          switch (name) {
1087          case TGSI_SEMANTIC_CLIPDIST:
1088             info->io.genUserClip = -1;
1089             break;
1090          case TGSI_SEMANTIC_CLIPVERTEX:
1091             clipVertexOutput = vary;
1092             break;
1093          case TGSI_SEMANTIC_EDGEFLAG:
1094             info->io.edgeFlagOut = vary;
1095             break;
1096          case TGSI_SEMANTIC_POSITION:
1097             if (clipVertexOutput < 0)
1098                clipVertexOutput = vary;
1099             break;
1100          default:
1101             break;
1102          }
1103          break;
1104       default:
1105          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
1106          return false;
1107       }
1108
1109       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
1110          info->out[vary].id = vary;
1111          info->out[vary].patch = var->data.patch;
1112          info->out[vary].sn = name;
1113          info->out[vary].si = index + i;
1114          if (glsl_base_type_is_64bit(type->without_array()->base_type))
1115             if (i & 0x1)
1116                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
1117             else
1118                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
1119          else
1120             info->out[vary].mask |= ((1 << comp) - 1) << frac;
1121
1122          if (nir->info.outputs_read & 1ll << slot)
1123             info->out[vary].oread = 1;
1124       }
1125       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
1126    }
1127
1128    info->numSysVals = 0;
1129    for (uint8_t i = 0; i < 64; ++i) {
1130       if (!(nir->info.system_values_read & 1ll << i))
1131          continue;
1132
1133       system_val_to_tgsi_semantic(i, &name, &index);
1134       info->sv[info->numSysVals].sn = name;
1135       info->sv[info->numSysVals].si = index;
1136       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
1137
1138       switch (i) {
1139       case SYSTEM_VALUE_INSTANCE_ID:
1140          info->io.instanceId = info->numSysVals;
1141          break;
1142       case SYSTEM_VALUE_TESS_LEVEL_INNER:
1143       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1144          info->sv[info->numSysVals].patch = 1;
1145          break;
1146       case SYSTEM_VALUE_VERTEX_ID:
1147          info->io.vertexId = info->numSysVals;
1148          break;
1149       default:
1150          break;
1151       }
1152
1153       info->numSysVals += 1;
1154    }
1155
1156    if (info->io.genUserClip > 0) {
1157       info->io.clipDistances = info->io.genUserClip;
1158
1159       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1160
1161       for (unsigned int n = 0; n < nOut; ++n) {
1162          unsigned int i = info->numOutputs++;
1163          info->out[i].id = i;
1164          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1165          info->out[i].si = n;
1166          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1167       }
1168    }
1169
1170    return info->assignSlots(info) == 0;
1171 }
1172
1173 uint32_t
1174 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
1175 {
1176    DataType ty;
1177    int offset = nir_intrinsic_component(insn);
1178    bool input;
1179
1180    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
1181       ty = getDType(insn);
1182    else
1183       ty = getSType(insn->src[0], false, false);
1184
1185    switch (insn->intrinsic) {
1186    case nir_intrinsic_load_input:
1187    case nir_intrinsic_load_interpolated_input:
1188    case nir_intrinsic_load_per_vertex_input:
1189       input = true;
1190       break;
1191    case nir_intrinsic_load_output:
1192    case nir_intrinsic_load_per_vertex_output:
1193    case nir_intrinsic_store_output:
1194    case nir_intrinsic_store_per_vertex_output:
1195       input = false;
1196       break;
1197    default:
1198       ERROR("unknown intrinsic in getSlotAddress %s",
1199             nir_intrinsic_infos[insn->intrinsic].name);
1200       input = false;
1201       assert(false);
1202       break;
1203    }
1204
1205    if (typeSizeof(ty) == 8) {
1206       slot *= 2;
1207       slot += offset;
1208       if (slot >= 4) {
1209          idx += 1;
1210          slot -= 4;
1211       }
1212    } else {
1213       slot += offset;
1214    }
1215
1216    assert(slot < 4);
1217    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
1218    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
1219
1220    const nv50_ir_varying *vary = input ? info->in : info->out;
1221    return vary[idx].slot[slot] * 4;
1222 }
1223
1224 Instruction *
1225 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
1226                     uint32_t base, uint8_t c, Value *indirect0,
1227                     Value *indirect1, bool patch)
1228 {
1229    unsigned int tySize = typeSizeof(ty);
1230
1231    if (tySize == 8 &&
1232        (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
1233       Value *lo = getSSA();
1234       Value *hi = getSSA();
1235
1236       Instruction *loi =
1237          mkLoad(TYPE_U32, lo,
1238                 mkSymbol(file, i, TYPE_U32, base + c * tySize),
1239                 indirect0);
1240       loi->setIndirect(0, 1, indirect1);
1241       loi->perPatch = patch;
1242
1243       Instruction *hii =
1244          mkLoad(TYPE_U32, hi,
1245                 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1246                 indirect0);
1247       hii->setIndirect(0, 1, indirect1);
1248       hii->perPatch = patch;
1249
1250       return mkOp2(OP_MERGE, ty, def, lo, hi);
1251    } else {
1252       Instruction *ld =
1253          mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1254       ld->setIndirect(0, 1, indirect1);
1255       ld->perPatch = patch;
1256       return ld;
1257    }
1258 }
1259
1260 void
1261 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1262                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1263                    Value *indirect0, Value *indirect1)
1264 {
1265    uint8_t size = typeSizeof(ty);
1266    uint32_t address = getSlotAddress(insn, idx, c);
1267
1268    if (size == 8 && indirect0) {
1269       Value *split[2];
1270       mkSplit(split, 4, src);
1271
1272       if (op == OP_EXPORT) {
1273          split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1274          split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1275       }
1276
1277       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1278               split[0])->perPatch = info->out[idx].patch;
1279       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1280               split[1])->perPatch = info->out[idx].patch;
1281    } else {
1282       if (op == OP_EXPORT)
1283          src = mkMov(getSSA(size), src, ty)->getDef(0);
1284       mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1285               src)->perPatch = info->out[idx].patch;
1286    }
1287 }
1288
1289 bool
1290 Converter::parseNIR()
1291 {
1292    info->io.clipDistances = nir->info.clip_distance_array_size;
1293    info->io.cullDistances = nir->info.cull_distance_array_size;
1294
1295    switch(prog->getType()) {
1296    case Program::TYPE_COMPUTE:
1297       info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1298       info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1299       info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1300       info->bin.smemSize = nir->info.cs.shared_size;
1301       break;
1302    case Program::TYPE_FRAGMENT:
1303       info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1304       info->prop.fp.persampleInvocation =
1305          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1306          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1307       info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1308       info->prop.fp.readsSampleLocations =
1309          (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1310       info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1311       info->prop.fp.usesSampleMaskIn =
1312          !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1313       break;
1314    case Program::TYPE_GEOMETRY:
1315       info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1316       info->prop.gp.instanceCount = nir->info.gs.invocations;
1317       info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1318       info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1319       break;
1320    case Program::TYPE_TESSELLATION_CONTROL:
1321    case Program::TYPE_TESSELLATION_EVAL:
1322       if (nir->info.tess.primitive_mode == GL_ISOLINES)
1323          info->prop.tp.domain = GL_LINES;
1324       else
1325          info->prop.tp.domain = nir->info.tess.primitive_mode;
1326       info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1327       info->prop.tp.outputPrim =
1328          nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1329       info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1330       info->prop.tp.winding = !nir->info.tess.ccw;
1331       break;
1332    case Program::TYPE_VERTEX:
1333       info->prop.vp.usesDrawParameters =
1334          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1335          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1336          (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1337       break;
1338    default:
1339       break;
1340    }
1341
1342    return true;
1343 }
1344
1345 bool
1346 Converter::visit(nir_function *function)
1347 {
1348    // we only support emiting the main function for now
1349    assert(!strcmp(function->name, "main"));
1350    assert(function->impl);
1351
1352    // usually the blocks will set everything up, but main is special
1353    BasicBlock *entry = new BasicBlock(prog->main);
1354    exit = new BasicBlock(prog->main);
1355    blocks[nir_start_block(function->impl)->index] = entry;
1356    prog->main->setEntry(entry);
1357    prog->main->setExit(exit);
1358
1359    setPosition(entry, true);
1360
1361    if (info->io.genUserClip > 0) {
1362       for (int c = 0; c < 4; ++c)
1363          clipVtx[c] = getScratch();
1364    }
1365
1366    switch (prog->getType()) {
1367    case Program::TYPE_TESSELLATION_CONTROL:
1368       outBase = mkOp2v(
1369          OP_SUB, TYPE_U32, getSSA(),
1370          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
1371          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
1372       break;
1373    case Program::TYPE_FRAGMENT: {
1374       Symbol *sv = mkSysVal(SV_POSITION, 3);
1375       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
1376       fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
1377       break;
1378    }
1379    default:
1380       break;
1381    }
1382
1383    nir_index_ssa_defs(function->impl);
1384    foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
1385       if (!visit(node))
1386          return false;
1387    }
1388
1389    bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
1390    setPosition(exit, true);
1391
1392    if (info->io.genUserClip > 0)
1393       handleUserClipPlanes();
1394
1395    // TODO: for non main function this needs to be a OP_RETURN
1396    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1397    return true;
1398 }
1399
1400 bool
1401 Converter::visit(nir_cf_node *node)
1402 {
1403    switch (node->type) {
1404    case nir_cf_node_block:
1405       return visit(nir_cf_node_as_block(node));
1406    case nir_cf_node_if:
1407       return visit(nir_cf_node_as_if(node));
1408    case nir_cf_node_loop:
1409       return visit(nir_cf_node_as_loop(node));
1410    default:
1411       ERROR("unknown nir_cf_node type %u\n", node->type);
1412       return false;
1413    }
1414 }
1415
1416 bool
1417 Converter::visit(nir_block *block)
1418 {
1419    if (!block->predecessors->entries && block->instr_list.is_empty())
1420       return true;
1421
1422    BasicBlock *bb = convert(block);
1423
1424    setPosition(bb, true);
1425    nir_foreach_instr(insn, block) {
1426       if (!visit(insn))
1427          return false;
1428    }
1429    return true;
1430 }
1431
1432 bool
1433 Converter::visit(nir_if *nif)
1434 {
1435    DataType sType = getSType(nif->condition, false, false);
1436    Value *src = getSrc(&nif->condition, 0);
1437
1438    nir_block *lastThen = nir_if_last_then_block(nif);
1439    nir_block *lastElse = nir_if_last_else_block(nif);
1440
1441    assert(!lastThen->successors[1]);
1442    assert(!lastElse->successors[1]);
1443
1444    BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
1445    BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
1446
1447    bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
1448    bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
1449
1450    // we only insert joinats, if both nodes end up at the end of the if again.
1451    // the reason for this to not happens are breaks/continues/ret/... which
1452    // have their own handling
1453    if (lastThen->successors[0] == lastElse->successors[0])
1454       bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
1455                           CC_ALWAYS, NULL);
1456
1457    mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
1458
1459    foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
1460       if (!visit(node))
1461          return false;
1462    }
1463    setPosition(convert(lastThen), true);
1464    if (!bb->getExit() ||
1465        !bb->getExit()->asFlow() ||
1466         bb->getExit()->asFlow()->op == OP_JOIN) {
1467       BasicBlock *tailBB = convert(lastThen->successors[0]);
1468       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1469       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1470    }
1471
1472    foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
1473       if (!visit(node))
1474          return false;
1475    }
1476    setPosition(convert(lastElse), true);
1477    if (!bb->getExit() ||
1478        !bb->getExit()->asFlow() ||
1479         bb->getExit()->asFlow()->op == OP_JOIN) {
1480       BasicBlock *tailBB = convert(lastElse->successors[0]);
1481       mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
1482       bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
1483    }
1484
1485    if (lastThen->successors[0] == lastElse->successors[0]) {
1486       setPosition(convert(lastThen->successors[0]), true);
1487       mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
1488    }
1489
1490    return true;
1491 }
1492
1493 bool
1494 Converter::visit(nir_loop *loop)
1495 {
1496    curLoopDepth += 1;
1497    func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
1498
1499    BasicBlock *loopBB = convert(nir_loop_first_block(loop));
1500    BasicBlock *tailBB =
1501       convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
1502    bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
1503
1504    mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
1505    setPosition(loopBB, false);
1506    mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);
1507
1508    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
1509       if (!visit(node))
1510          return false;
1511    }
1512    Instruction *insn = bb->getExit();
1513    if (bb->cfg.incidentCount() != 0) {
1514       if (!insn || !insn->asFlow()) {
1515          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
1516          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
1517       } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
1518                  tailBB->cfg.incidentCount() == 0) {
1519          // RA doesn't like having blocks around with no incident edge,
1520          // so we create a fake one to make it happy
1521          bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
1522       }
1523    }
1524
1525    curLoopDepth -= 1;
1526
1527    return true;
1528 }
1529
1530 bool
1531 Converter::visit(nir_instr *insn)
1532 {
1533    switch (insn->type) {
1534    case nir_instr_type_alu:
1535       return visit(nir_instr_as_alu(insn));
1536    case nir_instr_type_intrinsic:
1537       return visit(nir_instr_as_intrinsic(insn));
1538    case nir_instr_type_jump:
1539       return visit(nir_instr_as_jump(insn));
1540    case nir_instr_type_load_const:
1541       return visit(nir_instr_as_load_const(insn));
1542    case nir_instr_type_ssa_undef:
1543       return visit(nir_instr_as_ssa_undef(insn));
1544    default:
1545       ERROR("unknown nir_instr type %u\n", insn->type);
1546       return false;
1547    }
1548    return true;
1549 }
1550
1551 SVSemantic
1552 Converter::convert(nir_intrinsic_op intr)
1553 {
1554    switch (intr) {
1555    case nir_intrinsic_load_base_vertex:
1556       return SV_BASEVERTEX;
1557    case nir_intrinsic_load_base_instance:
1558       return SV_BASEINSTANCE;
1559    case nir_intrinsic_load_draw_id:
1560       return SV_DRAWID;
1561    case nir_intrinsic_load_front_face:
1562       return SV_FACE;
1563    case nir_intrinsic_load_helper_invocation:
1564       return SV_THREAD_KILL;
1565    case nir_intrinsic_load_instance_id:
1566       return SV_INSTANCE_ID;
1567    case nir_intrinsic_load_invocation_id:
1568       return SV_INVOCATION_ID;
1569    case nir_intrinsic_load_local_group_size:
1570       return SV_NTID;
1571    case nir_intrinsic_load_local_invocation_id:
1572       return SV_TID;
1573    case nir_intrinsic_load_num_work_groups:
1574       return SV_NCTAID;
1575    case nir_intrinsic_load_patch_vertices_in:
1576       return SV_VERTEX_COUNT;
1577    case nir_intrinsic_load_primitive_id:
1578       return SV_PRIMITIVE_ID;
1579    case nir_intrinsic_load_sample_id:
1580       return SV_SAMPLE_INDEX;
1581    case nir_intrinsic_load_sample_mask_in:
1582       return SV_SAMPLE_MASK;
1583    case nir_intrinsic_load_sample_pos:
1584       return SV_SAMPLE_POS;
1585    case nir_intrinsic_load_subgroup_eq_mask:
1586       return SV_LANEMASK_EQ;
1587    case nir_intrinsic_load_subgroup_ge_mask:
1588       return SV_LANEMASK_GE;
1589    case nir_intrinsic_load_subgroup_gt_mask:
1590       return SV_LANEMASK_GT;
1591    case nir_intrinsic_load_subgroup_le_mask:
1592       return SV_LANEMASK_LE;
1593    case nir_intrinsic_load_subgroup_lt_mask:
1594       return SV_LANEMASK_LT;
1595    case nir_intrinsic_load_subgroup_invocation:
1596       return SV_LANEID;
1597    case nir_intrinsic_load_tess_coord:
1598       return SV_TESS_COORD;
1599    case nir_intrinsic_load_tess_level_inner:
1600       return SV_TESS_INNER;
1601    case nir_intrinsic_load_tess_level_outer:
1602       return SV_TESS_OUTER;
1603    case nir_intrinsic_load_vertex_id:
1604       return SV_VERTEX_ID;
1605    case nir_intrinsic_load_work_group_id:
1606       return SV_CTAID;
1607    default:
1608       ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
1609             nir_intrinsic_infos[intr].name);
1610       assert(false);
1611       return SV_LAST;
1612    }
1613 }
1614
1615 bool
1616 Converter::visit(nir_intrinsic_instr *insn)
1617 {
1618    nir_intrinsic_op op = insn->intrinsic;
1619
1620    switch (op) {
1621    case nir_intrinsic_load_uniform: {
1622       LValues &newDefs = convert(&insn->dest);
1623       const DataType dType = getDType(insn);
1624       Value *indirect;
1625       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
1626       for (uint8_t i = 0; i < insn->num_components; ++i) {
1627          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
1628       }
1629       break;
1630    }
1631    case nir_intrinsic_store_output:
1632    case nir_intrinsic_store_per_vertex_output: {
1633       Value *indirect;
1634       DataType dType = getSType(insn->src[0], false, false);
1635       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
1636
1637       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1638          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
1639             continue;
1640
1641          uint8_t offset = 0;
1642          Value *src = getSrc(&insn->src[0], i);
1643          switch (prog->getType()) {
1644          case Program::TYPE_FRAGMENT: {
1645             if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
1646                // TGSI uses a different interface than NIR, TGSI stores that
1647                // value in the z component, NIR in X
1648                offset += 2;
1649                src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
1650             }
1651             break;
1652          }
1653          case Program::TYPE_VERTEX: {
1654             if (info->io.genUserClip > 0 && idx == clipVertexOutput) {
1655                mkMov(clipVtx[i], src);
1656                src = clipVtx[i];
1657             }
1658             break;
1659          }
1660          default:
1661             break;
1662          }
1663
1664          storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);
1665       }
1666       break;
1667    }
1668    case nir_intrinsic_load_input:
1669    case nir_intrinsic_load_interpolated_input:
1670    case nir_intrinsic_load_output: {
1671       LValues &newDefs = convert(&insn->dest);
1672
1673       // FBFetch
1674       if (prog->getType() == Program::TYPE_FRAGMENT &&
1675           op == nir_intrinsic_load_output) {
1676          std::vector<Value*> defs, srcs;
1677          uint8_t mask = 0;
1678
1679          srcs.push_back(getSSA());
1680          srcs.push_back(getSSA());
1681          Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));
1682          Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));
1683          mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
1684          mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
1685
1686          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
1687          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
1688
1689          for (uint8_t i = 0u; i < insn->num_components; ++i) {
1690             defs.push_back(newDefs[i]);
1691             mask |= 1 << i;
1692          }
1693
1694          TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);
1695          texi->tex.levelZero = 1;
1696          texi->tex.mask = mask;
1697          texi->tex.useOffsets = 0;
1698          texi->tex.r = 0xffff;
1699          texi->tex.s = 0xffff;
1700
1701          info->prop.fp.readsFramebuffer = true;
1702          break;
1703       }
1704
1705       const DataType dType = getDType(insn);
1706       Value *indirect;
1707       bool input = op != nir_intrinsic_load_output;
1708       operation nvirOp;
1709       uint32_t mode = 0;
1710
1711       uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
1712       nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
1713
1714       // see load_barycentric_* handling
1715       if (prog->getType() == Program::TYPE_FRAGMENT) {
1716          mode = translateInterpMode(&vary, nvirOp);
1717          if (op == nir_intrinsic_load_interpolated_input) {
1718             ImmediateValue immMode;
1719             if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
1720                mode |= immMode.reg.data.u32;
1721          }
1722       }
1723
1724       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1725          uint32_t address = getSlotAddress(insn, idx, i);
1726          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
1727          if (prog->getType() == Program::TYPE_FRAGMENT) {
1728             int s = 1;
1729             if (typeSizeof(dType) == 8) {
1730                Value *lo = getSSA();
1731                Value *hi = getSSA();
1732                Instruction *interp;
1733
1734                interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
1735                if (nvirOp == OP_PINTERP)
1736                   interp->setSrc(s++, fp.position);
1737                if (mode & NV50_IR_INTERP_OFFSET)
1738                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1739                interp->setInterpolate(mode);
1740                interp->setIndirect(0, 0, indirect);
1741
1742                Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);
1743                interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
1744                if (nvirOp == OP_PINTERP)
1745                   interp->setSrc(s++, fp.position);
1746                if (mode & NV50_IR_INTERP_OFFSET)
1747                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1748                interp->setInterpolate(mode);
1749                interp->setIndirect(0, 0, indirect);
1750
1751                mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
1752             } else {
1753                Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
1754                if (nvirOp == OP_PINTERP)
1755                   interp->setSrc(s++, fp.position);
1756                if (mode & NV50_IR_INTERP_OFFSET)
1757                   interp->setSrc(s++, getSrc(&insn->src[0], 0));
1758                interp->setInterpolate(mode);
1759                interp->setIndirect(0, 0, indirect);
1760             }
1761          } else {
1762             mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
1763          }
1764       }
1765       break;
1766    }
1767    case nir_intrinsic_load_barycentric_at_offset:
1768    case nir_intrinsic_load_barycentric_at_sample:
1769    case nir_intrinsic_load_barycentric_centroid:
1770    case nir_intrinsic_load_barycentric_pixel:
1771    case nir_intrinsic_load_barycentric_sample: {
1772       LValues &newDefs = convert(&insn->dest);
1773       uint32_t mode;
1774
1775       if (op == nir_intrinsic_load_barycentric_centroid ||
1776           op == nir_intrinsic_load_barycentric_sample) {
1777          mode = NV50_IR_INTERP_CENTROID;
1778       } else if (op == nir_intrinsic_load_barycentric_at_offset) {
1779          Value *offs[2];
1780          for (uint8_t c = 0; c < 2; c++) {
1781             offs[c] = getScratch();
1782             mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));
1783             mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
1784             mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
1785             mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
1786          }
1787          mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);
1788
1789          mode = NV50_IR_INTERP_OFFSET;
1790       } else if (op == nir_intrinsic_load_barycentric_pixel) {
1791          mode = NV50_IR_INTERP_DEFAULT;
1792       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
1793          info->prop.fp.readsSampleLocations = true;
1794          mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
1795          mode = NV50_IR_INTERP_OFFSET;
1796       } else {
1797          unreachable("all intrinsics already handled above");
1798       }
1799
1800       loadImm(newDefs[1], mode);
1801       break;
1802    }
1803    case nir_intrinsic_discard:
1804       mkOp(OP_DISCARD, TYPE_NONE, NULL);
1805       break;
1806    case nir_intrinsic_discard_if: {
1807       Value *pred = getSSA(1, FILE_PREDICATE);
1808       if (insn->num_components > 1) {
1809          ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
1810          assert(false);
1811          return false;
1812       }
1813       mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);
1814       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
1815       break;
1816    }
1817    case nir_intrinsic_load_base_vertex:
1818    case nir_intrinsic_load_base_instance:
1819    case nir_intrinsic_load_draw_id:
1820    case nir_intrinsic_load_front_face:
1821    case nir_intrinsic_load_helper_invocation:
1822    case nir_intrinsic_load_instance_id:
1823    case nir_intrinsic_load_invocation_id:
1824    case nir_intrinsic_load_local_group_size:
1825    case nir_intrinsic_load_local_invocation_id:
1826    case nir_intrinsic_load_num_work_groups:
1827    case nir_intrinsic_load_patch_vertices_in:
1828    case nir_intrinsic_load_primitive_id:
1829    case nir_intrinsic_load_sample_id:
1830    case nir_intrinsic_load_sample_mask_in:
1831    case nir_intrinsic_load_sample_pos:
1832    case nir_intrinsic_load_subgroup_eq_mask:
1833    case nir_intrinsic_load_subgroup_ge_mask:
1834    case nir_intrinsic_load_subgroup_gt_mask:
1835    case nir_intrinsic_load_subgroup_le_mask:
1836    case nir_intrinsic_load_subgroup_lt_mask:
1837    case nir_intrinsic_load_subgroup_invocation:
1838    case nir_intrinsic_load_tess_coord:
1839    case nir_intrinsic_load_tess_level_inner:
1840    case nir_intrinsic_load_tess_level_outer:
1841    case nir_intrinsic_load_vertex_id:
1842    case nir_intrinsic_load_work_group_id: {
1843       const DataType dType = getDType(insn);
1844       SVSemantic sv = convert(op);
1845       LValues &newDefs = convert(&insn->dest);
1846
1847       for (uint8_t i = 0u; i < insn->num_components; ++i) {
1848          Value *def;
1849          if (typeSizeof(dType) == 8)
1850             def = getSSA();
1851          else
1852             def = newDefs[i];
1853
1854          if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
1855             loadImm(def, 0u);
1856          } else {
1857             Symbol *sym = mkSysVal(sv, i);
1858             Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);
1859             if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)
1860                rdsv->perPatch = 1;
1861          }
1862
1863          if (typeSizeof(dType) == 8)
1864             mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));
1865       }
1866       break;
1867    }
1868    // constants
1869    case nir_intrinsic_load_subgroup_size: {
1870       LValues &newDefs = convert(&insn->dest);
1871       loadImm(newDefs[0], 32u);
1872       break;
1873    }
1874    default:
1875       ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
1876       return false;
1877    }
1878
1879    return true;
1880 }
1881
1882 bool
1883 Converter::visit(nir_jump_instr *insn)
1884 {
1885    switch (insn->type) {
1886    case nir_jump_return:
1887       // TODO: this only works in the main function
1888       mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
1889       bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
1890       break;
1891    case nir_jump_break:
1892    case nir_jump_continue: {
1893       bool isBreak = insn->type == nir_jump_break;
1894       nir_block *block = insn->instr.block;
1895       assert(!block->successors[1]);
1896       BasicBlock *target = convert(block->successors[0]);
1897       mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
1898       bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
1899       break;
1900    }
1901    default:
1902       ERROR("unknown nir_jump_type %u\n", insn->type);
1903       return false;
1904    }
1905
1906    return true;
1907 }
1908
1909 bool
1910 Converter::visit(nir_load_const_instr *insn)
1911 {
1912    assert(insn->def.bit_size <= 64);
1913
1914    LValues &newDefs = convert(&insn->def);
1915    for (int i = 0; i < insn->def.num_components; i++) {
1916       switch (insn->def.bit_size) {
1917       case 64:
1918          loadImm(newDefs[i], insn->value.u64[i]);
1919          break;
1920       case 32:
1921          loadImm(newDefs[i], insn->value.u32[i]);
1922          break;
1923       case 16:
1924          loadImm(newDefs[i], insn->value.u16[i]);
1925          break;
1926       case 8:
1927          loadImm(newDefs[i], insn->value.u8[i]);
1928          break;
1929       }
1930    }
1931    return true;
1932 }
1933
1934 #define DEFAULT_CHECKS \
1935       if (insn->dest.dest.ssa.num_components > 1) { \
1936          ERROR("nir_alu_instr only supported with 1 component!\n"); \
1937          return false; \
1938       } \
1939       if (insn->dest.write_mask != 1) { \
1940          ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
1941          return false; \
1942       }
1943 bool
1944 Converter::visit(nir_alu_instr *insn)
1945 {
1946    const nir_op op = insn->op;
1947    const nir_op_info &info = nir_op_infos[op];
1948    DataType dType = getDType(insn);
1949    const std::vector<DataType> sTypes = getSTypes(insn);
1950
1951    Instruction *oldPos = this->bb->getExit();
1952
1953    switch (op) {
1954    case nir_op_fabs:
1955    case nir_op_iabs:
1956    case nir_op_fadd:
1957    case nir_op_iadd:
1958    case nir_op_fand:
1959    case nir_op_iand:
1960    case nir_op_fceil:
1961    case nir_op_fcos:
1962    case nir_op_fddx:
1963    case nir_op_fddx_coarse:
1964    case nir_op_fddx_fine:
1965    case nir_op_fddy:
1966    case nir_op_fddy_coarse:
1967    case nir_op_fddy_fine:
1968    case nir_op_fdiv:
1969    case nir_op_idiv:
1970    case nir_op_udiv:
1971    case nir_op_fexp2:
1972    case nir_op_ffloor:
1973    case nir_op_ffma:
1974    case nir_op_flog2:
1975    case nir_op_fmax:
1976    case nir_op_imax:
1977    case nir_op_umax:
1978    case nir_op_fmin:
1979    case nir_op_imin:
1980    case nir_op_umin:
1981    case nir_op_fmod:
1982    case nir_op_imod:
1983    case nir_op_umod:
1984    case nir_op_fmul:
1985    case nir_op_imul:
1986    case nir_op_imul_high:
1987    case nir_op_umul_high:
1988    case nir_op_fneg:
1989    case nir_op_ineg:
1990    case nir_op_fnot:
1991    case nir_op_inot:
1992    case nir_op_for:
1993    case nir_op_ior:
1994    case nir_op_pack_64_2x32_split:
1995    case nir_op_fpow:
1996    case nir_op_frcp:
1997    case nir_op_frem:
1998    case nir_op_irem:
1999    case nir_op_frsq:
2000    case nir_op_fsat:
2001    case nir_op_ishr:
2002    case nir_op_ushr:
2003    case nir_op_fsin:
2004    case nir_op_fsqrt:
2005    case nir_op_fsub:
2006    case nir_op_isub:
2007    case nir_op_ftrunc:
2008    case nir_op_ishl:
2009    case nir_op_fxor:
2010    case nir_op_ixor: {
2011       DEFAULT_CHECKS;
2012       LValues &newDefs = convert(&insn->dest);
2013       operation preOp = preOperationNeeded(op);
2014       if (preOp != OP_NOP) {
2015          assert(info.num_inputs < 2);
2016          Value *tmp = getSSA(typeSizeof(dType));
2017          Instruction *i0 = mkOp(preOp, dType, tmp);
2018          Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
2019          if (info.num_inputs) {
2020             i0->setSrc(0, getSrc(&insn->src[0]));
2021             i1->setSrc(0, tmp);
2022          }
2023          i1->subOp = getSubOp(op);
2024       } else {
2025          Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
2026          for (unsigned s = 0u; s < info.num_inputs; ++s) {
2027             i->setSrc(s, getSrc(&insn->src[s]));
2028          }
2029          i->subOp = getSubOp(op);
2030       }
2031       break;
2032    }
2033    case nir_op_ifind_msb:
2034    case nir_op_ufind_msb: {
2035       DEFAULT_CHECKS;
2036       LValues &newDefs = convert(&insn->dest);
2037       dType = sTypes[0];
2038       mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2039       break;
2040    }
2041    case nir_op_fround_even: {
2042       DEFAULT_CHECKS;
2043       LValues &newDefs = convert(&insn->dest);
2044       mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
2045       break;
2046    }
2047    // convert instructions
2048    case nir_op_f2f32:
2049    case nir_op_f2i32:
2050    case nir_op_f2u32:
2051    case nir_op_i2f32:
2052    case nir_op_i2i32:
2053    case nir_op_u2f32:
2054    case nir_op_u2u32:
2055    case nir_op_f2f64:
2056    case nir_op_f2i64:
2057    case nir_op_f2u64:
2058    case nir_op_i2f64:
2059    case nir_op_i2i64:
2060    case nir_op_u2f64:
2061    case nir_op_u2u64: {
2062       DEFAULT_CHECKS;
2063       LValues &newDefs = convert(&insn->dest);
2064       Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
2065       if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
2066          i->rnd = ROUND_Z;
2067       i->sType = sTypes[0];
2068       break;
2069    }
2070    // compare instructions
2071    case nir_op_feq32:
2072    case nir_op_ieq32:
2073    case nir_op_fge32:
2074    case nir_op_ige32:
2075    case nir_op_uge32:
2076    case nir_op_flt32:
2077    case nir_op_ilt32:
2078    case nir_op_ult32:
2079    case nir_op_fne32:
2080    case nir_op_ine32: {
2081       DEFAULT_CHECKS;
2082       LValues &newDefs = convert(&insn->dest);
2083       Instruction *i = mkCmp(getOperation(op),
2084                              getCondCode(op),
2085                              dType,
2086                              newDefs[0],
2087                              dType,
2088                              getSrc(&insn->src[0]),
2089                              getSrc(&insn->src[1]));
2090       if (info.num_inputs == 3)
2091          i->setSrc(2, getSrc(&insn->src[2]));
2092       i->sType = sTypes[0];
2093       break;
2094    }
2095    // those are weird ALU ops and need special handling, because
2096    //   1. they are always componend based
2097    //   2. they basically just merge multiple values into one data type
2098    case nir_op_imov:
2099    case nir_op_fmov:
2100    case nir_op_vec2:
2101    case nir_op_vec3:
2102    case nir_op_vec4: {
2103       LValues &newDefs = convert(&insn->dest);
2104       for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
2105          mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
2106       }
2107       break;
2108    }
2109    // (un)pack
2110    case nir_op_pack_64_2x32: {
2111       LValues &newDefs = convert(&insn->dest);
2112       Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
2113       merge->setSrc(0, getSrc(&insn->src[0], 0));
2114       merge->setSrc(1, getSrc(&insn->src[0], 1));
2115       break;
2116    }
2117    case nir_op_pack_half_2x16_split: {
2118       LValues &newDefs = convert(&insn->dest);
2119       Value *tmpH = getSSA();
2120       Value *tmpL = getSSA();
2121
2122       mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
2123       mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
2124       mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
2125       break;
2126    }
2127    case nir_op_unpack_half_2x16_split_x:
2128    case nir_op_unpack_half_2x16_split_y: {
2129       LValues &newDefs = convert(&insn->dest);
2130       Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
2131       if (op == nir_op_unpack_half_2x16_split_y)
2132          cvt->subOp = 1;
2133       break;
2134    }
2135    case nir_op_unpack_64_2x32: {
2136       LValues &newDefs = convert(&insn->dest);
2137       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
2138       break;
2139    }
2140    case nir_op_unpack_64_2x32_split_x: {
2141       LValues &newDefs = convert(&insn->dest);
2142       mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
2143       break;
2144    }
2145    case nir_op_unpack_64_2x32_split_y: {
2146       LValues &newDefs = convert(&insn->dest);
2147       mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
2148       break;
2149    }
2150    // special instructions
2151    case nir_op_fsign:
2152    case nir_op_isign: {
2153       DEFAULT_CHECKS;
2154       DataType iType;
2155       if (::isFloatType(dType))
2156          iType = TYPE_F32;
2157       else
2158          iType = TYPE_S32;
2159
2160       LValues &newDefs = convert(&insn->dest);
2161       LValue *val0 = getScratch();
2162       LValue *val1 = getScratch();
2163       mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
2164       mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
2165
2166       if (dType == TYPE_F64) {
2167          mkOp2(OP_SUB, iType, val0, val0, val1);
2168          mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
2169       } else if (dType == TYPE_S64 || dType == TYPE_U64) {
2170          mkOp2(OP_SUB, iType, val0, val1, val0);
2171          mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
2172          mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
2173       } else if (::isFloatType(dType))
2174          mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
2175       else
2176          mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
2177       break;
2178    }
2179    case nir_op_fcsel:
2180    case nir_op_b32csel: {
2181       DEFAULT_CHECKS;
2182       LValues &newDefs = convert(&insn->dest);
2183       mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
2184       break;
2185    }
2186    case nir_op_ibitfield_extract:
2187    case nir_op_ubitfield_extract: {
2188       DEFAULT_CHECKS;
2189       Value *tmp = getSSA();
2190       LValues &newDefs = convert(&insn->dest);
2191       mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2192       mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
2193       break;
2194    }
2195    case nir_op_bfm: {
2196       DEFAULT_CHECKS;
2197       LValues &newDefs = convert(&insn->dest);
2198       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
2199       break;
2200    }
2201    case nir_op_bitfield_insert: {
2202       DEFAULT_CHECKS;
2203       LValues &newDefs = convert(&insn->dest);
2204       LValue *temp = getSSA();
2205       mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
2206       mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
2207       break;
2208    }
2209    case nir_op_bit_count: {
2210       DEFAULT_CHECKS;
2211       LValues &newDefs = convert(&insn->dest);
2212       mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
2213       break;
2214    }
2215    case nir_op_bitfield_reverse: {
2216       DEFAULT_CHECKS;
2217       LValues &newDefs = convert(&insn->dest);
2218       mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2219       break;
2220    }
2221    case nir_op_find_lsb: {
2222       DEFAULT_CHECKS;
2223       LValues &newDefs = convert(&insn->dest);
2224       Value *tmp = getSSA();
2225       mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
2226       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
2227       break;
2228    }
2229    // boolean conversions
2230    case nir_op_b2f32: {
2231       DEFAULT_CHECKS;
2232       LValues &newDefs = convert(&insn->dest);
2233       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
2234       break;
2235    }
2236    case nir_op_b2f64: {
2237       DEFAULT_CHECKS;
2238       LValues &newDefs = convert(&insn->dest);
2239       Value *tmp = getSSA(4);
2240       mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
2241       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
2242       break;
2243    }
2244    case nir_op_f2b32:
2245    case nir_op_i2b32: {
2246       DEFAULT_CHECKS;
2247       LValues &newDefs = convert(&insn->dest);
2248       Value *src1;
2249       if (typeSizeof(sTypes[0]) == 8) {
2250          src1 = loadImm(getSSA(8), 0.0);
2251       } else {
2252          src1 = zero;
2253       }
2254       CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
2255       mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
2256       break;
2257    }
2258    case nir_op_b2i32: {
2259       DEFAULT_CHECKS;
2260       LValues &newDefs = convert(&insn->dest);
2261       mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
2262       break;
2263    }
2264    case nir_op_b2i64: {
2265       DEFAULT_CHECKS;
2266       LValues &newDefs = convert(&insn->dest);
2267       LValue *def = getScratch();
2268       mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
2269       mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
2270       break;
2271    }
2272    default:
2273       ERROR("unknown nir_op %s\n", info.name);
2274       return false;
2275    }
2276
2277    if (!oldPos) {
2278       oldPos = this->bb->getEntry();
2279       oldPos->precise = insn->exact;
2280    }
2281
2282    if (unlikely(!oldPos))
2283       return true;
2284
2285    while (oldPos->next) {
2286       oldPos = oldPos->next;
2287       oldPos->precise = insn->exact;
2288    }
2289    oldPos->saturate = insn->dest.saturate;
2290
2291    return true;
2292 }
2293 #undef DEFAULT_CHECKS
2294
2295 bool
2296 Converter::visit(nir_ssa_undef_instr *insn)
2297 {
2298    LValues &newDefs = convert(&insn->def);
2299    for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
2300       mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
2301    }
2302    return true;
2303 }
2304
2305 bool
2306 Converter::run()
2307 {
2308    bool progress;
2309
2310    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
2311       nir_print_shader(nir, stderr);
2312
2313    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
2314    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
2315    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
2316    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2317    NIR_PASS_V(nir, nir_lower_alu_to_scalar);
2318    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
2319
2320    do {
2321       progress = false;
2322       NIR_PASS(progress, nir, nir_copy_prop);
2323       NIR_PASS(progress, nir, nir_opt_remove_phis);
2324       NIR_PASS(progress, nir, nir_opt_trivial_continues);
2325       NIR_PASS(progress, nir, nir_opt_cse);
2326       NIR_PASS(progress, nir, nir_opt_algebraic);
2327       NIR_PASS(progress, nir, nir_opt_constant_folding);
2328       NIR_PASS(progress, nir, nir_copy_prop);
2329       NIR_PASS(progress, nir, nir_opt_dce);
2330       NIR_PASS(progress, nir, nir_opt_dead_cf);
2331    } while (progress);
2332
2333    NIR_PASS_V(nir, nir_lower_bool_to_int32);
2334    NIR_PASS_V(nir, nir_lower_locals_to_regs);
2335    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
2336    NIR_PASS_V(nir, nir_convert_from_ssa, true);
2337
2338    // Garbage collect dead instructions
2339    nir_sweep(nir);
2340
2341    if (!parseNIR()) {
2342       ERROR("Couldn't prase NIR!\n");
2343       return false;
2344    }
2345
2346    if (!assignSlots()) {
2347       ERROR("Couldn't assign slots!\n");
2348       return false;
2349    }
2350
2351    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2352       nir_print_shader(nir, stderr);
2353
2354    nir_foreach_function(function, nir) {
2355       if (!visit(function))
2356          return false;
2357    }
2358
2359    return true;
2360 }
2361
2362 } // unnamed namespace
2363
2364 namespace nv50_ir {
2365
2366 bool
2367 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2368 {
2369    nir_shader *nir = (nir_shader*)info->bin.source;
2370    Converter converter(this, nir, info);
2371    bool result = converter.run();
2372    if (!result)
2373       return result;
2374    LoweringHelper lowering;
2375    lowering.run(this);
2376    tlsSize = info->bin.tlsSpace;
2377    return result;
2378 }
2379
2380 } // namespace nv50_ir