nv50/ir/nir: implement nir_alu_instr handling
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_from_nir.cpp
1 /*
2 * Copyright 2017 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Karol Herbst <kherbst@redhat.com>
23 */
24
25 #include "compiler/nir/nir.h"
26
27 #include "util/u_debug.h"
28
29 #include "codegen/nv50_ir.h"
30 #include "codegen/nv50_ir_from_common.h"
31 #include "codegen/nv50_ir_lowering_helper.h"
32 #include "codegen/nv50_ir_util.h"
33
34 #if __cplusplus >= 201103L
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <vector>
40
41 namespace {
42
43 #if __cplusplus >= 201103L
44 using std::hash;
45 using std::unordered_map;
46 #else
47 using std::tr1::hash;
48 using std::tr1::unordered_map;
49 #endif
50
51 using namespace nv50_ir;
52
53 int
54 type_size(const struct glsl_type *type)
55 {
56 return glsl_count_attribute_slots(type, false);
57 }
58
// Translates a NIR shader into nv50 IR. One Converter instance handles a
// single shader; state below tracks the mapping from NIR SSA values,
// registers and blocks to their nv50 IR counterparts.
class Converter : public ConverterCommon
{
public:
   Converter(Program *, nir_shader *, nv50_ir_prog_info *);

   bool run();
private:
   // one LValue per vector component of a NIR def/register
   typedef std::vector<LValue*> LValues;
   typedef unordered_map<unsigned, LValues> NirDefMap;
   typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;

   // convert(): look up (or lazily create) the nv50 IR object for a NIR one
   LValues& convert(nir_alu_dest *);
   BasicBlock* convert(nir_block *);
   LValues& convert(nir_dest *);
   LValues& convert(nir_register *);
   LValues& convert(nir_ssa_def *);

   // getSrc(): fetch the Value backing one component of a NIR source
   Value* getSrc(nir_alu_src *, uint8_t component = 0);
   Value* getSrc(nir_register *, uint8_t);
   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
   Value* getSrc(nir_ssa_def *, uint8_t);

   // returned value is the constant part of the given source (either the
   // nir_src or the selected source component of an intrinsic). Even though
   // this is mostly an optimization to be able to skip indirects in a few
   // cases, sometimes we require immediate values or set some fields on
   // instructions (e.g. tex) in order for codegen to consume those.
   // If the found value has not a constant part, the Value gets returned
   // through the Value parameter.
   uint32_t getIndirect(nir_src *, uint8_t, Value *&);
   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);

   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);

   void setInterpolate(nv50_ir_varying *,
                       uint8_t,
                       bool centroid,
                       unsigned semantics);

   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
                         uint8_t c, Value *indirect0 = NULL,
                         Value *indirect1 = NULL, bool patch = false);
   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
                Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
                Value *indirect1 = NULL);

   // type classification helpers for picking signed/unsigned/float variants
   bool isFloatType(nir_alu_type);
   bool isSignedType(nir_alu_type);
   bool isResultFloat(nir_op);
   bool isResultSigned(nir_op);

   // destination/source DataType resolution for ALU and intrinsic instrs
   DataType getDType(nir_alu_instr *);
   DataType getDType(nir_intrinsic_instr *);
   DataType getDType(nir_op, uint8_t);

   std::vector<DataType> getSTypes(nir_alu_instr *);
   DataType getSType(nir_src &, bool isFloat, bool isSigned);

   // nir_op -> nv50 IR operation / sub-op / condition-code mapping
   operation getOperation(nir_op);
   operation preOperationNeeded(nir_op);

   int getSubOp(nir_op);

   CondCode getCondCode(nir_op);

   // fill out nv50_ir_prog_info in/out/sysval slot tables from NIR variables
   bool assignSlots();
   bool parseNIR();

   // per-construct visitors driving the translation
   bool visit(nir_alu_instr *);
   bool visit(nir_block *);
   bool visit(nir_cf_node *);
   bool visit(nir_function *);
   bool visit(nir_if *);
   bool visit(nir_instr *);
   bool visit(nir_intrinsic_instr *);
   bool visit(nir_jump_instr *);
   bool visit(nir_load_const_instr*);
   bool visit(nir_loop *);

   nir_shader *nir;

   NirDefMap ssaDefs;
   NirDefMap regDefs;
   NirBlockMap blocks;
   unsigned int curLoopDepth;

   BasicBlock *exit;
   Value *zero;

   // stage-specific scratch state
   union {
      struct {
         Value *position;
      } fp;
   };
};
154
// Sets up common converter state; a reusable immediate zero is created up
// front since many translations need it.
Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
   : ConverterCommon(prog, info),
     nir(nir),
     curLoopDepth(0)
{
   zero = mkImm((uint32_t)0);
}
162
163 BasicBlock *
164 Converter::convert(nir_block *block)
165 {
166 NirBlockMap::iterator it = blocks.find(block->index);
167 if (it != blocks.end())
168 return it->second;
169
170 BasicBlock *bb = new BasicBlock(func);
171 blocks[block->index] = bb;
172 return bb;
173 }
174
175 bool
176 Converter::isFloatType(nir_alu_type type)
177 {
178 return nir_alu_type_get_base_type(type) == nir_type_float;
179 }
180
181 bool
182 Converter::isSignedType(nir_alu_type type)
183 {
184 return nir_alu_type_get_base_type(type) == nir_type_int;
185 }
186
187 bool
188 Converter::isResultFloat(nir_op op)
189 {
190 const nir_op_info &info = nir_op_infos[op];
191 if (info.output_type != nir_type_invalid)
192 return isFloatType(info.output_type);
193
194 ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
195 assert(false);
196 return true;
197 }
198
199 bool
200 Converter::isResultSigned(nir_op op)
201 {
202 switch (op) {
203 // there is no umul and we get wrong results if we treat all muls as signed
204 case nir_op_imul:
205 case nir_op_inot:
206 return false;
207 default:
208 const nir_op_info &info = nir_op_infos[op];
209 if (info.output_type != nir_type_invalid)
210 return isSignedType(info.output_type);
211 ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
212 assert(false);
213 return true;
214 }
215 }
216
217 DataType
218 Converter::getDType(nir_alu_instr *insn)
219 {
220 if (insn->dest.dest.is_ssa)
221 return getDType(insn->op, insn->dest.dest.ssa.bit_size);
222 else
223 return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
224 }
225
226 DataType
227 Converter::getDType(nir_intrinsic_instr *insn)
228 {
229 if (insn->dest.is_ssa)
230 return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
231 else
232 return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
233 }
234
235 DataType
236 Converter::getDType(nir_op op, uint8_t bitSize)
237 {
238 DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
239 if (ty == TYPE_NONE) {
240 ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
241 assert(false);
242 }
243 return ty;
244 }
245
246 std::vector<DataType>
247 Converter::getSTypes(nir_alu_instr *insn)
248 {
249 const nir_op_info &info = nir_op_infos[insn->op];
250 std::vector<DataType> res(info.num_inputs);
251
252 for (uint8_t i = 0; i < info.num_inputs; ++i) {
253 if (info.input_types[i] != nir_type_invalid) {
254 res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
255 } else {
256 ERROR("getSType not implemented for %s idx %u\n", info.name, i);
257 assert(false);
258 res[i] = TYPE_NONE;
259 break;
260 }
261 }
262
263 return res;
264 }
265
266 DataType
267 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
268 {
269 uint8_t bitSize;
270 if (src.is_ssa)
271 bitSize = src.ssa->bit_size;
272 else
273 bitSize = src.reg.reg->bit_size;
274
275 DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
276 if (ty == TYPE_NONE) {
277 const char *str;
278 if (isFloat)
279 str = "float";
280 else if (isSigned)
281 str = "int";
282 else
283 str = "uint";
284 ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
285 assert(false);
286 }
287 return ty;
288 }
289
// Map a nir_op to the corresponding nv50 IR operation. Float/int/unsigned
// variants of the same conceptual op collapse to one IR opcode; the type
// distinction is carried separately (see getDType/getSTypes). Ops with no
// direct mapping are handled specially in the ALU visitor and never reach
// this table's default case.
operation
Converter::getOperation(nir_op op)
{
   switch (op) {
   // basic ops with float and int variants
   case nir_op_fabs:
   case nir_op_iabs:
      return OP_ABS;
   case nir_op_fadd:
   case nir_op_iadd:
      return OP_ADD;
   case nir_op_fand:
   case nir_op_iand:
      return OP_AND;
   case nir_op_ifind_msb:
   case nir_op_ufind_msb:
      return OP_BFIND;
   case nir_op_fceil:
      return OP_CEIL;
   case nir_op_fcos:
      return OP_COS;
   // all size/type conversions go through a single CVT op; the concrete
   // source/dest types select the actual conversion
   case nir_op_f2f32:
   case nir_op_f2f64:
   case nir_op_f2i32:
   case nir_op_f2i64:
   case nir_op_f2u32:
   case nir_op_f2u64:
   case nir_op_i2f32:
   case nir_op_i2f64:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2f32:
   case nir_op_u2f64:
   case nir_op_u2u32:
   case nir_op_u2u64:
      return OP_CVT;
   // coarse/fine derivative variants are distinguished via sub-op elsewhere
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      return OP_DFDX;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return OP_DFDY;
   case nir_op_fdiv:
   case nir_op_idiv:
   case nir_op_udiv:
      return OP_DIV;
   case nir_op_fexp2:
      return OP_EX2;
   case nir_op_ffloor:
      return OP_FLOOR;
   case nir_op_ffma:
      return OP_FMA;
   case nir_op_flog2:
      return OP_LG2;
   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      return OP_MAX;
   case nir_op_pack_64_2x32_split:
      return OP_MERGE;
   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      return OP_MIN;
   case nir_op_fmod:
   case nir_op_imod:
   case nir_op_umod:
   case nir_op_frem:
   case nir_op_irem:
      return OP_MOD;
   // imul_high/umul_high get NV50_IR_SUBOP_MUL_HIGH via getSubOp()
   case nir_op_fmul:
   case nir_op_imul:
   case nir_op_imul_high:
   case nir_op_umul_high:
      return OP_MUL;
   case nir_op_fneg:
   case nir_op_ineg:
      return OP_NEG;
   case nir_op_fnot:
   case nir_op_inot:
      return OP_NOT;
   case nir_op_for:
   case nir_op_ior:
      return OP_OR;
   case nir_op_fpow:
      return OP_POW;
   case nir_op_frcp:
      return OP_RCP;
   case nir_op_frsq:
      return OP_RSQ;
   case nir_op_fsat:
      return OP_SAT;
   // comparisons all map to SET; the condition code is added via getCondCode()
   case nir_op_feq32:
   case nir_op_ieq32:
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
   case nir_op_fne32:
   case nir_op_ine32:
      return OP_SET;
   case nir_op_ishl:
      return OP_SHL;
   case nir_op_ishr:
   case nir_op_ushr:
      return OP_SHR;
   case nir_op_fsin:
      return OP_SIN;
   case nir_op_fsqrt:
      return OP_SQRT;
   case nir_op_fsub:
   case nir_op_isub:
      return OP_SUB;
   case nir_op_ftrunc:
      return OP_TRUNC;
   case nir_op_fxor:
   case nir_op_ixor:
      return OP_XOR;
   default:
      ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
      assert(false);
      return OP_NOP;
   }
}
418
419 operation
420 Converter::preOperationNeeded(nir_op op)
421 {
422 switch (op) {
423 case nir_op_fcos:
424 case nir_op_fsin:
425 return OP_PRESIN;
426 default:
427 return OP_NOP;
428 }
429 }
430
431 int
432 Converter::getSubOp(nir_op op)
433 {
434 switch (op) {
435 case nir_op_imul_high:
436 case nir_op_umul_high:
437 return NV50_IR_SUBOP_MUL_HIGH;
438 default:
439 return 0;
440 }
441 }
442
// Condition code for NIR comparison ops feeding OP_SET. Note the float
// not-equal uses CC_NEU while the integer one uses CC_NE — presumably the
// unordered variant so NaN operands compare not-equal; verify against the
// nv50 IR condition-code semantics if touching this.
CondCode
Converter::getCondCode(nir_op op)
{
   switch (op) {
   case nir_op_feq32:
   case nir_op_ieq32:
      return CC_EQ;
   case nir_op_fge32:
   case nir_op_ige32:
   case nir_op_uge32:
      return CC_GE;
   case nir_op_flt32:
   case nir_op_ilt32:
   case nir_op_ult32:
      return CC_LT;
   case nir_op_fne32:
      return CC_NEU;
   case nir_op_ine32:
      return CC_NE;
   default:
      ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
      assert(false);
      return CC_FL;
   }
}
468
// An ALU dest is just a nir_dest plus modifiers; forward to the nir_dest
// overload (modifiers are checked elsewhere).
Converter::LValues&
Converter::convert(nir_alu_dest *dest)
{
   return convert(&dest->dest);
}
474
475 Converter::LValues&
476 Converter::convert(nir_dest *dest)
477 {
478 if (dest->is_ssa)
479 return convert(&dest->ssa);
480 if (dest->reg.indirect) {
481 ERROR("no support for indirects.");
482 assert(false);
483 }
484 return convert(dest->reg.reg);
485 }
486
487 Converter::LValues&
488 Converter::convert(nir_register *reg)
489 {
490 NirDefMap::iterator it = regDefs.find(reg->index);
491 if (it != regDefs.end())
492 return it->second;
493
494 LValues newDef(reg->num_components);
495 for (uint8_t i = 0; i < reg->num_components; i++)
496 newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
497 return regDefs[reg->index] = newDef;
498 }
499
500 Converter::LValues&
501 Converter::convert(nir_ssa_def *def)
502 {
503 NirDefMap::iterator it = ssaDefs.find(def->index);
504 if (it != ssaDefs.end())
505 return it->second;
506
507 LValues newDef(def->num_components);
508 for (uint8_t i = 0; i < def->num_components; i++)
509 newDef[i] = getSSA(std::max(4, def->bit_size / 8));
510 return ssaDefs[def->index] = newDef;
511 }
512
513 Value*
514 Converter::getSrc(nir_alu_src *src, uint8_t component)
515 {
516 if (src->abs || src->negate) {
517 ERROR("modifiers currently not supported on nir_alu_src\n");
518 assert(false);
519 }
520 return getSrc(&src->src, src->swizzle[component]);
521 }
522
523 Value*
524 Converter::getSrc(nir_register *reg, uint8_t idx)
525 {
526 NirDefMap::iterator it = regDefs.find(reg->index);
527 if (it == regDefs.end())
528 return convert(reg)[idx];
529 return it->second[idx];
530 }
531
532 Value*
533 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
534 {
535 if (src->is_ssa)
536 return getSrc(src->ssa, idx);
537
538 if (src->reg.indirect) {
539 if (indirect)
540 return getSrc(src->reg.indirect, idx);
541 ERROR("no support for indirects.");
542 assert(false);
543 return NULL;
544 }
545
546 return getSrc(src->reg.reg, idx);
547 }
548
549 Value*
550 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
551 {
552 NirDefMap::iterator it = ssaDefs.find(src->index);
553 if (it == ssaDefs.end()) {
554 ERROR("SSA value %u not found\n", src->index);
555 assert(false);
556 return NULL;
557 }
558 return it->second[idx];
559 }
560
561 uint32_t
562 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
563 {
564 nir_const_value *offset = nir_src_as_const_value(*src);
565
566 if (offset) {
567 indirect = NULL;
568 return offset->u32[0];
569 }
570
571 indirect = getSrc(src, idx, true);
572 return 0;
573 }
574
575 uint32_t
576 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
577 {
578 int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
579 if (indirect)
580 indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
581 return idx;
582 }
583
// Translate a vertex-input slot into a TGSI semantic name + index pair,
// which the rest of the nv50 codegen consumes. Generic and texcoord ranges
// are handled as offsets; the remaining fixed-function attributes map 1:1.
static void
vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VERT_ATTRIB_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VERT_ATTRIB_GENERIC0 &&
       slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VERT_ATTRIB_GENERIC0;
      return;
   }

   if (slot >= VERT_ATTRIB_TEX0 &&
       slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VERT_ATTRIB_TEX0;
      return;
   }

   switch (slot) {
   case VERT_ATTRIB_COLOR0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VERT_ATTRIB_COLOR1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VERT_ATTRIB_EDGEFLAG:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VERT_ATTRIB_FOG:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VERT_ATTRIB_NORMAL:
      *name = TGSI_SEMANTIC_NORMAL;
      *index = 0;
      break;
   case VERT_ATTRIB_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VERT_ATTRIB_POINT_SIZE:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   default:
      ERROR("unknown vert attrib slot %u\n", slot);
      assert(false);
      break;
   }
}
644
// Translate a varying slot (FS inputs and VS/GS/tess outputs) into a TGSI
// semantic name + index. Patch/generic/texcoord ranges are offset-based;
// order of the range checks matters since PATCH0 > VAR0 > TEX0.
static void
varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
{
   assert(name && index);

   if (slot >= VARYING_SLOT_TESS_MAX) {
      ERROR("invalid varying slot %u\n", slot);
      assert(false);
      return;
   }

   if (slot >= VARYING_SLOT_PATCH0) {
      *name = TGSI_SEMANTIC_PATCH;
      *index = slot - VARYING_SLOT_PATCH0;
      return;
   }

   if (slot >= VARYING_SLOT_VAR0) {
      *name = TGSI_SEMANTIC_GENERIC;
      *index = slot - VARYING_SLOT_VAR0;
      return;
   }

   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
      *name = TGSI_SEMANTIC_TEXCOORD;
      *index = slot - VARYING_SLOT_TEX0;
      return;
   }

   switch (slot) {
   case VARYING_SLOT_BFC0:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 0;
      break;
   case VARYING_SLOT_BFC1:
      *name = TGSI_SEMANTIC_BCOLOR;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_DIST0:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 0;
      break;
   case VARYING_SLOT_CLIP_DIST1:
      *name = TGSI_SEMANTIC_CLIPDIST;
      *index = 1;
      break;
   case VARYING_SLOT_CLIP_VERTEX:
      *name = TGSI_SEMANTIC_CLIPVERTEX;
      *index = 0;
      break;
   case VARYING_SLOT_COL0:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case VARYING_SLOT_COL1:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 1;
      break;
   case VARYING_SLOT_EDGE:
      *name = TGSI_SEMANTIC_EDGEFLAG;
      *index = 0;
      break;
   case VARYING_SLOT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      *index = 0;
      break;
   case VARYING_SLOT_FOGC:
      *name = TGSI_SEMANTIC_FOG;
      *index = 0;
      break;
   case VARYING_SLOT_LAYER:
      *name = TGSI_SEMANTIC_LAYER;
      *index = 0;
      break;
   case VARYING_SLOT_PNTC:
      *name = TGSI_SEMANTIC_PCOORD;
      *index = 0;
      break;
   case VARYING_SLOT_POS:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case VARYING_SLOT_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      *index = 0;
      break;
   case VARYING_SLOT_PSIZ:
      *name = TGSI_SEMANTIC_PSIZE;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      *index = 0;
      break;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      *index = 0;
      break;
   case VARYING_SLOT_VIEWPORT:
      *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
      *index = 0;
      break;
   default:
      ERROR("unknown varying slot %u\n", slot);
      assert(false);
      break;
   }
}
753
// Translate a fragment-shader output slot into a TGSI semantic name + index.
static void
frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
{
   if (slot >= FRAG_RESULT_DATA0) {
      *name = TGSI_SEMANTIC_COLOR;
      // equivalent to (slot - FRAG_RESULT_DATA0): DATA0 sits two entries
      // past COLOR in gl_frag_result, hence the "- 2"
      *index = slot - FRAG_RESULT_COLOR - 2; // intentional
      return;
   }

   switch (slot) {
   case FRAG_RESULT_COLOR:
      *name = TGSI_SEMANTIC_COLOR;
      *index = 0;
      break;
   case FRAG_RESULT_DEPTH:
      *name = TGSI_SEMANTIC_POSITION;
      *index = 0;
      break;
   case FRAG_RESULT_SAMPLE_MASK:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      *index = 0;
      break;
   default:
      ERROR("unknown frag result slot %u\n", slot);
      assert(false);
      break;
   }
}
782
// copy of _mesa_sysval_to_semantic
// Translate a SYSTEM_VALUE_* enum into a TGSI semantic name; the index is
// always 0 for system values. Grouped by the shader stage that typically
// reads each value.
static void
system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
{
   *index = 0;
   switch (val) {
   // Vertex shader
   case SYSTEM_VALUE_VERTEX_ID:
      *name = TGSI_SEMANTIC_VERTEXID;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
      *name = TGSI_SEMANTIC_INSTANCEID;
      break;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
      break;
   case SYSTEM_VALUE_BASE_VERTEX:
      *name = TGSI_SEMANTIC_BASEVERTEX;
      break;
   case SYSTEM_VALUE_BASE_INSTANCE:
      *name = TGSI_SEMANTIC_BASEINSTANCE;
      break;
   case SYSTEM_VALUE_DRAW_ID:
      *name = TGSI_SEMANTIC_DRAWID;
      break;

   // Geometry shader
   case SYSTEM_VALUE_INVOCATION_ID:
      *name = TGSI_SEMANTIC_INVOCATIONID;
      break;

   // Fragment shader
   case SYSTEM_VALUE_FRAG_COORD:
      *name = TGSI_SEMANTIC_POSITION;
      break;
   case SYSTEM_VALUE_FRONT_FACE:
      *name = TGSI_SEMANTIC_FACE;
      break;
   case SYSTEM_VALUE_SAMPLE_ID:
      *name = TGSI_SEMANTIC_SAMPLEID;
      break;
   case SYSTEM_VALUE_SAMPLE_POS:
      *name = TGSI_SEMANTIC_SAMPLEPOS;
      break;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      *name = TGSI_SEMANTIC_SAMPLEMASK;
      break;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      *name = TGSI_SEMANTIC_HELPER_INVOCATION;
      break;

   // Tessellation shader
   case SYSTEM_VALUE_TESS_COORD:
      *name = TGSI_SEMANTIC_TESSCOORD;
      break;
   case SYSTEM_VALUE_VERTICES_IN:
      *name = TGSI_SEMANTIC_VERTICESIN;
      break;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      *name = TGSI_SEMANTIC_PRIMID;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      *name = TGSI_SEMANTIC_TESSOUTER;
      break;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      *name = TGSI_SEMANTIC_TESSINNER;
      break;

   // Compute shader
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      *name = TGSI_SEMANTIC_THREAD_ID;
      break;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      *name = TGSI_SEMANTIC_BLOCK_ID;
      break;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      *name = TGSI_SEMANTIC_GRID_SIZE;
      break;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      *name = TGSI_SEMANTIC_BLOCK_SIZE;
      break;

   // ARB_shader_ballot
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
      break;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
      break;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
      break;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
      break;

   default:
      ERROR("unknown system value %u\n", val);
      assert(false);
      break;
   }
}
894
895 void
896 Converter::setInterpolate(nv50_ir_varying *var,
897 uint8_t mode,
898 bool centroid,
899 unsigned semantic)
900 {
901 switch (mode) {
902 case INTERP_MODE_FLAT:
903 var->flat = 1;
904 break;
905 case INTERP_MODE_NONE:
906 if (semantic == TGSI_SEMANTIC_COLOR)
907 var->sc = 1;
908 else if (semantic == TGSI_SEMANTIC_POSITION)
909 var->linear = 1;
910 break;
911 case INTERP_MODE_NOPERSPECTIVE:
912 var->linear = 1;
913 break;
914 case INTERP_MODE_SMOOTH:
915 break;
916 }
917 var->centroid = centroid;
918 }
919
// Number of I/O slots a variable occupies for slot assignment. Non-arrays
// are straightforward; arrays need stage-specific handling because GS inputs
// and tessellation I/O carry an implicit per-vertex outer array dimension
// that must not be counted.
static uint16_t
calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
          bool input, const nir_variable *var)
{
   if (!type->is_array())
      return type->count_attribute_slots(false);

   uint16_t slots;
   switch (stage) {
   case Program::TYPE_GEOMETRY:
      // GS inputs are arrayed per input vertex; divide that dimension out
      slots = type->uniform_locations();
      if (input)
         slots /= info.gs.vertices_in;
      break;
   case Program::TYPE_TESSELLATION_CONTROL:
   case Program::TYPE_TESSELLATION_EVAL:
      // remove first dimension
      if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
         slots = type->uniform_locations();
      else
         slots = type->fields.array->uniform_locations();
      break;
   default:
      slots = type->count_attribute_slots(false);
      break;
   }

   return slots;
}
949
// Walk the NIR variable lists and populate the nv50_ir_prog_info in/out/
// sysval tables (semantic name, index, component mask, patch flag), then
// hand off to the driver's assignSlots callback for the final hardware
// slot layout. Returns false on an unknown shader type or callback failure.
bool Converter::assignSlots() {
   unsigned name;
   unsigned index;

   info->io.viewportId = -1;
   info->numInputs = 0;

   // we have to fixup the uniform locations for arrays
   unsigned numImages = 0;
   nir_foreach_variable(var, &nir->uniforms) {
      const glsl_type *type = var->type;
      if (!type->without_array()->is_image())
         continue;
      var->data.driver_location = numImages;
      numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
   }

   // -------- inputs --------
   nir_foreach_variable(var, &nir->inputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      // 64-bit types with more than two components spill into a second slot
      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         for (uint16_t i = 0; i < slots; ++i) {
            setInterpolate(&info->in[vary + i], var->data.interpolation,
                           var->data.centroid | var->data.sample, name);
         }
         break;
      case Program::TYPE_GEOMETRY:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         break;
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
         if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
         break;
      case Program::TYPE_VERTEX:
         vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagIn = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->in[vary].id = vary;
         info->in[vary].patch = var->data.patch;
         info->in[vary].sn = name;
         info->in[vary].si = index + i;
         // 64-bit values use two mask bits per component; the odd slot of a
         // split value takes the upper half of the doubled mask
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->in[vary].mask |= ((1 << comp) - 1) << frac;
      }
      info->numInputs = std::max<uint8_t>(info->numInputs, vary);
   }

   // -------- outputs --------
   info->numOutputs = 0;
   nir_foreach_variable(var, &nir->outputs) {
      const glsl_type *type = var->type;
      int slot = var->data.location;
      uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
      uint32_t comp = type->is_array() ? type->without_array()->component_slots()
                                       : type->component_slots();
      uint32_t frac = var->data.location_frac;
      uint32_t vary = var->data.driver_location;

      if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
         if (comp > 2)
            slots *= 2;
      }

      assert(vary < PIPE_MAX_SHADER_OUTPUTS);

      switch(prog->getType()) {
      case Program::TYPE_FRAGMENT:
         frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
         switch (name) {
         case TGSI_SEMANTIC_COLOR:
            if (!var->data.fb_fetch_output)
               info->prop.fp.numColourResults++;
            info->prop.fp.separateFragData = true;
            // sometimes we get FRAG_RESULT_DATAX with data.index 0
            // sometimes we get FRAG_RESULT_DATA0 with data.index X
            index = index == 0 ? var->data.index : index;
            break;
         case TGSI_SEMANTIC_POSITION:
            info->io.fragDepth = vary;
            info->prop.fp.writesDepth = true;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = vary;
            break;
         default:
            break;
         }
         break;
      case Program::TYPE_GEOMETRY:
      case Program::TYPE_TESSELLATION_CONTROL:
      case Program::TYPE_TESSELLATION_EVAL:
      case Program::TYPE_VERTEX:
         varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);

         if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
             name != TGSI_SEMANTIC_TESSOUTER)
            info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);

         switch (name) {
         case TGSI_SEMANTIC_CLIPDIST:
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = vary;
            break;
         default:
            break;
         }
         break;
      default:
         ERROR("unknown shader type %u in assignSlots\n", prog->getType());
         return false;
      }

      for (uint16_t i = 0u; i < slots; ++i, ++vary) {
         info->out[vary].id = vary;
         info->out[vary].patch = var->data.patch;
         info->out[vary].sn = name;
         info->out[vary].si = index + i;
         if (glsl_base_type_is_64bit(type->without_array()->base_type))
            if (i & 0x1)
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
            else
               info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
         else
            info->out[vary].mask |= ((1 << comp) - 1) << frac;

         if (nir->info.outputs_read & 1ll << slot)
            info->out[vary].oread = 1;
      }
      info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
   }

   // -------- system values --------
   info->numSysVals = 0;
   for (uint8_t i = 0; i < 64; ++i) {
      if (!(nir->info.system_values_read & 1ll << i))
         continue;

      system_val_to_tgsi_semantic(i, &name, &index);
      info->sv[info->numSysVals].sn = name;
      info->sv[info->numSysVals].si = index;
      info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);

      switch (i) {
      case SYSTEM_VALUE_INSTANCE_ID:
         info->io.instanceId = info->numSysVals;
         break;
      case SYSTEM_VALUE_TESS_LEVEL_INNER:
      case SYSTEM_VALUE_TESS_LEVEL_OUTER:
         info->sv[info->numSysVals].patch = 1;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         info->io.vertexId = info->numSysVals;
         break;
      default:
         break;
      }

      info->numSysVals += 1;
   }

   // synthesize clip-distance outputs when the driver asked us to generate
   // user clipping (genUserClip > 0), one vec4 output per 4 distances
   if (info->io.genUserClip > 0) {
      info->io.clipDistances = info->io.genUserClip;

      const unsigned int nOut = (info->io.genUserClip + 3) / 4;

      for (unsigned int n = 0; n < nOut; ++n) {
         unsigned int i = info->numOutputs++;
         info->out[i].id = i;
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
         info->out[i].si = n;
         info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
      }
   }

   return info->assignSlots(info) == 0;
}
1160
// Compute the byte address of one component of an I/O slot for a load/store
// intrinsic, using the slot table filled in by assignSlots(). 64-bit values
// take two components each, so the component index is rescaled and may
// carry over into the next vec4 slot.
uint32_t
Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
{
   DataType ty;
   int offset = nir_intrinsic_component(insn);
   bool input;

   // loads take the type from their dest, stores from the stored source
   if (nir_intrinsic_infos[insn->intrinsic].has_dest)
      ty = getDType(insn);
   else
      ty = getSType(insn->src[0], false, false);

   switch (insn->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      input = true;
      break;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      input = false;
      break;
   default:
      ERROR("unknown intrinsic in getSlotAddress %s",
            nir_intrinsic_infos[insn->intrinsic].name);
      input = false;
      assert(false);
      break;
   }

   if (typeSizeof(ty) == 8) {
      // 64-bit: two 32-bit components per logical component; overflow past
      // component 3 moves to the next slot index
      slot *= 2;
      slot += offset;
      if (slot >= 4) {
         idx += 1;
         slot -= 4;
      }
   } else {
      slot += offset;
   }

   assert(slot < 4);
   assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
   assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);

   const nv50_ir_varying *vary = input ? info->in : info->out;
   return vary[idx].slot[slot] * 4;
}
1211
// Emit a load of one component from a symbol in the given file into "def".
//
// indirect0/indirect1 are optional indirect address sources (may be NULL);
// "patch" marks per-patch tessellation I/O.  Returns the emitted load (or
// the final OP_MERGE for split 64-bit loads).
Instruction *
Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
                    uint32_t base, uint8_t c, Value *indirect0,
                    Value *indirect1, bool patch)
{
   unsigned int tySize = typeSizeof(ty);

   // 64-bit loads from const/buffer memory, or any indirect load, are
   // split into two 32-bit loads whose results get merged into "def".
   if (tySize == 8 &&
       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
      Value *lo = getSSA();
      Value *hi = getSSA();

      // low 32 bits
      Instruction *loi =
         mkLoad(TYPE_U32, lo,
                mkSymbol(file, i, TYPE_U32, base + c * tySize),
                indirect0);
      loi->setIndirect(0, 1, indirect1);
      loi->perPatch = patch;

      // high 32 bits, 4 bytes further
      Instruction *hii =
         mkLoad(TYPE_U32, hi,
                mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
                indirect0);
      hii->setIndirect(0, 1, indirect1);
      hii->perPatch = patch;

      return mkOp2(OP_MERGE, ty, def, lo, hi);
   } else {
      // simple case: a single load of the full type
      Instruction *ld =
         mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
      ld->setIndirect(0, 1, indirect1);
      ld->perPatch = patch;
      return ld;
   }
}
1247
// Emit a store of "src" to an output varying component.
//
// The byte address is derived from the intrinsic via getSlotAddress().
// 64-bit indirect stores are split into two 32-bit stores.  OP_EXPORT
// sources are copied through a fresh mov first (the export cannot consume
// arbitrary values directly).
void
Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
                   DataType ty, Value *src, uint8_t idx, uint8_t c,
                   Value *indirect0, Value *indirect1)
{
   uint8_t size = typeSizeof(ty);
   uint32_t address = getSlotAddress(insn, idx, c);

   if (size == 8 && indirect0) {
      // split the 64-bit value and store the two halves separately
      Value *split[2];
      mkSplit(split, 4, src);

      if (op == OP_EXPORT) {
         split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
         split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
      }

      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
              split[0])->perPatch = info->out[idx].patch;
      mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
              split[1])->perPatch = info->out[idx].patch;
   } else {
      if (op == OP_EXPORT)
         src = mkMov(getSSA(size), src, ty)->getDef(0);
      mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
              src)->perPatch = info->out[idx].patch;
   }
}
1276
1277 bool
1278 Converter::parseNIR()
1279 {
1280 info->io.clipDistances = nir->info.clip_distance_array_size;
1281 info->io.cullDistances = nir->info.cull_distance_array_size;
1282
1283 switch(prog->getType()) {
1284 case Program::TYPE_COMPUTE:
1285 info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
1286 info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
1287 info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
1288 info->bin.smemSize = nir->info.cs.shared_size;
1289 break;
1290 case Program::TYPE_FRAGMENT:
1291 info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
1292 info->prop.fp.persampleInvocation =
1293 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
1294 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1295 info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
1296 info->prop.fp.readsSampleLocations =
1297 (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
1298 info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
1299 info->prop.fp.usesSampleMaskIn =
1300 !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
1301 break;
1302 case Program::TYPE_GEOMETRY:
1303 info->prop.gp.inputPrim = nir->info.gs.input_primitive;
1304 info->prop.gp.instanceCount = nir->info.gs.invocations;
1305 info->prop.gp.maxVertices = nir->info.gs.vertices_out;
1306 info->prop.gp.outputPrim = nir->info.gs.output_primitive;
1307 break;
1308 case Program::TYPE_TESSELLATION_CONTROL:
1309 case Program::TYPE_TESSELLATION_EVAL:
1310 if (nir->info.tess.primitive_mode == GL_ISOLINES)
1311 info->prop.tp.domain = GL_LINES;
1312 else
1313 info->prop.tp.domain = nir->info.tess.primitive_mode;
1314 info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
1315 info->prop.tp.outputPrim =
1316 nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
1317 info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
1318 info->prop.tp.winding = !nir->info.tess.ccw;
1319 break;
1320 case Program::TYPE_VERTEX:
1321 info->prop.vp.usesDrawParameters =
1322 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
1323 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
1324 (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
1325 break;
1326 default:
1327 break;
1328 }
1329
1330 return true;
1331 }
1332
// Convert one NIR function into nv50 IR.  Only "main" is supported:
// entry/exit basic blocks are created here and stage-specific prologue
// code is emitted before the function body is visited.
bool
Converter::visit(nir_function *function)
{
   // we only support emitting the main function for now
   assert(!strcmp(function->name, "main"));
   assert(function->impl);

   // usually the blocks will set everything up, but main is special
   BasicBlock *entry = new BasicBlock(prog->main);
   exit = new BasicBlock(prog->main);
   blocks[nir_start_block(function->impl)->index] = entry;
   prog->main->setEntry(entry);
   prog->main->setExit(exit);

   setPosition(entry, true);

   switch (prog->getType()) {
   case Program::TYPE_TESSELLATION_CONTROL:
      // outBase = laneid - invocation_id: base lane of this patch's
      // invocations, used later for TCS output addressing
      outBase = mkOp2v(
         OP_SUB, TYPE_U32, getSSA(),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
      break;
   case Program::TYPE_FRAGMENT: {
      // read position.w and precompute 1/w for perspective correction
      Symbol *sv = mkSysVal(SV_POSITION, 3);
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
      fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
      break;
   }
   default:
      break;
   }

   // visit the body; blocks are created/connected on demand
   nir_index_ssa_defs(function->impl);
   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
      if (!visit(node))
         return false;
   }

   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
   setPosition(exit, true);

   // TODO: for non main function this needs to be a OP_RETURN
   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   return true;
}
1379
1380 bool
1381 Converter::visit(nir_cf_node *node)
1382 {
1383 switch (node->type) {
1384 case nir_cf_node_block:
1385 return visit(nir_cf_node_as_block(node));
1386 case nir_cf_node_if:
1387 return visit(nir_cf_node_as_if(node));
1388 case nir_cf_node_loop:
1389 return visit(nir_cf_node_as_loop(node));
1390 default:
1391 ERROR("unknown nir_cf_node type %u\n", node->type);
1392 return false;
1393 }
1394 }
1395
1396 bool
1397 Converter::visit(nir_block *block)
1398 {
1399 if (!block->predecessors->entries && block->instr_list.is_empty())
1400 return true;
1401
1402 BasicBlock *bb = convert(block);
1403
1404 setPosition(bb, true);
1405 nir_foreach_instr(insn, block) {
1406 if (!visit(insn))
1407 return false;
1408 }
1409 return true;
1410 }
1411
// Convert a NIR if-statement: branch over the then-block when the
// condition is zero, emit both arms, and (when both arms rejoin) wrap
// the construct in JOINAT/JOIN for the hardware's reconvergence handling.
bool
Converter::visit(nir_if *nif)
{
   DataType sType = getSType(nif->condition, false, false);
   Value *src = getSrc(&nif->condition, 0);

   nir_block *lastThen = nir_if_last_then_block(nif);
   nir_block *lastElse = nir_if_last_else_block(nif);

   // the last block of each arm must fall through to a single successor
   assert(!lastThen->successors[1]);
   assert(!lastElse->successors[1]);

   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));

   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);

   // we only insert joinats, if both arms end up at the end of the if again.
   // the reason for this not happening is breaks/continues/ret/... which
   // have their own handling
   if (lastThen->successors[0] == lastElse->successors[0])
      bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
                          CC_ALWAYS, NULL);

   // condition == 0 -> take the else arm
   mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);

   foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
      if (!visit(node))
         return false;
   }
   // if the then arm didn't already end in control flow (break/cont/...),
   // fall through to the merge block explicitly
   setPosition(convert(lastThen), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastThen->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
      if (!visit(node))
         return false;
   }
   // same fall-through handling for the else arm
   setPosition(convert(lastElse), true);
   if (!bb->getExit() ||
       !bb->getExit()->asFlow() ||
       bb->getExit()->asFlow()->op == OP_JOIN) {
      BasicBlock *tailBB = convert(lastElse->successors[0]);
      mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
      bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
   }

   // matching JOIN at the reconvergence point for the JOINAT above
   if (lastThen->successors[0] == lastElse->successors[0]) {
      setPosition(convert(lastThen->successors[0]), true);
      mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
   }

   return true;
}
1472
// Convert a NIR loop: set up PREBREAK/PRECONT markers for the hardware,
// emit the body, and close the back edge unless the body already ended
// in explicit control flow.
bool
Converter::visit(nir_loop *loop)
{
   curLoopDepth += 1;
   func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);

   BasicBlock *loopBB = convert(nir_loop_first_block(loop));
   // the block right after the loop is where breaks land
   BasicBlock *tailBB =
      convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
   bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);

   mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
   setPosition(loopBB, false);
   mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);

   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
      if (!visit(node))
         return false;
   }
   Instruction *insn = bb->getExit();
   if (bb->cfg.incidentCount() != 0) {
      // the body fell off the end: close the loop with an explicit CONT
      if (!insn || !insn->asFlow()) {
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
      } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
                 tailBB->cfg.incidentCount() == 0) {
         // RA doesn't like having blocks around with no incident edge,
         // so we create a fake one to make it happy
         bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
      }
   }

   curLoopDepth -= 1;

   return true;
}
1509
1510 bool
1511 Converter::visit(nir_instr *insn)
1512 {
1513 switch (insn->type) {
1514 case nir_instr_type_alu:
1515 return visit(nir_instr_as_alu(insn));
1516 case nir_instr_type_intrinsic:
1517 return visit(nir_instr_as_intrinsic(insn));
1518 case nir_instr_type_jump:
1519 return visit(nir_instr_as_jump(insn));
1520 case nir_instr_type_load_const:
1521 return visit(nir_instr_as_load_const(insn));
1522 default:
1523 ERROR("unknown nir_instr type %u\n", insn->type);
1524 return false;
1525 }
1526 return true;
1527 }
1528
1529 bool
1530 Converter::visit(nir_intrinsic_instr *insn)
1531 {
1532 nir_intrinsic_op op = insn->intrinsic;
1533
1534 switch (op) {
1535 default:
1536 ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
1537 return false;
1538 }
1539
1540 return true;
1541 }
1542
// Convert a NIR jump (return/break/continue) into the matching nv50 IR
// flow instruction plus a CFG edge of the right kind.
bool
Converter::visit(nir_jump_instr *insn)
{
   switch (insn->type) {
   case nir_jump_return:
      // TODO: this only works in the main function
      mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);
      bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);
      break;
   case nir_jump_break:
   case nir_jump_continue: {
      bool isBreak = insn->type == nir_jump_break;
      nir_block *block = insn->instr.block;
      // a jump terminates its block, so there is exactly one successor
      assert(!block->successors[1]);
      BasicBlock *target = convert(block->successors[0]);
      mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
      // continue closes the loop: BACK edge; break exits it: CROSS edge
      bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
      break;
   }
   default:
      ERROR("unknown nir_jump_type %u\n", insn->type);
      return false;
   }

   return true;
}
1569
1570 bool
1571 Converter::visit(nir_load_const_instr *insn)
1572 {
1573 assert(insn->def.bit_size <= 64);
1574
1575 LValues &newDefs = convert(&insn->def);
1576 for (int i = 0; i < insn->def.num_components; i++) {
1577 switch (insn->def.bit_size) {
1578 case 64:
1579 loadImm(newDefs[i], insn->value.u64[i]);
1580 break;
1581 case 32:
1582 loadImm(newDefs[i], insn->value.u32[i]);
1583 break;
1584 case 16:
1585 loadImm(newDefs[i], insn->value.u16[i]);
1586 break;
1587 case 8:
1588 loadImm(newDefs[i], insn->value.u8[i]);
1589 break;
1590 }
1591 }
1592 return true;
1593 }
1594
1595 #define DEFAULT_CHECKS \
1596 if (insn->dest.dest.ssa.num_components > 1) { \
1597 ERROR("nir_alu_instr only supported with 1 component!\n"); \
1598 return false; \
1599 } \
1600 if (insn->dest.write_mask != 1) { \
1601 ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \
1602 return false; \
1603 }
1604 bool
1605 Converter::visit(nir_alu_instr *insn)
1606 {
1607 const nir_op op = insn->op;
1608 const nir_op_info &info = nir_op_infos[op];
1609 DataType dType = getDType(insn);
1610 const std::vector<DataType> sTypes = getSTypes(insn);
1611
1612 Instruction *oldPos = this->bb->getExit();
1613
1614 switch (op) {
1615 case nir_op_fabs:
1616 case nir_op_iabs:
1617 case nir_op_fadd:
1618 case nir_op_iadd:
1619 case nir_op_fand:
1620 case nir_op_iand:
1621 case nir_op_fceil:
1622 case nir_op_fcos:
1623 case nir_op_fddx:
1624 case nir_op_fddx_coarse:
1625 case nir_op_fddx_fine:
1626 case nir_op_fddy:
1627 case nir_op_fddy_coarse:
1628 case nir_op_fddy_fine:
1629 case nir_op_fdiv:
1630 case nir_op_idiv:
1631 case nir_op_udiv:
1632 case nir_op_fexp2:
1633 case nir_op_ffloor:
1634 case nir_op_ffma:
1635 case nir_op_flog2:
1636 case nir_op_fmax:
1637 case nir_op_imax:
1638 case nir_op_umax:
1639 case nir_op_fmin:
1640 case nir_op_imin:
1641 case nir_op_umin:
1642 case nir_op_fmod:
1643 case nir_op_imod:
1644 case nir_op_umod:
1645 case nir_op_fmul:
1646 case nir_op_imul:
1647 case nir_op_imul_high:
1648 case nir_op_umul_high:
1649 case nir_op_fneg:
1650 case nir_op_ineg:
1651 case nir_op_fnot:
1652 case nir_op_inot:
1653 case nir_op_for:
1654 case nir_op_ior:
1655 case nir_op_pack_64_2x32_split:
1656 case nir_op_fpow:
1657 case nir_op_frcp:
1658 case nir_op_frem:
1659 case nir_op_irem:
1660 case nir_op_frsq:
1661 case nir_op_fsat:
1662 case nir_op_ishr:
1663 case nir_op_ushr:
1664 case nir_op_fsin:
1665 case nir_op_fsqrt:
1666 case nir_op_fsub:
1667 case nir_op_isub:
1668 case nir_op_ftrunc:
1669 case nir_op_ishl:
1670 case nir_op_fxor:
1671 case nir_op_ixor: {
1672 DEFAULT_CHECKS;
1673 LValues &newDefs = convert(&insn->dest);
1674 operation preOp = preOperationNeeded(op);
1675 if (preOp != OP_NOP) {
1676 assert(info.num_inputs < 2);
1677 Value *tmp = getSSA(typeSizeof(dType));
1678 Instruction *i0 = mkOp(preOp, dType, tmp);
1679 Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);
1680 if (info.num_inputs) {
1681 i0->setSrc(0, getSrc(&insn->src[0]));
1682 i1->setSrc(0, tmp);
1683 }
1684 i1->subOp = getSubOp(op);
1685 } else {
1686 Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);
1687 for (unsigned s = 0u; s < info.num_inputs; ++s) {
1688 i->setSrc(s, getSrc(&insn->src[s]));
1689 }
1690 i->subOp = getSubOp(op);
1691 }
1692 break;
1693 }
1694 case nir_op_ifind_msb:
1695 case nir_op_ufind_msb: {
1696 DEFAULT_CHECKS;
1697 LValues &newDefs = convert(&insn->dest);
1698 dType = sTypes[0];
1699 mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
1700 break;
1701 }
1702 case nir_op_fround_even: {
1703 DEFAULT_CHECKS;
1704 LValues &newDefs = convert(&insn->dest);
1705 mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;
1706 break;
1707 }
1708 // convert instructions
1709 case nir_op_f2f32:
1710 case nir_op_f2i32:
1711 case nir_op_f2u32:
1712 case nir_op_i2f32:
1713 case nir_op_i2i32:
1714 case nir_op_u2f32:
1715 case nir_op_u2u32:
1716 case nir_op_f2f64:
1717 case nir_op_f2i64:
1718 case nir_op_f2u64:
1719 case nir_op_i2f64:
1720 case nir_op_i2i64:
1721 case nir_op_u2f64:
1722 case nir_op_u2u64: {
1723 DEFAULT_CHECKS;
1724 LValues &newDefs = convert(&insn->dest);
1725 Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));
1726 if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)
1727 i->rnd = ROUND_Z;
1728 i->sType = sTypes[0];
1729 break;
1730 }
1731 // compare instructions
1732 case nir_op_feq32:
1733 case nir_op_ieq32:
1734 case nir_op_fge32:
1735 case nir_op_ige32:
1736 case nir_op_uge32:
1737 case nir_op_flt32:
1738 case nir_op_ilt32:
1739 case nir_op_ult32:
1740 case nir_op_fne32:
1741 case nir_op_ine32: {
1742 DEFAULT_CHECKS;
1743 LValues &newDefs = convert(&insn->dest);
1744 Instruction *i = mkCmp(getOperation(op),
1745 getCondCode(op),
1746 dType,
1747 newDefs[0],
1748 dType,
1749 getSrc(&insn->src[0]),
1750 getSrc(&insn->src[1]));
1751 if (info.num_inputs == 3)
1752 i->setSrc(2, getSrc(&insn->src[2]));
1753 i->sType = sTypes[0];
1754 break;
1755 }
1756 // those are weird ALU ops and need special handling, because
1757 // 1. they are always componend based
1758 // 2. they basically just merge multiple values into one data type
1759 case nir_op_imov:
1760 case nir_op_fmov:
1761 case nir_op_vec2:
1762 case nir_op_vec3:
1763 case nir_op_vec4: {
1764 LValues &newDefs = convert(&insn->dest);
1765 for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
1766 mkMov(newDefs[c], getSrc(&insn->src[c]), dType);
1767 }
1768 break;
1769 }
1770 // (un)pack
1771 case nir_op_pack_64_2x32: {
1772 LValues &newDefs = convert(&insn->dest);
1773 Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);
1774 merge->setSrc(0, getSrc(&insn->src[0], 0));
1775 merge->setSrc(1, getSrc(&insn->src[0], 1));
1776 break;
1777 }
1778 case nir_op_pack_half_2x16_split: {
1779 LValues &newDefs = convert(&insn->dest);
1780 Value *tmpH = getSSA();
1781 Value *tmpL = getSSA();
1782
1783 mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));
1784 mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));
1785 mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);
1786 break;
1787 }
1788 case nir_op_unpack_half_2x16_split_x:
1789 case nir_op_unpack_half_2x16_split_y: {
1790 LValues &newDefs = convert(&insn->dest);
1791 Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));
1792 if (op == nir_op_unpack_half_2x16_split_y)
1793 cvt->subOp = 1;
1794 break;
1795 }
1796 case nir_op_unpack_64_2x32: {
1797 LValues &newDefs = convert(&insn->dest);
1798 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);
1799 break;
1800 }
1801 case nir_op_unpack_64_2x32_split_x: {
1802 LValues &newDefs = convert(&insn->dest);
1803 mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());
1804 break;
1805 }
1806 case nir_op_unpack_64_2x32_split_y: {
1807 LValues &newDefs = convert(&insn->dest);
1808 mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);
1809 break;
1810 }
1811 // special instructions
1812 case nir_op_fsign:
1813 case nir_op_isign: {
1814 DEFAULT_CHECKS;
1815 DataType iType;
1816 if (::isFloatType(dType))
1817 iType = TYPE_F32;
1818 else
1819 iType = TYPE_S32;
1820
1821 LValues &newDefs = convert(&insn->dest);
1822 LValue *val0 = getScratch();
1823 LValue *val1 = getScratch();
1824 mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);
1825 mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);
1826
1827 if (dType == TYPE_F64) {
1828 mkOp2(OP_SUB, iType, val0, val0, val1);
1829 mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);
1830 } else if (dType == TYPE_S64 || dType == TYPE_U64) {
1831 mkOp2(OP_SUB, iType, val0, val1, val0);
1832 mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));
1833 mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);
1834 } else if (::isFloatType(dType))
1835 mkOp2(OP_SUB, iType, newDefs[0], val0, val1);
1836 else
1837 mkOp2(OP_SUB, iType, newDefs[0], val1, val0);
1838 break;
1839 }
1840 case nir_op_fcsel:
1841 case nir_op_b32csel: {
1842 DEFAULT_CHECKS;
1843 LValues &newDefs = convert(&insn->dest);
1844 mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));
1845 break;
1846 }
1847 case nir_op_ibitfield_extract:
1848 case nir_op_ubitfield_extract: {
1849 DEFAULT_CHECKS;
1850 Value *tmp = getSSA();
1851 LValues &newDefs = convert(&insn->dest);
1852 mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
1853 mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);
1854 break;
1855 }
1856 case nir_op_bfm: {
1857 DEFAULT_CHECKS;
1858 LValues &newDefs = convert(&insn->dest);
1859 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
1860 break;
1861 }
1862 case nir_op_bitfield_insert: {
1863 DEFAULT_CHECKS;
1864 LValues &newDefs = convert(&insn->dest);
1865 LValue *temp = getSSA();
1866 mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));
1867 mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));
1868 break;
1869 }
1870 case nir_op_bit_count: {
1871 DEFAULT_CHECKS;
1872 LValues &newDefs = convert(&insn->dest);
1873 mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));
1874 break;
1875 }
1876 case nir_op_bitfield_reverse: {
1877 DEFAULT_CHECKS;
1878 LValues &newDefs = convert(&insn->dest);
1879 mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1880 break;
1881 }
1882 case nir_op_find_lsb: {
1883 DEFAULT_CHECKS;
1884 LValues &newDefs = convert(&insn->dest);
1885 Value *tmp = getSSA();
1886 mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
1887 mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
1888 break;
1889 }
1890 // boolean conversions
1891 case nir_op_b2f32: {
1892 DEFAULT_CHECKS;
1893 LValues &newDefs = convert(&insn->dest);
1894 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));
1895 break;
1896 }
1897 case nir_op_b2f64: {
1898 DEFAULT_CHECKS;
1899 LValues &newDefs = convert(&insn->dest);
1900 Value *tmp = getSSA(4);
1901 mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));
1902 mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
1903 break;
1904 }
1905 case nir_op_f2b32:
1906 case nir_op_i2b32: {
1907 DEFAULT_CHECKS;
1908 LValues &newDefs = convert(&insn->dest);
1909 Value *src1;
1910 if (typeSizeof(sTypes[0]) == 8) {
1911 src1 = loadImm(getSSA(8), 0.0);
1912 } else {
1913 src1 = zero;
1914 }
1915 CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
1916 mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
1917 break;
1918 }
1919 case nir_op_b2i32: {
1920 DEFAULT_CHECKS;
1921 LValues &newDefs = convert(&insn->dest);
1922 mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));
1923 break;
1924 }
1925 case nir_op_b2i64: {
1926 DEFAULT_CHECKS;
1927 LValues &newDefs = convert(&insn->dest);
1928 LValue *def = getScratch();
1929 mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));
1930 mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));
1931 break;
1932 }
1933 default:
1934 ERROR("unknown nir_op %s\n", info.name);
1935 return false;
1936 }
1937
1938 if (!oldPos) {
1939 oldPos = this->bb->getEntry();
1940 oldPos->precise = insn->exact;
1941 }
1942
1943 if (unlikely(!oldPos))
1944 return true;
1945
1946 while (oldPos->next) {
1947 oldPos = oldPos->next;
1948 oldPos->precise = insn->exact;
1949 }
1950 oldPos->saturate = insn->dest.saturate;
1951
1952 return true;
1953 }
1954 #undef DEFAULT_CHECKS
1955
1956 bool
1957 Converter::run()
1958 {
1959 bool progress;
1960
1961 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1962 nir_print_shader(nir, stderr);
1963
1964 NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
1965 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
1966 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
1967 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1968 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
1969 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
1970
1971 do {
1972 progress = false;
1973 NIR_PASS(progress, nir, nir_copy_prop);
1974 NIR_PASS(progress, nir, nir_opt_remove_phis);
1975 NIR_PASS(progress, nir, nir_opt_trivial_continues);
1976 NIR_PASS(progress, nir, nir_opt_cse);
1977 NIR_PASS(progress, nir, nir_opt_algebraic);
1978 NIR_PASS(progress, nir, nir_opt_constant_folding);
1979 NIR_PASS(progress, nir, nir_copy_prop);
1980 NIR_PASS(progress, nir, nir_opt_dce);
1981 NIR_PASS(progress, nir, nir_opt_dead_cf);
1982 } while (progress);
1983
1984 NIR_PASS_V(nir, nir_lower_bool_to_int32);
1985 NIR_PASS_V(nir, nir_lower_locals_to_regs);
1986 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
1987 NIR_PASS_V(nir, nir_convert_from_ssa, true);
1988
1989 // Garbage collect dead instructions
1990 nir_sweep(nir);
1991
1992 if (!parseNIR()) {
1993 ERROR("Couldn't prase NIR!\n");
1994 return false;
1995 }
1996
1997 if (!assignSlots()) {
1998 ERROR("Couldn't assign slots!\n");
1999 return false;
2000 }
2001
2002 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
2003 nir_print_shader(nir, stderr);
2004
2005 nir_foreach_function(function, nir) {
2006 if (!visit(function))
2007 return false;
2008 }
2009
2010 return true;
2011 }
2012
2013 } // unnamed namespace
2014
2015 namespace nv50_ir {
2016
2017 bool
2018 Program::makeFromNIR(struct nv50_ir_prog_info *info)
2019 {
2020 nir_shader *nir = (nir_shader*)info->bin.source;
2021 Converter converter(this, nir, info);
2022 bool result = converter.run();
2023 if (!result)
2024 return result;
2025 LoweringHelper lowering;
2026 lowering.run(this);
2027 tlsSize = info->bin.tlsSpace;
2028 return result;
2029 }
2030
2031 } // namespace nv50_ir