src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33
  34 #if __cplusplus >= 201103L
  35 #include <unordered_map>
  36 #else
  37 #include <tr1/unordered_map>
  38 #endif
  39 #include <vector>
  40
  41 namespace {
  42
  43 #if __cplusplus >= 201103L
  44 using std::hash;
  45 using std::unordered_map;
  46 #else
  47 using std::tr1::hash;
  48 using std::tr1::unordered_map;
  49 #endif
  50
  51 using namespace nv50_ir;
  52
  53 int
  54 type_size(const struct glsl_type *type)
  55 {
  56    return glsl_count_attribute_slots(type, false);
  57 }
  58
  59 class Converter : public ConverterCommon
  60 {
  61 public:
  62    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  63
  64    bool run();
  65 private:
  66    typedef std::vector<LValue *> LValues;
  67    typedef unordered_map<unsigned, LValues> NirDefMap;
  68
  69    LValues& convert(nir_alu_dest *);
  70    LValues& convert(nir_dest *);
  71    LValues& convert(nir_register *);
  72    LValues& convert(nir_ssa_def *);
  73
  74    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  75    Value* getSrc(nir_register *, uint8_t);
  76    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  77    Value* getSrc(nir_ssa_def *, uint8_t);
  78
  79    // returned value is the constant part of the given source (either the
  80    // nir_src or the selected source component of an intrinsic). Even though
  81    // this is mostly an optimization to be able to skip indirects in a few
  82    // cases, sometimes we require immediate values or set some fileds on
  83    // instructions (e.g. tex) in order for codegen to consume those.
  84    // If the found value has not a constant part, the Value gets returned
  85    // through the Value parameter.
  86    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  87    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
  88
  89    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
  90
  91    void setInterpolate(nv50_ir_varying *,
  92                        uint8_t,
  93                        bool centroid,
  94                        unsigned semantics);
  95
  96    Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,
  97                          uint8_t c, Value *indirect0 = NULL,
  98                          Value *indirect1 = NULL, bool patch = false);
  99    void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
 100                 Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
 101                 Value *indirect1 = NULL);
 102
 103    bool isFloatType(nir_alu_type);
 104    bool isSignedType(nir_alu_type);
 105    bool isResultFloat(nir_op);
 106    bool isResultSigned(nir_op);
 107
 108    DataType getDType(nir_alu_instr *);
 109    DataType getDType(nir_intrinsic_instr *);
 110    DataType getDType(nir_op, uint8_t);
 111
 112    std::vector<DataType> getSTypes(nir_alu_instr *);
 113    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 114
 115    bool assignSlots();
 116
 117    nir_shader *nir;
 118
 119    NirDefMap ssaDefs;
 120    NirDefMap regDefs;
 121 };
 122
 123 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 124    : ConverterCommon(prog, info),
 125      nir(nir) {}
 126
 127 bool
 128 Converter::isFloatType(nir_alu_type type)
 129 {
 130    return nir_alu_type_get_base_type(type) == nir_type_float;
 131 }
 132
 133 bool
 134 Converter::isSignedType(nir_alu_type type)
 135 {
 136    return nir_alu_type_get_base_type(type) == nir_type_int;
 137 }
 138
 139 bool
 140 Converter::isResultFloat(nir_op op)
 141 {
 142    const nir_op_info &info = nir_op_infos[op];
 143    if (info.output_type != nir_type_invalid)
 144       return isFloatType(info.output_type);
 145
 146    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 147    assert(false);
 148    return true;
 149 }
 150
 151 bool
 152 Converter::isResultSigned(nir_op op)
 153 {
 154    switch (op) {
 155    // there is no umul and we get wrong results if we treat all muls as signed
 156    case nir_op_imul:
 157    case nir_op_inot:
 158       return false;
 159    default:
 160       const nir_op_info &info = nir_op_infos[op];
 161       if (info.output_type != nir_type_invalid)
 162          return isSignedType(info.output_type);
 163       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 164       assert(false);
 165       return true;
 166    }
 167 }
 168
 169 DataType
 170 Converter::getDType(nir_alu_instr *insn)
 171 {
 172    if (insn->dest.dest.is_ssa)
 173       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 174    else
 175       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 176 }
 177
 178 DataType
 179 Converter::getDType(nir_intrinsic_instr *insn)
 180 {
 181    if (insn->dest.is_ssa)
 182       return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
 183    else
 184       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
 185 }
 186
 187 DataType
 188 Converter::getDType(nir_op op, uint8_t bitSize)
 189 {
 190    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 191    if (ty == TYPE_NONE) {
 192       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 193       assert(false);
 194    }
 195    return ty;
 196 }
 197
 198 std::vector<DataType>
 199 Converter::getSTypes(nir_alu_instr *insn)
 200 {
 201    const nir_op_info &info = nir_op_infos[insn->op];
 202    std::vector<DataType> res(info.num_inputs);
 203
 204    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 205       if (info.input_types[i] != nir_type_invalid) {
 206          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 207       } else {
 208          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 209          assert(false);
 210          res[i] = TYPE_NONE;
 211          break;
 212       }
 213    }
 214
 215    return res;
 216 }
 217
 218 DataType
 219 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 220 {
 221    uint8_t bitSize;
 222    if (src.is_ssa)
 223       bitSize = src.ssa->bit_size;
 224    else
 225       bitSize = src.reg.reg->bit_size;
 226
 227    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 228    if (ty == TYPE_NONE) {
 229       const char *str;
 230       if (isFloat)
 231          str = "float";
 232       else if (isSigned)
 233          str = "int";
 234       else
 235          str = "uint";
 236       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 237       assert(false);
 238    }
 239    return ty;
 240 }
 241
 242 Converter::LValues&
 243 Converter::convert(nir_dest *dest)
 244 {
 245    if (dest->is_ssa)
 246       return convert(&dest->ssa);
 247    if (dest->reg.indirect) {
 248       ERROR("no support for indirects.");
 249       assert(false);
 250    }
 251    return convert(dest->reg.reg);
 252 }
 253
 254 Converter::LValues&
 255 Converter::convert(nir_register *reg)
 256 {
 257    NirDefMap::iterator it = regDefs.find(reg->index);
 258    if (it != regDefs.end())
 259       return it->second;
 260
 261    LValues newDef(reg->num_components);
 262    for (uint8_t i = 0; i < reg->num_components; i++)
 263       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 264    return regDefs[reg->index] = newDef;
 265 }
 266
 267 Converter::LValues&
 268 Converter::convert(nir_ssa_def *def)
 269 {
 270    NirDefMap::iterator it = ssaDefs.find(def->index);
 271    if (it != ssaDefs.end())
 272       return it->second;
 273
 274    LValues newDef(def->num_components);
 275    for (uint8_t i = 0; i < def->num_components; i++)
 276       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 277    return ssaDefs[def->index] = newDef;
 278 }
 279
 280 Value*
 281 Converter::getSrc(nir_alu_src *src, uint8_t component)
 282 {
 283    if (src->abs || src->negate) {
 284       ERROR("modifiers currently not supported on nir_alu_src\n");
 285       assert(false);
 286    }
 287    return getSrc(&src->src, src->swizzle[component]);
 288 }
 289
 290 Value*
 291 Converter::getSrc(nir_register *reg, uint8_t idx)
 292 {
 293    NirDefMap::iterator it = regDefs.find(reg->index);
 294    if (it == regDefs.end())
 295       return convert(reg)[idx];
 296    return it->second[idx];
 297 }
 298
 299 Value*
 300 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 301 {
 302    if (src->is_ssa)
 303       return getSrc(src->ssa, idx);
 304
 305    if (src->reg.indirect) {
 306       if (indirect)
 307          return getSrc(src->reg.indirect, idx);
 308       ERROR("no support for indirects.");
 309       assert(false);
 310       return NULL;
 311    }
 312
 313    return getSrc(src->reg.reg, idx);
 314 }
 315
 316 Value*
 317 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 318 {
 319    NirDefMap::iterator it = ssaDefs.find(src->index);
 320    if (it == ssaDefs.end()) {
 321       ERROR("SSA value %u not found\n", src->index);
 322       assert(false);
 323       return NULL;
 324    }
 325    return it->second[idx];
 326 }
 327
 328 uint32_t
 329 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 330 {
 331    nir_const_value *offset = nir_src_as_const_value(*src);
 332
 333    if (offset) {
 334       indirect = NULL;
 335       return offset->u32[0];
 336    }
 337
 338    indirect = getSrc(src, idx, true);
 339    return 0;
 340 }
 341
 342 uint32_t
 343 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
 344 {
 345    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 346    if (indirect)
 347       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 348    return idx;
 349 }
 350
 351 static void
 352 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 353 {
 354    assert(name && index);
 355
 356    if (slot >= VERT_ATTRIB_MAX) {
 357       ERROR("invalid varying slot %u\n", slot);
 358       assert(false);
 359       return;
 360    }
 361
 362    if (slot >= VERT_ATTRIB_GENERIC0 &&
 363        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 364       *name = TGSI_SEMANTIC_GENERIC;
 365       *index = slot - VERT_ATTRIB_GENERIC0;
 366       return;
 367    }
 368
 369    if (slot >= VERT_ATTRIB_TEX0 &&
 370        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 371       *name = TGSI_SEMANTIC_TEXCOORD;
 372       *index = slot - VERT_ATTRIB_TEX0;
 373       return;
 374    }
 375
 376    switch (slot) {
 377    case VERT_ATTRIB_COLOR0:
 378       *name = TGSI_SEMANTIC_COLOR;
 379       *index = 0;
 380       break;
 381    case VERT_ATTRIB_COLOR1:
 382       *name = TGSI_SEMANTIC_COLOR;
 383       *index = 1;
 384       break;
 385    case VERT_ATTRIB_EDGEFLAG:
 386       *name = TGSI_SEMANTIC_EDGEFLAG;
 387       *index = 0;
 388       break;
 389    case VERT_ATTRIB_FOG:
 390       *name = TGSI_SEMANTIC_FOG;
 391       *index = 0;
 392       break;
 393    case VERT_ATTRIB_NORMAL:
 394       *name = TGSI_SEMANTIC_NORMAL;
 395       *index = 0;
 396       break;
 397    case VERT_ATTRIB_POS:
 398       *name = TGSI_SEMANTIC_POSITION;
 399       *index = 0;
 400       break;
 401    case VERT_ATTRIB_POINT_SIZE:
 402       *name = TGSI_SEMANTIC_PSIZE;
 403       *index = 0;
 404       break;
 405    default:
 406       ERROR("unknown vert attrib slot %u\n", slot);
 407       assert(false);
 408       break;
 409    }
 410 }
 411
 412 static void
 413 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
 414 {
 415    assert(name && index);
 416
 417    if (slot >= VARYING_SLOT_TESS_MAX) {
 418       ERROR("invalid varying slot %u\n", slot);
 419       assert(false);
 420       return;
 421    }
 422
 423    if (slot >= VARYING_SLOT_PATCH0) {
 424       *name = TGSI_SEMANTIC_PATCH;
 425       *index = slot - VARYING_SLOT_PATCH0;
 426       return;
 427    }
 428
 429    if (slot >= VARYING_SLOT_VAR0) {
 430       *name = TGSI_SEMANTIC_GENERIC;
 431       *index = slot - VARYING_SLOT_VAR0;
 432       return;
 433    }
 434
 435    if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
 436       *name = TGSI_SEMANTIC_TEXCOORD;
 437       *index = slot - VARYING_SLOT_TEX0;
 438       return;
 439    }
 440
 441    switch (slot) {
 442    case VARYING_SLOT_BFC0:
 443       *name = TGSI_SEMANTIC_BCOLOR;
 444       *index = 0;
 445       break;
 446    case VARYING_SLOT_BFC1:
 447       *name = TGSI_SEMANTIC_BCOLOR;
 448       *index = 1;
 449       break;
 450    case VARYING_SLOT_CLIP_DIST0:
 451       *name = TGSI_SEMANTIC_CLIPDIST;
 452       *index = 0;
 453       break;
 454    case VARYING_SLOT_CLIP_DIST1:
 455       *name = TGSI_SEMANTIC_CLIPDIST;
 456       *index = 1;
 457       break;
 458    case VARYING_SLOT_CLIP_VERTEX:
 459       *name = TGSI_SEMANTIC_CLIPVERTEX;
 460       *index = 0;
 461       break;
 462    case VARYING_SLOT_COL0:
 463       *name = TGSI_SEMANTIC_COLOR;
 464       *index = 0;
 465       break;
 466    case VARYING_SLOT_COL1:
 467       *name = TGSI_SEMANTIC_COLOR;
 468       *index = 1;
 469       break;
 470    case VARYING_SLOT_EDGE:
 471       *name = TGSI_SEMANTIC_EDGEFLAG;
 472       *index = 0;
 473       break;
 474    case VARYING_SLOT_FACE:
 475       *name = TGSI_SEMANTIC_FACE;
 476       *index = 0;
 477       break;
 478    case VARYING_SLOT_FOGC:
 479       *name = TGSI_SEMANTIC_FOG;
 480       *index = 0;
 481       break;
 482    case VARYING_SLOT_LAYER:
 483       *name = TGSI_SEMANTIC_LAYER;
 484       *index = 0;
 485       break;
 486    case VARYING_SLOT_PNTC:
 487       *name = TGSI_SEMANTIC_PCOORD;
 488       *index = 0;
 489       break;
 490    case VARYING_SLOT_POS:
 491       *name = TGSI_SEMANTIC_POSITION;
 492       *index = 0;
 493       break;
 494    case VARYING_SLOT_PRIMITIVE_ID:
 495       *name = TGSI_SEMANTIC_PRIMID;
 496       *index = 0;
 497       break;
 498    case VARYING_SLOT_PSIZ:
 499       *name = TGSI_SEMANTIC_PSIZE;
 500       *index = 0;
 501       break;
 502    case VARYING_SLOT_TESS_LEVEL_INNER:
 503       *name = TGSI_SEMANTIC_TESSINNER;
 504       *index = 0;
 505       break;
 506    case VARYING_SLOT_TESS_LEVEL_OUTER:
 507       *name = TGSI_SEMANTIC_TESSOUTER;
 508       *index = 0;
 509       break;
 510    case VARYING_SLOT_VIEWPORT:
 511       *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
 512       *index = 0;
 513       break;
 514    default:
 515       ERROR("unknown varying slot %u\n", slot);
 516       assert(false);
 517       break;
 518    }
 519 }
 520
 521 static void
 522 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
 523 {
 524    if (slot >= FRAG_RESULT_DATA0) {
 525       *name = TGSI_SEMANTIC_COLOR;
 526       *index = slot - FRAG_RESULT_COLOR - 2; // intentional
 527       return;
 528    }
 529
 530    switch (slot) {
 531    case FRAG_RESULT_COLOR:
 532       *name = TGSI_SEMANTIC_COLOR;
 533       *index = 0;
 534       break;
 535    case FRAG_RESULT_DEPTH:
 536       *name = TGSI_SEMANTIC_POSITION;
 537       *index = 0;
 538       break;
 539    case FRAG_RESULT_SAMPLE_MASK:
 540       *name = TGSI_SEMANTIC_SAMPLEMASK;
 541       *index = 0;
 542       break;
 543    default:
 544       ERROR("unknown frag result slot %u\n", slot);
 545       assert(false);
 546       break;
 547    }
 548 }
 549
 550 // copy of _mesa_sysval_to_semantic
 551 static void
 552 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
 553 {
 554    *index = 0;
 555    switch (val) {
 556    // Vertex shader
 557    case SYSTEM_VALUE_VERTEX_ID:
 558       *name = TGSI_SEMANTIC_VERTEXID;
 559       break;
 560    case SYSTEM_VALUE_INSTANCE_ID:
 561       *name = TGSI_SEMANTIC_INSTANCEID;
 562       break;
 563    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
 564       *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
 565       break;
 566    case SYSTEM_VALUE_BASE_VERTEX:
 567       *name = TGSI_SEMANTIC_BASEVERTEX;
 568       break;
 569    case SYSTEM_VALUE_BASE_INSTANCE:
 570       *name = TGSI_SEMANTIC_BASEINSTANCE;
 571       break;
 572    case SYSTEM_VALUE_DRAW_ID:
 573       *name = TGSI_SEMANTIC_DRAWID;
 574       break;
 575
 576    // Geometry shader
 577    case SYSTEM_VALUE_INVOCATION_ID:
 578       *name = TGSI_SEMANTIC_INVOCATIONID;
 579       break;
 580
 581    // Fragment shader
 582    case SYSTEM_VALUE_FRAG_COORD:
 583       *name = TGSI_SEMANTIC_POSITION;
 584       break;
 585    case SYSTEM_VALUE_FRONT_FACE:
 586       *name = TGSI_SEMANTIC_FACE;
 587       break;
 588    case SYSTEM_VALUE_SAMPLE_ID:
 589       *name = TGSI_SEMANTIC_SAMPLEID;
 590       break;
 591    case SYSTEM_VALUE_SAMPLE_POS:
 592       *name = TGSI_SEMANTIC_SAMPLEPOS;
 593       break;
 594    case SYSTEM_VALUE_SAMPLE_MASK_IN:
 595       *name = TGSI_SEMANTIC_SAMPLEMASK;
 596       break;
 597    case SYSTEM_VALUE_HELPER_INVOCATION:
 598       *name = TGSI_SEMANTIC_HELPER_INVOCATION;
 599       break;
 600
 601    // Tessellation shader
 602    case SYSTEM_VALUE_TESS_COORD:
 603       *name = TGSI_SEMANTIC_TESSCOORD;
 604       break;
 605    case SYSTEM_VALUE_VERTICES_IN:
 606       *name = TGSI_SEMANTIC_VERTICESIN;
 607       break;
 608    case SYSTEM_VALUE_PRIMITIVE_ID:
 609       *name = TGSI_SEMANTIC_PRIMID;
 610       break;
 611    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 612       *name = TGSI_SEMANTIC_TESSOUTER;
 613       break;
 614    case SYSTEM_VALUE_TESS_LEVEL_INNER:
 615       *name = TGSI_SEMANTIC_TESSINNER;
 616       break;
 617
 618    // Compute shader
 619    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
 620       *name = TGSI_SEMANTIC_THREAD_ID;
 621       break;
 622    case SYSTEM_VALUE_WORK_GROUP_ID:
 623       *name = TGSI_SEMANTIC_BLOCK_ID;
 624       break;
 625    case SYSTEM_VALUE_NUM_WORK_GROUPS:
 626       *name = TGSI_SEMANTIC_GRID_SIZE;
 627       break;
 628    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
 629       *name = TGSI_SEMANTIC_BLOCK_SIZE;
 630       break;
 631
 632    // ARB_shader_ballot
 633    case SYSTEM_VALUE_SUBGROUP_SIZE:
 634       *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
 635       break;
 636    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
 637       *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
 638       break;
 639    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
 640       *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
 641       break;
 642    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
 643       *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
 644       break;
 645    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
 646       *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
 647       break;
 648    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
 649       *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
 650       break;
 651    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
 652       *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
 653       break;
 654
 655    default:
 656       ERROR("unknown system value %u\n", val);
 657       assert(false);
 658       break;
 659    }
 660 }
 661
 662 void
 663 Converter::setInterpolate(nv50_ir_varying *var,
 664                           uint8_t mode,
 665                           bool centroid,
 666                           unsigned semantic)
 667 {
 668    switch (mode) {
 669    case INTERP_MODE_FLAT:
 670       var->flat = 1;
 671       break;
 672    case INTERP_MODE_NONE:
 673       if (semantic == TGSI_SEMANTIC_COLOR)
 674          var->sc = 1;
 675       else if (semantic == TGSI_SEMANTIC_POSITION)
 676          var->linear = 1;
 677       break;
 678    case INTERP_MODE_NOPERSPECTIVE:
 679       var->linear = 1;
 680       break;
 681    case INTERP_MODE_SMOOTH:
 682       break;
 683    }
 684    var->centroid = centroid;
 685 }
 686
 687 static uint16_t
 688 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
 689           bool input, const nir_variable *var)
 690 {
 691    if (!type->is_array())
 692       return type->count_attribute_slots(false);
 693
 694    uint16_t slots;
 695    switch (stage) {
 696    case Program::TYPE_GEOMETRY:
 697       slots = type->uniform_locations();
 698       if (input)
 699          slots /= info.gs.vertices_in;
 700       break;
 701    case Program::TYPE_TESSELLATION_CONTROL:
 702    case Program::TYPE_TESSELLATION_EVAL:
 703       // remove first dimension
 704       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
 705          slots = type->uniform_locations();
 706       else
 707          slots = type->fields.array->uniform_locations();
 708       break;
 709    default:
 710       slots = type->count_attribute_slots(false);
 711       break;
 712    }
 713
 714    return slots;
 715 }
 716
 717 bool Converter::assignSlots() {
 718    unsigned name;
 719    unsigned index;
 720
 721    info->io.viewportId = -1;
 722    info->numInputs = 0;
 723
 724    // we have to fixup the uniform locations for arrays
 725    unsigned numImages = 0;
 726    nir_foreach_variable(var, &nir->uniforms) {
 727       const glsl_type *type = var->type;
 728       if (!type->without_array()->is_image())
 729          continue;
 730       var->data.driver_location = numImages;
 731       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
 732    }
 733
 734    nir_foreach_variable(var, &nir->inputs) {
 735       const glsl_type *type = var->type;
 736       int slot = var->data.location;
 737       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
 738       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
 739                                        : type->component_slots();
 740       uint32_t frac = var->data.location_frac;
 741       uint32_t vary = var->data.driver_location;
 742
 743       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
 744          if (comp > 2)
 745             slots *= 2;
 746       }
 747
 748       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
 749
 750       switch(prog->getType()) {
 751       case Program::TYPE_FRAGMENT:
 752          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 753          for (uint16_t i = 0; i < slots; ++i) {
 754             setInterpolate(&info->in[vary + i], var->data.interpolation,
 755                            var->data.centroid | var->data.sample, name);
 756          }
 757          break;
 758       case Program::TYPE_GEOMETRY:
 759          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 760          break;
 761       case Program::TYPE_TESSELLATION_CONTROL:
 762       case Program::TYPE_TESSELLATION_EVAL:
 763          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 764          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
 765             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
 766          break;
 767       case Program::TYPE_VERTEX:
 768          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
 769          switch (name) {
 770          case TGSI_SEMANTIC_EDGEFLAG:
 771             info->io.edgeFlagIn = vary;
 772             break;
 773          default:
 774             break;
 775          }
 776          break;
 777       default:
 778          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
 779          return false;
 780       }
 781
 782       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
 783          info->in[vary].id = vary;
 784          info->in[vary].patch = var->data.patch;
 785          info->in[vary].sn = name;
 786          info->in[vary].si = index + i;
 787          if (glsl_base_type_is_64bit(type->without_array()->base_type))
 788             if (i & 0x1)
 789                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
 790             else
 791                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
 792          else
 793             info->in[vary].mask |= ((1 << comp) - 1) << frac;
 794       }
 795       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
 796    }
 797
 798    info->numOutputs = 0;
 799    nir_foreach_variable(var, &nir->outputs) {
 800       const glsl_type *type = var->type;
 801       int slot = var->data.location;
 802       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
 803       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
 804                                        : type->component_slots();
 805       uint32_t frac = var->data.location_frac;
 806       uint32_t vary = var->data.driver_location;
 807
 808       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
 809          if (comp > 2)
 810             slots *= 2;
 811       }
 812
 813       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
 814
 815       switch(prog->getType()) {
 816       case Program::TYPE_FRAGMENT:
 817          frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
 818          switch (name) {
 819          case TGSI_SEMANTIC_COLOR:
 820             if (!var->data.fb_fetch_output)
 821                info->prop.fp.numColourResults++;
 822             info->prop.fp.separateFragData = true;
 823             // sometimes we get FRAG_RESULT_DATAX with data.index 0
 824             // sometimes we get FRAG_RESULT_DATA0 with data.index X
 825             index = index == 0 ? var->data.index : index;
 826             break;
 827          case TGSI_SEMANTIC_POSITION:
 828             info->io.fragDepth = vary;
 829             info->prop.fp.writesDepth = true;
 830             break;
 831          case TGSI_SEMANTIC_SAMPLEMASK:
 832             info->io.sampleMask = vary;
 833             break;
 834          default:
 835             break;
 836          }
 837          break;
 838       case Program::TYPE_GEOMETRY:
 839       case Program::TYPE_TESSELLATION_CONTROL:
 840       case Program::TYPE_TESSELLATION_EVAL:
 841       case Program::TYPE_VERTEX:
 842          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 843
 844          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
 845              name != TGSI_SEMANTIC_TESSOUTER)
 846             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
 847
 848          switch (name) {
 849          case TGSI_SEMANTIC_CLIPDIST:
 850             info->io.genUserClip = -1;
 851             break;
 852          case TGSI_SEMANTIC_EDGEFLAG:
 853             info->io.edgeFlagOut = vary;
 854             break;
 855          default:
 856             break;
 857          }
 858          break;
 859       default:
 860          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
 861          return false;
 862       }
 863
 864       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
 865          info->out[vary].id = vary;
 866          info->out[vary].patch = var->data.patch;
 867          info->out[vary].sn = name;
 868          info->out[vary].si = index + i;
 869          if (glsl_base_type_is_64bit(type->without_array()->base_type))
 870             if (i & 0x1)
 871                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
 872             else
 873                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
 874          else
 875             info->out[vary].mask |= ((1 << comp) - 1) << frac;
 876
 877          if (nir->info.outputs_read & 1ll << slot)
 878             info->out[vary].oread = 1;
 879       }
 880       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
 881    }
 882
 883    info->numSysVals = 0;
 884    for (uint8_t i = 0; i < 64; ++i) {
 885       if (!(nir->info.system_values_read & 1ll << i))
 886          continue;
 887
 888       system_val_to_tgsi_semantic(i, &name, &index);
 889       info->sv[info->numSysVals].sn = name;
 890       info->sv[info->numSysVals].si = index;
 891       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
 892
 893       switch (i) {
 894       case SYSTEM_VALUE_INSTANCE_ID:
 895          info->io.instanceId = info->numSysVals;
 896          break;
 897       case SYSTEM_VALUE_TESS_LEVEL_INNER:
 898       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 899          info->sv[info->numSysVals].patch = 1;
 900          break;
 901       case SYSTEM_VALUE_VERTEX_ID:
 902          info->io.vertexId = info->numSysVals;
 903          break;
 904       default:
 905          break;
 906       }
 907
 908       info->numSysVals += 1;
 909    }
 910
 911    if (info->io.genUserClip > 0) {
 912       info->io.clipDistances = info->io.genUserClip;
 913
 914       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
 915
 916       for (unsigned int n = 0; n < nOut; ++n) {
 917          unsigned int i = info->numOutputs++;
 918          info->out[i].id = i;
 919          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
 920          info->out[i].si = n;
 921          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
 922       }
 923    }
 924
 925    return info->assignSlots(info) == 0;
 926 }
 927
 928 uint32_t
 929 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
 930 {
 931    DataType ty;
 932    int offset = nir_intrinsic_component(insn);
 933    bool input;
 934
 935    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
 936       ty = getDType(insn);
 937    else
 938       ty = getSType(insn->src[0], false, false);
 939
 940    switch (insn->intrinsic) {
 941    case nir_intrinsic_load_input:
 942    case nir_intrinsic_load_interpolated_input:
 943    case nir_intrinsic_load_per_vertex_input:
 944       input = true;
 945       break;
 946    case nir_intrinsic_load_output:
 947    case nir_intrinsic_load_per_vertex_output:
 948    case nir_intrinsic_store_output:
 949    case nir_intrinsic_store_per_vertex_output:
 950       input = false;
 951       break;
 952    default:
 953       ERROR("unknown intrinsic in getSlotAddress %s",
 954             nir_intrinsic_infos[insn->intrinsic].name);
 955       input = false;
 956       assert(false);
 957       break;
 958    }
 959
 960    if (typeSizeof(ty) == 8) {
 961       slot *= 2;
 962       slot += offset;
 963       if (slot >= 4) {
 964          idx += 1;
 965          slot -= 4;
 966       }
 967    } else {
 968       slot += offset;
 969    }
 970
 971    assert(slot < 4);
 972    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
 973    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
 974
 975    const nv50_ir_varying *vary = input ? info->in : info->out;
 976    return vary[idx].slot[slot] * 4;
 977 }
 978
 979 Instruction *
 980 Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
 981                     uint32_t base, uint8_t c, Value *indirect0,
 982                     Value *indirect1, bool patch)
 983 {
 984    unsigned int tySize = typeSizeof(ty);
 985
 986    if (tySize == 8 &&
 987        (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
 988       Value *lo = getSSA();
 989       Value *hi = getSSA();
 990
 991       Instruction *loi =
 992          mkLoad(TYPE_U32, lo,
 993                 mkSymbol(file, i, TYPE_U32, base + c * tySize),
 994                 indirect0);
 995       loi->setIndirect(0, 1, indirect1);
 996       loi->perPatch = patch;
 997
 998       Instruction *hii =
 999          mkLoad(TYPE_U32, hi,
1000                 mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
1001                 indirect0);
1002       hii->setIndirect(0, 1, indirect1);
1003       hii->perPatch = patch;
1004
1005       return mkOp2(OP_MERGE, ty, def, lo, hi);
1006    } else {
1007       Instruction *ld =
1008          mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
1009       ld->setIndirect(0, 1, indirect1);
1010       ld->perPatch = patch;
1011       return ld;
1012    }
1013 }
1014
1015 void
1016 Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
1017                    DataType ty, Value *src, uint8_t idx, uint8_t c,
1018                    Value *indirect0, Value *indirect1)
1019 {
1020    uint8_t size = typeSizeof(ty);
1021    uint32_t address = getSlotAddress(insn, idx, c);
1022
1023    if (size == 8 && indirect0) {
1024       Value *split[2];
1025       mkSplit(split, 4, src);
1026
1027       if (op == OP_EXPORT) {
1028          split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
1029          split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
1030       }
1031
1032       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
1033               split[0])->perPatch = info->out[idx].patch;
1034       mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,
1035               split[1])->perPatch = info->out[idx].patch;
1036    } else {
1037       if (op == OP_EXPORT)
1038          src = mkMov(getSSA(size), src, ty)->getDef(0);
1039       mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
1040               src)->perPatch = info->out[idx].patch;
1041    }
1042 }
1043
1044 bool
1045 Converter::run()
1046 {
1047    bool progress;
1048
1049    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1050       nir_print_shader(nir, stderr);
1051
1052    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
1053    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
1054    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
1055    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1056    NIR_PASS_V(nir, nir_lower_alu_to_scalar);
1057    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
1058
1059    do {
1060       progress = false;
1061       NIR_PASS(progress, nir, nir_copy_prop);
1062       NIR_PASS(progress, nir, nir_opt_remove_phis);
1063       NIR_PASS(progress, nir, nir_opt_trivial_continues);
1064       NIR_PASS(progress, nir, nir_opt_cse);
1065       NIR_PASS(progress, nir, nir_opt_algebraic);
1066       NIR_PASS(progress, nir, nir_opt_constant_folding);
1067       NIR_PASS(progress, nir, nir_copy_prop);
1068       NIR_PASS(progress, nir, nir_opt_dce);
1069       NIR_PASS(progress, nir, nir_opt_dead_cf);
1070    } while (progress);
1071
1072    NIR_PASS_V(nir, nir_lower_bool_to_int32);
1073    NIR_PASS_V(nir, nir_lower_locals_to_regs);
1074    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
1075    NIR_PASS_V(nir, nir_convert_from_ssa, true);
1076
1077    // Garbage collect dead instructions
1078    nir_sweep(nir);
1079
1080    if (!assignSlots()) {
1081       ERROR("Couldn't assign slots!\n");
1082       return false;
1083    }
1084
1085    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1086       nir_print_shader(nir, stderr);
1087
1088    return false;
1089 }
1090
1091 } // unnamed namespace
1092
1093 namespace nv50_ir {
1094
1095 bool
1096 Program::makeFromNIR(struct nv50_ir_prog_info *info)
1097 {
1098    nir_shader *nir = (nir_shader*)info->bin.source;
1099    Converter converter(this, nir, info);
1100    bool result = converter.run();
1101    if (!result)
1102       return result;
1103    LoweringHelper lowering;
1104    lowering.run(this);
1105    tlsSize = info->bin.tlsSpace;
1106    return result;
1107 }
1108
1109 } // namespace nv50_ir