src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

   1 /*
   2  * Copyright 2017 Red Hat Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: Karol Herbst <kherbst@redhat.com>
  23  */
  24
  25 #include "compiler/nir/nir.h"
  26
  27 #include "util/u_debug.h"
  28
  29 #include "codegen/nv50_ir.h"
  30 #include "codegen/nv50_ir_from_common.h"
  31 #include "codegen/nv50_ir_lowering_helper.h"
  32 #include "codegen/nv50_ir_util.h"
  33
  34 #if __cplusplus >= 201103L
  35 #include <unordered_map>
  36 #else
  37 #include <tr1/unordered_map>
  38 #endif
  39 #include <vector>
  40
  41 namespace {
  42
  43 #if __cplusplus >= 201103L
  44 using std::hash;
  45 using std::unordered_map;
  46 #else
  47 using std::tr1::hash;
  48 using std::tr1::unordered_map;
  49 #endif
  50
  51 using namespace nv50_ir;
  52
  53 int
  54 type_size(const struct glsl_type *type)
  55 {
  56    return glsl_count_attribute_slots(type, false);
  57 }
  58
  59 class Converter : public ConverterCommon
  60 {
  61 public:
  62    Converter(Program *, nir_shader *, nv50_ir_prog_info *);
  63
  64    bool run();
  65 private:
  66    typedef std::vector<LValue *> LValues;
  67    typedef unordered_map<unsigned, LValues> NirDefMap;
  68
  69    LValues& convert(nir_alu_dest *);
  70    LValues& convert(nir_dest *);
  71    LValues& convert(nir_register *);
  72    LValues& convert(nir_ssa_def *);
  73
  74    Value* getSrc(nir_alu_src *, uint8_t component = 0);
  75    Value* getSrc(nir_register *, uint8_t);
  76    Value* getSrc(nir_src *, uint8_t, bool indirect = false);
  77    Value* getSrc(nir_ssa_def *, uint8_t);
  78
  79    // returned value is the constant part of the given source (either the
  80    // nir_src or the selected source component of an intrinsic). Even though
  81    // this is mostly an optimization to be able to skip indirects in a few
  82    // cases, sometimes we require immediate values or set some fileds on
  83    // instructions (e.g. tex) in order for codegen to consume those.
  84    // If the found value has not a constant part, the Value gets returned
  85    // through the Value parameter.
  86    uint32_t getIndirect(nir_src *, uint8_t, Value *&);
  87    uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&);
  88
  89    uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
  90
  91    void setInterpolate(nv50_ir_varying *,
  92                        uint8_t,
  93                        bool centroid,
  94                        unsigned semantics);
  95
  96    bool isFloatType(nir_alu_type);
  97    bool isSignedType(nir_alu_type);
  98    bool isResultFloat(nir_op);
  99    bool isResultSigned(nir_op);
 100
 101    DataType getDType(nir_alu_instr *);
 102    DataType getDType(nir_intrinsic_instr *);
 103    DataType getDType(nir_op, uint8_t);
 104
 105    std::vector<DataType> getSTypes(nir_alu_instr *);
 106    DataType getSType(nir_src &, bool isFloat, bool isSigned);
 107
 108    bool assignSlots();
 109
 110    nir_shader *nir;
 111
 112    NirDefMap ssaDefs;
 113    NirDefMap regDefs;
 114 };
 115
 116 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
 117    : ConverterCommon(prog, info),
 118      nir(nir) {}
 119
 120 bool
 121 Converter::isFloatType(nir_alu_type type)
 122 {
 123    return nir_alu_type_get_base_type(type) == nir_type_float;
 124 }
 125
 126 bool
 127 Converter::isSignedType(nir_alu_type type)
 128 {
 129    return nir_alu_type_get_base_type(type) == nir_type_int;
 130 }
 131
 132 bool
 133 Converter::isResultFloat(nir_op op)
 134 {
 135    const nir_op_info &info = nir_op_infos[op];
 136    if (info.output_type != nir_type_invalid)
 137       return isFloatType(info.output_type);
 138
 139    ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
 140    assert(false);
 141    return true;
 142 }
 143
 144 bool
 145 Converter::isResultSigned(nir_op op)
 146 {
 147    switch (op) {
 148    // there is no umul and we get wrong results if we treat all muls as signed
 149    case nir_op_imul:
 150    case nir_op_inot:
 151       return false;
 152    default:
 153       const nir_op_info &info = nir_op_infos[op];
 154       if (info.output_type != nir_type_invalid)
 155          return isSignedType(info.output_type);
 156       ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
 157       assert(false);
 158       return true;
 159    }
 160 }
 161
 162 DataType
 163 Converter::getDType(nir_alu_instr *insn)
 164 {
 165    if (insn->dest.dest.is_ssa)
 166       return getDType(insn->op, insn->dest.dest.ssa.bit_size);
 167    else
 168       return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
 169 }
 170
 171 DataType
 172 Converter::getDType(nir_intrinsic_instr *insn)
 173 {
 174    if (insn->dest.is_ssa)
 175       return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
 176    else
 177       return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
 178 }
 179
 180 DataType
 181 Converter::getDType(nir_op op, uint8_t bitSize)
 182 {
 183    DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));
 184    if (ty == TYPE_NONE) {
 185       ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);
 186       assert(false);
 187    }
 188    return ty;
 189 }
 190
 191 std::vector<DataType>
 192 Converter::getSTypes(nir_alu_instr *insn)
 193 {
 194    const nir_op_info &info = nir_op_infos[insn->op];
 195    std::vector<DataType> res(info.num_inputs);
 196
 197    for (uint8_t i = 0; i < info.num_inputs; ++i) {
 198       if (info.input_types[i] != nir_type_invalid) {
 199          res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));
 200       } else {
 201          ERROR("getSType not implemented for %s idx %u\n", info.name, i);
 202          assert(false);
 203          res[i] = TYPE_NONE;
 204          break;
 205       }
 206    }
 207
 208    return res;
 209 }
 210
 211 DataType
 212 Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
 213 {
 214    uint8_t bitSize;
 215    if (src.is_ssa)
 216       bitSize = src.ssa->bit_size;
 217    else
 218       bitSize = src.reg.reg->bit_size;
 219
 220    DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);
 221    if (ty == TYPE_NONE) {
 222       const char *str;
 223       if (isFloat)
 224          str = "float";
 225       else if (isSigned)
 226          str = "int";
 227       else
 228          str = "uint";
 229       ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);
 230       assert(false);
 231    }
 232    return ty;
 233 }
 234
 235 Converter::LValues&
 236 Converter::convert(nir_dest *dest)
 237 {
 238    if (dest->is_ssa)
 239       return convert(&dest->ssa);
 240    if (dest->reg.indirect) {
 241       ERROR("no support for indirects.");
 242       assert(false);
 243    }
 244    return convert(dest->reg.reg);
 245 }
 246
 247 Converter::LValues&
 248 Converter::convert(nir_register *reg)
 249 {
 250    NirDefMap::iterator it = regDefs.find(reg->index);
 251    if (it != regDefs.end())
 252       return it->second;
 253
 254    LValues newDef(reg->num_components);
 255    for (uint8_t i = 0; i < reg->num_components; i++)
 256       newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
 257    return regDefs[reg->index] = newDef;
 258 }
 259
 260 Converter::LValues&
 261 Converter::convert(nir_ssa_def *def)
 262 {
 263    NirDefMap::iterator it = ssaDefs.find(def->index);
 264    if (it != ssaDefs.end())
 265       return it->second;
 266
 267    LValues newDef(def->num_components);
 268    for (uint8_t i = 0; i < def->num_components; i++)
 269       newDef[i] = getSSA(std::max(4, def->bit_size / 8));
 270    return ssaDefs[def->index] = newDef;
 271 }
 272
 273 Value*
 274 Converter::getSrc(nir_alu_src *src, uint8_t component)
 275 {
 276    if (src->abs || src->negate) {
 277       ERROR("modifiers currently not supported on nir_alu_src\n");
 278       assert(false);
 279    }
 280    return getSrc(&src->src, src->swizzle[component]);
 281 }
 282
 283 Value*
 284 Converter::getSrc(nir_register *reg, uint8_t idx)
 285 {
 286    NirDefMap::iterator it = regDefs.find(reg->index);
 287    if (it == regDefs.end())
 288       return convert(reg)[idx];
 289    return it->second[idx];
 290 }
 291
 292 Value*
 293 Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
 294 {
 295    if (src->is_ssa)
 296       return getSrc(src->ssa, idx);
 297
 298    if (src->reg.indirect) {
 299       if (indirect)
 300          return getSrc(src->reg.indirect, idx);
 301       ERROR("no support for indirects.");
 302       assert(false);
 303       return NULL;
 304    }
 305
 306    return getSrc(src->reg.reg, idx);
 307 }
 308
 309 Value*
 310 Converter::getSrc(nir_ssa_def *src, uint8_t idx)
 311 {
 312    NirDefMap::iterator it = ssaDefs.find(src->index);
 313    if (it == ssaDefs.end()) {
 314       ERROR("SSA value %u not found\n", src->index);
 315       assert(false);
 316       return NULL;
 317    }
 318    return it->second[idx];
 319 }
 320
 321 uint32_t
 322 Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
 323 {
 324    nir_const_value *offset = nir_src_as_const_value(*src);
 325
 326    if (offset) {
 327       indirect = NULL;
 328       return offset->u32[0];
 329    }
 330
 331    indirect = getSrc(src, idx, true);
 332    return 0;
 333 }
 334
 335 uint32_t
 336 Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect)
 337 {
 338    int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);
 339    if (indirect)
 340       indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));
 341    return idx;
 342 }
 343
 344 static void
 345 vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)
 346 {
 347    assert(name && index);
 348
 349    if (slot >= VERT_ATTRIB_MAX) {
 350       ERROR("invalid varying slot %u\n", slot);
 351       assert(false);
 352       return;
 353    }
 354
 355    if (slot >= VERT_ATTRIB_GENERIC0 &&
 356        slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {
 357       *name = TGSI_SEMANTIC_GENERIC;
 358       *index = slot - VERT_ATTRIB_GENERIC0;
 359       return;
 360    }
 361
 362    if (slot >= VERT_ATTRIB_TEX0 &&
 363        slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {
 364       *name = TGSI_SEMANTIC_TEXCOORD;
 365       *index = slot - VERT_ATTRIB_TEX0;
 366       return;
 367    }
 368
 369    switch (slot) {
 370    case VERT_ATTRIB_COLOR0:
 371       *name = TGSI_SEMANTIC_COLOR;
 372       *index = 0;
 373       break;
 374    case VERT_ATTRIB_COLOR1:
 375       *name = TGSI_SEMANTIC_COLOR;
 376       *index = 1;
 377       break;
 378    case VERT_ATTRIB_EDGEFLAG:
 379       *name = TGSI_SEMANTIC_EDGEFLAG;
 380       *index = 0;
 381       break;
 382    case VERT_ATTRIB_FOG:
 383       *name = TGSI_SEMANTIC_FOG;
 384       *index = 0;
 385       break;
 386    case VERT_ATTRIB_NORMAL:
 387       *name = TGSI_SEMANTIC_NORMAL;
 388       *index = 0;
 389       break;
 390    case VERT_ATTRIB_POS:
 391       *name = TGSI_SEMANTIC_POSITION;
 392       *index = 0;
 393       break;
 394    case VERT_ATTRIB_POINT_SIZE:
 395       *name = TGSI_SEMANTIC_PSIZE;
 396       *index = 0;
 397       break;
 398    default:
 399       ERROR("unknown vert attrib slot %u\n", slot);
 400       assert(false);
 401       break;
 402    }
 403 }
 404
 405 static void
 406 varying_slot_to_tgsi_semantic(gl_varying_slot slot, unsigned *name, unsigned *index)
 407 {
 408    assert(name && index);
 409
 410    if (slot >= VARYING_SLOT_TESS_MAX) {
 411       ERROR("invalid varying slot %u\n", slot);
 412       assert(false);
 413       return;
 414    }
 415
 416    if (slot >= VARYING_SLOT_PATCH0) {
 417       *name = TGSI_SEMANTIC_PATCH;
 418       *index = slot - VARYING_SLOT_PATCH0;
 419       return;
 420    }
 421
 422    if (slot >= VARYING_SLOT_VAR0) {
 423       *name = TGSI_SEMANTIC_GENERIC;
 424       *index = slot - VARYING_SLOT_VAR0;
 425       return;
 426    }
 427
 428    if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
 429       *name = TGSI_SEMANTIC_TEXCOORD;
 430       *index = slot - VARYING_SLOT_TEX0;
 431       return;
 432    }
 433
 434    switch (slot) {
 435    case VARYING_SLOT_BFC0:
 436       *name = TGSI_SEMANTIC_BCOLOR;
 437       *index = 0;
 438       break;
 439    case VARYING_SLOT_BFC1:
 440       *name = TGSI_SEMANTIC_BCOLOR;
 441       *index = 1;
 442       break;
 443    case VARYING_SLOT_CLIP_DIST0:
 444       *name = TGSI_SEMANTIC_CLIPDIST;
 445       *index = 0;
 446       break;
 447    case VARYING_SLOT_CLIP_DIST1:
 448       *name = TGSI_SEMANTIC_CLIPDIST;
 449       *index = 1;
 450       break;
 451    case VARYING_SLOT_CLIP_VERTEX:
 452       *name = TGSI_SEMANTIC_CLIPVERTEX;
 453       *index = 0;
 454       break;
 455    case VARYING_SLOT_COL0:
 456       *name = TGSI_SEMANTIC_COLOR;
 457       *index = 0;
 458       break;
 459    case VARYING_SLOT_COL1:
 460       *name = TGSI_SEMANTIC_COLOR;
 461       *index = 1;
 462       break;
 463    case VARYING_SLOT_EDGE:
 464       *name = TGSI_SEMANTIC_EDGEFLAG;
 465       *index = 0;
 466       break;
 467    case VARYING_SLOT_FACE:
 468       *name = TGSI_SEMANTIC_FACE;
 469       *index = 0;
 470       break;
 471    case VARYING_SLOT_FOGC:
 472       *name = TGSI_SEMANTIC_FOG;
 473       *index = 0;
 474       break;
 475    case VARYING_SLOT_LAYER:
 476       *name = TGSI_SEMANTIC_LAYER;
 477       *index = 0;
 478       break;
 479    case VARYING_SLOT_PNTC:
 480       *name = TGSI_SEMANTIC_PCOORD;
 481       *index = 0;
 482       break;
 483    case VARYING_SLOT_POS:
 484       *name = TGSI_SEMANTIC_POSITION;
 485       *index = 0;
 486       break;
 487    case VARYING_SLOT_PRIMITIVE_ID:
 488       *name = TGSI_SEMANTIC_PRIMID;
 489       *index = 0;
 490       break;
 491    case VARYING_SLOT_PSIZ:
 492       *name = TGSI_SEMANTIC_PSIZE;
 493       *index = 0;
 494       break;
 495    case VARYING_SLOT_TESS_LEVEL_INNER:
 496       *name = TGSI_SEMANTIC_TESSINNER;
 497       *index = 0;
 498       break;
 499    case VARYING_SLOT_TESS_LEVEL_OUTER:
 500       *name = TGSI_SEMANTIC_TESSOUTER;
 501       *index = 0;
 502       break;
 503    case VARYING_SLOT_VIEWPORT:
 504       *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
 505       *index = 0;
 506       break;
 507    default:
 508       ERROR("unknown varying slot %u\n", slot);
 509       assert(false);
 510       break;
 511    }
 512 }
 513
 514 static void
 515 frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
 516 {
 517    if (slot >= FRAG_RESULT_DATA0) {
 518       *name = TGSI_SEMANTIC_COLOR;
 519       *index = slot - FRAG_RESULT_COLOR - 2; // intentional
 520       return;
 521    }
 522
 523    switch (slot) {
 524    case FRAG_RESULT_COLOR:
 525       *name = TGSI_SEMANTIC_COLOR;
 526       *index = 0;
 527       break;
 528    case FRAG_RESULT_DEPTH:
 529       *name = TGSI_SEMANTIC_POSITION;
 530       *index = 0;
 531       break;
 532    case FRAG_RESULT_SAMPLE_MASK:
 533       *name = TGSI_SEMANTIC_SAMPLEMASK;
 534       *index = 0;
 535       break;
 536    default:
 537       ERROR("unknown frag result slot %u\n", slot);
 538       assert(false);
 539       break;
 540    }
 541 }
 542
 543 // copy of _mesa_sysval_to_semantic
 544 static void
 545 system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
 546 {
 547    *index = 0;
 548    switch (val) {
 549    // Vertex shader
 550    case SYSTEM_VALUE_VERTEX_ID:
 551       *name = TGSI_SEMANTIC_VERTEXID;
 552       break;
 553    case SYSTEM_VALUE_INSTANCE_ID:
 554       *name = TGSI_SEMANTIC_INSTANCEID;
 555       break;
 556    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
 557       *name = TGSI_SEMANTIC_VERTEXID_NOBASE;
 558       break;
 559    case SYSTEM_VALUE_BASE_VERTEX:
 560       *name = TGSI_SEMANTIC_BASEVERTEX;
 561       break;
 562    case SYSTEM_VALUE_BASE_INSTANCE:
 563       *name = TGSI_SEMANTIC_BASEINSTANCE;
 564       break;
 565    case SYSTEM_VALUE_DRAW_ID:
 566       *name = TGSI_SEMANTIC_DRAWID;
 567       break;
 568
 569    // Geometry shader
 570    case SYSTEM_VALUE_INVOCATION_ID:
 571       *name = TGSI_SEMANTIC_INVOCATIONID;
 572       break;
 573
 574    // Fragment shader
 575    case SYSTEM_VALUE_FRAG_COORD:
 576       *name = TGSI_SEMANTIC_POSITION;
 577       break;
 578    case SYSTEM_VALUE_FRONT_FACE:
 579       *name = TGSI_SEMANTIC_FACE;
 580       break;
 581    case SYSTEM_VALUE_SAMPLE_ID:
 582       *name = TGSI_SEMANTIC_SAMPLEID;
 583       break;
 584    case SYSTEM_VALUE_SAMPLE_POS:
 585       *name = TGSI_SEMANTIC_SAMPLEPOS;
 586       break;
 587    case SYSTEM_VALUE_SAMPLE_MASK_IN:
 588       *name = TGSI_SEMANTIC_SAMPLEMASK;
 589       break;
 590    case SYSTEM_VALUE_HELPER_INVOCATION:
 591       *name = TGSI_SEMANTIC_HELPER_INVOCATION;
 592       break;
 593
 594    // Tessellation shader
 595    case SYSTEM_VALUE_TESS_COORD:
 596       *name = TGSI_SEMANTIC_TESSCOORD;
 597       break;
 598    case SYSTEM_VALUE_VERTICES_IN:
 599       *name = TGSI_SEMANTIC_VERTICESIN;
 600       break;
 601    case SYSTEM_VALUE_PRIMITIVE_ID:
 602       *name = TGSI_SEMANTIC_PRIMID;
 603       break;
 604    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 605       *name = TGSI_SEMANTIC_TESSOUTER;
 606       break;
 607    case SYSTEM_VALUE_TESS_LEVEL_INNER:
 608       *name = TGSI_SEMANTIC_TESSINNER;
 609       break;
 610
 611    // Compute shader
 612    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
 613       *name = TGSI_SEMANTIC_THREAD_ID;
 614       break;
 615    case SYSTEM_VALUE_WORK_GROUP_ID:
 616       *name = TGSI_SEMANTIC_BLOCK_ID;
 617       break;
 618    case SYSTEM_VALUE_NUM_WORK_GROUPS:
 619       *name = TGSI_SEMANTIC_GRID_SIZE;
 620       break;
 621    case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
 622       *name = TGSI_SEMANTIC_BLOCK_SIZE;
 623       break;
 624
 625    // ARB_shader_ballot
 626    case SYSTEM_VALUE_SUBGROUP_SIZE:
 627       *name = TGSI_SEMANTIC_SUBGROUP_SIZE;
 628       break;
 629    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
 630       *name = TGSI_SEMANTIC_SUBGROUP_INVOCATION;
 631       break;
 632    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
 633       *name = TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
 634       break;
 635    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
 636       *name = TGSI_SEMANTIC_SUBGROUP_GE_MASK;
 637       break;
 638    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
 639       *name = TGSI_SEMANTIC_SUBGROUP_GT_MASK;
 640       break;
 641    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
 642       *name = TGSI_SEMANTIC_SUBGROUP_LE_MASK;
 643       break;
 644    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
 645       *name = TGSI_SEMANTIC_SUBGROUP_LT_MASK;
 646       break;
 647
 648    default:
 649       ERROR("unknown system value %u\n", val);
 650       assert(false);
 651       break;
 652    }
 653 }
 654
 655 void
 656 Converter::setInterpolate(nv50_ir_varying *var,
 657                           uint8_t mode,
 658                           bool centroid,
 659                           unsigned semantic)
 660 {
 661    switch (mode) {
 662    case INTERP_MODE_FLAT:
 663       var->flat = 1;
 664       break;
 665    case INTERP_MODE_NONE:
 666       if (semantic == TGSI_SEMANTIC_COLOR)
 667          var->sc = 1;
 668       else if (semantic == TGSI_SEMANTIC_POSITION)
 669          var->linear = 1;
 670       break;
 671    case INTERP_MODE_NOPERSPECTIVE:
 672       var->linear = 1;
 673       break;
 674    case INTERP_MODE_SMOOTH:
 675       break;
 676    }
 677    var->centroid = centroid;
 678 }
 679
 680 static uint16_t
 681 calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
 682           bool input, const nir_variable *var)
 683 {
 684    if (!type->is_array())
 685       return type->count_attribute_slots(false);
 686
 687    uint16_t slots;
 688    switch (stage) {
 689    case Program::TYPE_GEOMETRY:
 690       slots = type->uniform_locations();
 691       if (input)
 692          slots /= info.gs.vertices_in;
 693       break;
 694    case Program::TYPE_TESSELLATION_CONTROL:
 695    case Program::TYPE_TESSELLATION_EVAL:
 696       // remove first dimension
 697       if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
 698          slots = type->uniform_locations();
 699       else
 700          slots = type->fields.array->uniform_locations();
 701       break;
 702    default:
 703       slots = type->count_attribute_slots(false);
 704       break;
 705    }
 706
 707    return slots;
 708 }
 709
 710 bool Converter::assignSlots() {
 711    unsigned name;
 712    unsigned index;
 713
 714    info->io.viewportId = -1;
 715    info->numInputs = 0;
 716
 717    // we have to fixup the uniform locations for arrays
 718    unsigned numImages = 0;
 719    nir_foreach_variable(var, &nir->uniforms) {
 720       const glsl_type *type = var->type;
 721       if (!type->without_array()->is_image())
 722          continue;
 723       var->data.driver_location = numImages;
 724       numImages += type->is_array() ? type->arrays_of_arrays_size() : 1;
 725    }
 726
 727    nir_foreach_variable(var, &nir->inputs) {
 728       const glsl_type *type = var->type;
 729       int slot = var->data.location;
 730       uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
 731       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
 732                                        : type->component_slots();
 733       uint32_t frac = var->data.location_frac;
 734       uint32_t vary = var->data.driver_location;
 735
 736       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
 737          if (comp > 2)
 738             slots *= 2;
 739       }
 740
 741       assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
 742
 743       switch(prog->getType()) {
 744       case Program::TYPE_FRAGMENT:
 745          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 746          for (uint16_t i = 0; i < slots; ++i) {
 747             setInterpolate(&info->in[vary + i], var->data.interpolation,
 748                            var->data.centroid | var->data.sample, name);
 749          }
 750          break;
 751       case Program::TYPE_GEOMETRY:
 752          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 753          break;
 754       case Program::TYPE_TESSELLATION_CONTROL:
 755       case Program::TYPE_TESSELLATION_EVAL:
 756          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 757          if (var->data.patch && name == TGSI_SEMANTIC_PATCH)
 758             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
 759          break;
 760       case Program::TYPE_VERTEX:
 761          vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
 762          switch (name) {
 763          case TGSI_SEMANTIC_EDGEFLAG:
 764             info->io.edgeFlagIn = vary;
 765             break;
 766          default:
 767             break;
 768          }
 769          break;
 770       default:
 771          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
 772          return false;
 773       }
 774
 775       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
 776          info->in[vary].id = vary;
 777          info->in[vary].patch = var->data.patch;
 778          info->in[vary].sn = name;
 779          info->in[vary].si = index + i;
 780          if (glsl_base_type_is_64bit(type->without_array()->base_type))
 781             if (i & 0x1)
 782                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
 783             else
 784                info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
 785          else
 786             info->in[vary].mask |= ((1 << comp) - 1) << frac;
 787       }
 788       info->numInputs = std::max<uint8_t>(info->numInputs, vary);
 789    }
 790
 791    info->numOutputs = 0;
 792    nir_foreach_variable(var, &nir->outputs) {
 793       const glsl_type *type = var->type;
 794       int slot = var->data.location;
 795       uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
 796       uint32_t comp = type->is_array() ? type->without_array()->component_slots()
 797                                        : type->component_slots();
 798       uint32_t frac = var->data.location_frac;
 799       uint32_t vary = var->data.driver_location;
 800
 801       if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
 802          if (comp > 2)
 803             slots *= 2;
 804       }
 805
 806       assert(vary < PIPE_MAX_SHADER_OUTPUTS);
 807
 808       switch(prog->getType()) {
 809       case Program::TYPE_FRAGMENT:
 810          frag_result_to_tgsi_semantic((gl_frag_result)slot, &name, &index);
 811          switch (name) {
 812          case TGSI_SEMANTIC_COLOR:
 813             if (!var->data.fb_fetch_output)
 814                info->prop.fp.numColourResults++;
 815             info->prop.fp.separateFragData = true;
 816             // sometimes we get FRAG_RESULT_DATAX with data.index 0
 817             // sometimes we get FRAG_RESULT_DATA0 with data.index X
 818             index = index == 0 ? var->data.index : index;
 819             break;
 820          case TGSI_SEMANTIC_POSITION:
 821             info->io.fragDepth = vary;
 822             info->prop.fp.writesDepth = true;
 823             break;
 824          case TGSI_SEMANTIC_SAMPLEMASK:
 825             info->io.sampleMask = vary;
 826             break;
 827          default:
 828             break;
 829          }
 830          break;
 831       case Program::TYPE_GEOMETRY:
 832       case Program::TYPE_TESSELLATION_CONTROL:
 833       case Program::TYPE_TESSELLATION_EVAL:
 834       case Program::TYPE_VERTEX:
 835          varying_slot_to_tgsi_semantic((gl_varying_slot)slot, &name, &index);
 836
 837          if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&
 838              name != TGSI_SEMANTIC_TESSOUTER)
 839             info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
 840
 841          switch (name) {
 842          case TGSI_SEMANTIC_CLIPDIST:
 843             info->io.genUserClip = -1;
 844             break;
 845          case TGSI_SEMANTIC_EDGEFLAG:
 846             info->io.edgeFlagOut = vary;
 847             break;
 848          default:
 849             break;
 850          }
 851          break;
 852       default:
 853          ERROR("unknown shader type %u in assignSlots\n", prog->getType());
 854          return false;
 855       }
 856
 857       for (uint16_t i = 0u; i < slots; ++i, ++vary) {
 858          info->out[vary].id = vary;
 859          info->out[vary].patch = var->data.patch;
 860          info->out[vary].sn = name;
 861          info->out[vary].si = index + i;
 862          if (glsl_base_type_is_64bit(type->without_array()->base_type))
 863             if (i & 0x1)
 864                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
 865             else
 866                info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
 867          else
 868             info->out[vary].mask |= ((1 << comp) - 1) << frac;
 869
 870          if (nir->info.outputs_read & 1ll << slot)
 871             info->out[vary].oread = 1;
 872       }
 873       info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
 874    }
 875
 876    info->numSysVals = 0;
 877    for (uint8_t i = 0; i < 64; ++i) {
 878       if (!(nir->info.system_values_read & 1ll << i))
 879          continue;
 880
 881       system_val_to_tgsi_semantic(i, &name, &index);
 882       info->sv[info->numSysVals].sn = name;
 883       info->sv[info->numSysVals].si = index;
 884       info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn);
 885
 886       switch (i) {
 887       case SYSTEM_VALUE_INSTANCE_ID:
 888          info->io.instanceId = info->numSysVals;
 889          break;
 890       case SYSTEM_VALUE_TESS_LEVEL_INNER:
 891       case SYSTEM_VALUE_TESS_LEVEL_OUTER:
 892          info->sv[info->numSysVals].patch = 1;
 893          break;
 894       case SYSTEM_VALUE_VERTEX_ID:
 895          info->io.vertexId = info->numSysVals;
 896          break;
 897       default:
 898          break;
 899       }
 900
 901       info->numSysVals += 1;
 902    }
 903
 904    if (info->io.genUserClip > 0) {
 905       info->io.clipDistances = info->io.genUserClip;
 906
 907       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
 908
 909       for (unsigned int n = 0; n < nOut; ++n) {
 910          unsigned int i = info->numOutputs++;
 911          info->out[i].id = i;
 912          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
 913          info->out[i].si = n;
 914          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
 915       }
 916    }
 917
 918    return info->assignSlots(info) == 0;
 919 }
 920
 921 uint32_t
 922 Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)
 923 {
 924    DataType ty;
 925    int offset = nir_intrinsic_component(insn);
 926    bool input;
 927
 928    if (nir_intrinsic_infos[insn->intrinsic].has_dest)
 929       ty = getDType(insn);
 930    else
 931       ty = getSType(insn->src[0], false, false);
 932
 933    switch (insn->intrinsic) {
 934    case nir_intrinsic_load_input:
 935    case nir_intrinsic_load_interpolated_input:
 936    case nir_intrinsic_load_per_vertex_input:
 937       input = true;
 938       break;
 939    case nir_intrinsic_load_output:
 940    case nir_intrinsic_load_per_vertex_output:
 941    case nir_intrinsic_store_output:
 942    case nir_intrinsic_store_per_vertex_output:
 943       input = false;
 944       break;
 945    default:
 946       ERROR("unknown intrinsic in getSlotAddress %s",
 947             nir_intrinsic_infos[insn->intrinsic].name);
 948       input = false;
 949       assert(false);
 950       break;
 951    }
 952
 953    if (typeSizeof(ty) == 8) {
 954       slot *= 2;
 955       slot += offset;
 956       if (slot >= 4) {
 957          idx += 1;
 958          slot -= 4;
 959       }
 960    } else {
 961       slot += offset;
 962    }
 963
 964    assert(slot < 4);
 965    assert(!input || idx < PIPE_MAX_SHADER_INPUTS);
 966    assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);
 967
 968    const nv50_ir_varying *vary = input ? info->in : info->out;
 969    return vary[idx].slot[slot] * 4;
 970 }
 971
 972 bool
 973 Converter::run()
 974 {
 975    bool progress;
 976
 977    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
 978       nir_print_shader(nir, stderr);
 979
 980    NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0);
 981    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
 982    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
 983    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
 984    NIR_PASS_V(nir, nir_lower_alu_to_scalar);
 985    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
 986
 987    do {
 988       progress = false;
 989       NIR_PASS(progress, nir, nir_copy_prop);
 990       NIR_PASS(progress, nir, nir_opt_remove_phis);
 991       NIR_PASS(progress, nir, nir_opt_trivial_continues);
 992       NIR_PASS(progress, nir, nir_opt_cse);
 993       NIR_PASS(progress, nir, nir_opt_algebraic);
 994       NIR_PASS(progress, nir, nir_opt_constant_folding);
 995       NIR_PASS(progress, nir, nir_copy_prop);
 996       NIR_PASS(progress, nir, nir_opt_dce);
 997       NIR_PASS(progress, nir, nir_opt_dead_cf);
 998    } while (progress);
 999
1000    NIR_PASS_V(nir, nir_lower_bool_to_int32);
1001    NIR_PASS_V(nir, nir_lower_locals_to_regs);
1002    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
1003    NIR_PASS_V(nir, nir_convert_from_ssa, true);
1004
1005    // Garbage collect dead instructions
1006    nir_sweep(nir);
1007
1008    if (!assignSlots()) {
1009       ERROR("Couldn't assign slots!\n");
1010       return false;
1011    }
1012
1013    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1014       nir_print_shader(nir, stderr);
1015
1016    return false;
1017 }
1018
1019 } // unnamed namespace
1020
1021 namespace nv50_ir {
1022
1023 bool
1024 Program::makeFromNIR(struct nv50_ir_prog_info *info)
1025 {
1026    nir_shader *nir = (nir_shader*)info->bin.source;
1027    Converter converter(this, nir, info);
1028    bool result = converter.run();
1029    if (!result)
1030       return result;
1031    LoweringHelper lowering;
1032    lowering.run(this);
1033    tlsSize = info->bin.tlsSpace;
1034    return result;
1035 }
1036
1037 } // namespace nv50_ir