src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
  19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #include "nv50/codegen/nv50_ir.h"
  24 #include "nv50/codegen/nv50_ir_build_util.h"
  25
  26 #include "nv50_ir_target_nvc0.h"
  27
  28 namespace nv50_ir {
  29
  30 #define QOP_ADD  0
  31 #define QOP_SUBR 1
  32 #define QOP_SUB  2
  33 #define QOP_MOV2 3
  34
  35 #define QUADOP(q, r, s, t)                      \
  36    ((QOP_##q << 0) | (QOP_##r << 2) |           \
  37     (QOP_##s << 4) | (QOP_##t << 6))
  38
  39 class NVC0LegalizeSSA : public Pass
  40 {
  41 private:
  42    virtual bool visit(BasicBlock *);
  43    virtual bool visit(Function *);
  44
  45    // we want to insert calls to the builtin library only after optimization
  46    void handleDIV(Instruction *); // integer division, modulus
  47    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
  48
  49 private:
  50    BuildUtil bld;
  51 };
  52
  53 void
  54 NVC0LegalizeSSA::handleDIV(Instruction *i)
  55 {
  56    FlowInstruction *call;
  57    int builtin;
  58    Value *def[2];
  59
  60    bld.setPosition(i, false);
  61    def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
  62    def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);
  63    switch (i->dType) {
  64    case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
  65    case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
  66    default:
  67       return;
  68    }
  69    call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
  70    bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
  71    bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
  72    bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);
  73
  74    call->fixed = 1;
  75    call->absolute = call->builtin = 1;
  76    call->target.builtin = builtin;
  77    delete_Instruction(prog, i);
  78 }
  79
  80 void
  81 NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
  82 {
  83    // TODO
  84 }
  85
  86 bool
  87 NVC0LegalizeSSA::visit(Function *fn)
  88 {
  89    bld.setProgram(fn->getProgram());
  90    return true;
  91 }
  92
  93 bool
  94 NVC0LegalizeSSA::visit(BasicBlock *bb)
  95 {
  96    Instruction *next;
  97    for (Instruction *i = bb->getEntry(); i; i = next) {
  98       next = i->next;
  99       if (i->dType == TYPE_F32)
 100          continue;
 101       switch (i->op) {
 102       case OP_DIV:
 103       case OP_MOD:
 104          handleDIV(i);
 105          break;
 106       case OP_RCP:
 107       case OP_RSQ:
 108          if (i->dType == TYPE_F64)
 109             handleRCPRSQ(i);
 110          break;
 111       default:
 112          break;
 113       }
 114    }
 115    return true;
 116 }
 117
 118 class NVC0LegalizePostRA : public Pass
 119 {
 120 private:
 121    virtual bool visit(Function *);
 122    virtual bool visit(BasicBlock *);
 123
 124    void replaceZero(Instruction *);
 125    void split64BitOp(Instruction *);
 126    bool tryReplaceContWithBra(BasicBlock *);
 127    void propagateJoin(BasicBlock *);
 128
 129    LValue *r63;
 130 };
 131
 132 bool
 133 NVC0LegalizePostRA::visit(Function *fn)
 134 {
 135    r63 = new_LValue(fn, FILE_GPR);
 136    r63->reg.data.id = 63;
 137    return true;
 138 }
 139
 140 void
 141 NVC0LegalizePostRA::replaceZero(Instruction *i)
 142 {
 143    for (int s = 0; i->srcExists(s); ++s) {
 144       ImmediateValue *imm = i->getSrc(s)->asImm();
 145       if (imm && imm->reg.data.u64 == 0)
 146          i->setSrc(s, r63);
 147    }
 148 }
 149
 150 void
 151 NVC0LegalizePostRA::split64BitOp(Instruction *i)
 152 {
 153    if (i->dType == TYPE_F64) {
 154       if (i->op == OP_MAD)
 155          i->op = OP_FMA;
 156       if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA ||
 157           i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX ||
 158           i->op == OP_SET)
 159          return;
 160       i->dType = i->sType = TYPE_U32;
 161
 162       i->bb->insertAfter(i, i->clone(true)); // deep cloning
 163    }
 164 }
 165
 166 // replace CONT with BRA for single unconditional continue
 167 bool
 168 NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
 169 {
 170    if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT)
 171       return false;
 172    Graph::EdgeIterator ei = bb->cfg.incident();
 173    if (ei.getType() != Graph::Edge::BACK)
 174       ei.next();
 175    if (ei.getType() != Graph::Edge::BACK)
 176       return false;
 177    BasicBlock *contBB = BasicBlock::get(ei.getNode());
 178
 179    if (!contBB->getExit() || contBB->getExit()->op != OP_CONT ||
 180        contBB->getExit()->getPredicate())
 181       return false;
 182    contBB->getExit()->op = OP_BRA;
 183    bb->remove(bb->getEntry()); // delete PRECONT
 184
 185    ei.next();
 186    assert(ei.end() || ei.getType() != Graph::Edge::BACK);
 187    return true;
 188 }
 189
 190 // replace branches to join blocks with join ops
 191 void
 192 NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
 193 {
 194    if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit)
 195       return;
 196    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
 197       BasicBlock *in = BasicBlock::get(ei.getNode());
 198       Instruction *exit = in->getExit();
 199       if (!exit) {
 200          in->insertTail(new FlowInstruction(func, OP_JOIN, bb));
 201          // there should always be a terminator instruction
 202          WARN("inserted missing terminator in BB:%i\n", in->getId());
 203       } else
 204       if (exit->op == OP_BRA) {
 205          exit->op = OP_JOIN;
 206          exit->asFlow()->limit = 1; // must-not-propagate marker
 207       }
 208    }
 209    bb->remove(bb->getEntry());
 210 }
 211
 212 bool
 213 NVC0LegalizePostRA::visit(BasicBlock *bb)
 214 {
 215    Instruction *i, *next;
 216
 217    // remove pseudo operations and non-fixed no-ops, split 64 bit operations
 218    for (i = bb->getFirst(); i; i = next) {
 219       next = i->next;
 220       if (i->op == OP_EMIT || i->op == OP_RESTART) {
 221          if (!i->getDef(0)->refCount())
 222             i->setDef(0, NULL);
 223          if (i->src[0].getFile() == FILE_IMMEDIATE)
 224             i->setSrc(0, r63); // initial value must be 0
 225       } else
 226       if (i->isNop()) {
 227          bb->remove(i);
 228       } else {
 229          if (i->op != OP_MOV && i->op != OP_PFETCH)
 230             replaceZero(i);
 231          if (typeSizeof(i->dType) == 8)
 232             split64BitOp(i);
 233       }
 234    }
 235    if (!bb->getEntry())
 236       return true;
 237
 238    if (!tryReplaceContWithBra(bb))
 239       propagateJoin(bb);
 240
 241    return true;
 242 }
 243
 244 class NVC0LoweringPass : public Pass
 245 {
 246 public:
 247    NVC0LoweringPass(Program *);
 248
 249 private:
 250    virtual bool visit(Function *);
 251    virtual bool visit(BasicBlock *);
 252    virtual bool visit(Instruction *);
 253
 254    bool handleRDSV(Instruction *);
 255    bool handleWRSV(Instruction *);
 256    bool handleEXPORT(Instruction *);
 257    bool handleOUT(Instruction *);
 258    bool handleDIV(Instruction *);
 259    bool handleMOD(Instruction *);
 260    bool handleSQRT(Instruction *);
 261    bool handlePOW(Instruction *);
 262    bool handleTEX(TexInstruction *);
 263    bool handleTXD(TexInstruction *);
 264    bool handleTXQ(TexInstruction *);
 265    bool handleManualTXD(TexInstruction *);
 266
 267    void checkPredicate(Instruction *);
 268
 269    void readTessCoord(LValue *dst, int c);
 270
 271 private:
 272    const Target *const targ;
 273
 274    BuildUtil bld;
 275
 276    LValue *gpEmitAddress;
 277 };
 278
 279 NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
 280 {
 281    bld.setProgram(prog);
 282 }
 283
 284 bool
 285 NVC0LoweringPass::visit(Function *fn)
 286 {
 287    if (prog->getType() == Program::TYPE_GEOMETRY) {
 288       assert(!strncmp(fn->getName(), "MAIN", 4));
 289       // TODO: when we generate actual functions pass this value along somehow
 290       bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false);
 291       gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
 292       if (fn->cfgExit) {
 293          bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
 294          bld.mkMovToReg(0, gpEmitAddress);
 295       }
 296    }
 297    return true;
 298 }
 299
 300 bool
 301 NVC0LoweringPass::visit(BasicBlock *bb)
 302 {
 303    return true;
 304 }
 305
 306 // move array source to first slot, convert to u16, add indirections
 307 bool
 308 NVC0LoweringPass::handleTEX(TexInstruction *i)
 309 {
 310    const int dim = i->tex.target.getDim() + i->tex.target.isCube();
 311    const int arg = i->tex.target.getArgCount();
 312
 313    // generate and move the tsc/tic/array source to the front
 314    if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
 315       LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
 316
 317       Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(arg - 1) : NULL;
 318       for (int s = dim; s >= 1; --s)
 319          i->setSrc(s, i->getSrc(s - 1));
 320       i->setSrc(0, arrayIndex);
 321
 322       Value *ticRel = i->getIndirectR();
 323       Value *tscRel = i->getIndirectS();
 324
 325       if (arrayIndex) {
 326          int sat = (i->op == OP_TXF) ? 1 : 0;
 327          DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
 328          bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat;
 329       } else {
 330          bld.loadImm(src, 0);
 331       }
 332
 333       if (ticRel) {
 334          i->setSrc(i->tex.rIndirectSrc, NULL);
 335          bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
 336       }
 337       if (tscRel) {
 338          i->setSrc(i->tex.sIndirectSrc, NULL);
 339          bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
 340       }
 341
 342       i->setSrc(0, src);
 343    }
 344
 345    // offset is last source (lod 1st, dc 2nd)
 346    if (i->tex.useOffsets) {
 347       uint32_t value = 0;
 348       int n, c;
 349       int s = i->srcCount(0xff);
 350       for (n = 0; n < i->tex.useOffsets; ++n)
 351          for (c = 0; c < 3; ++c)
 352             value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
 353       i->setSrc(s, bld.loadImm(NULL, value));
 354    }
 355
 356    return true;
 357 }
 358
 359 bool
 360 NVC0LoweringPass::handleManualTXD(TexInstruction *i)
 361 {
 362    static const uint8_t qOps[4][2] =
 363    {
 364       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
 365       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
 366       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
 367       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
 368    };
 369    Value *def[4][4];
 370    Value *crd[3];
 371    Instruction *tex;
 372    Value *zero = bld.loadImm(bld.getSSA(), 0);
 373    int l, c;
 374    const int dim = i->tex.target.getDim();
 375
 376    i->op = OP_TEX; // no need to clone dPdx/dPdy later
 377
 378    for (c = 0; c < dim; ++c)
 379       crd[c] = bld.getScratch();
 380
 381    bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
 382    for (l = 0; l < 4; ++l) {
 383       // mov coordinates from lane l to all lanes
 384       for (c = 0; c < dim; ++c)
 385          bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
 386       // add dPdx from lane l to lanes dx
 387       for (c = 0; c < dim; ++c)
 388          bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
 389       // add dPdy from lane l to lanes dy
 390       for (c = 0; c < dim; ++c)
 391          bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
 392       // texture
 393       bld.insert(tex = i->clone(true));
 394       for (c = 0; c < dim; ++c)
 395          tex->setSrc(c, crd[c]);
 396       // save results
 397       for (c = 0; i->defExists(c); ++c) {
 398          Instruction *mov;
 399          def[c][l] = bld.getSSA();
 400          mov = bld.mkMov(def[c][l], tex->getDef(c));
 401          mov->fixed = 1;
 402          mov->lanes = 1 << l;
 403       }
 404    }
 405    bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
 406
 407    for (c = 0; i->defExists(c); ++c) {
 408       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
 409       for (l = 0; l < 4; ++l)
 410          u->setSrc(l, def[c][l]);
 411    }
 412
 413    i->bb->remove(i);
 414    return true;
 415 }
 416
 417 bool
 418 NVC0LoweringPass::handleTXD(TexInstruction *txd)
 419 {
 420    int dim = txd->tex.target.getDim();
 421    int arg = txd->tex.target.getDim() + txd->tex.target.isArray();
 422
 423    handleTEX(txd);
 424    while (txd->src[arg].exists())
 425       ++arg;
 426
 427    txd->tex.derivAll = true;
 428    if (dim > 2 || txd->tex.target.isShadow())
 429       return handleManualTXD(txd);
 430
 431    assert(arg <= 4); // at most s/t/array, x, y, offset
 432
 433    for (int c = 0; c < dim; ++c) {
 434       txd->src[arg + c * 2 + 0].set(txd->dPdx[c]);
 435       txd->src[arg + c * 2 + 1].set(txd->dPdy[c]);
 436       txd->dPdx[c] = NULL;
 437       txd->dPdy[c] = NULL;
 438    }
 439    return true;
 440 }
 441
 442 bool
 443 NVC0LoweringPass::handleTXQ(TexInstruction *txq)
 444 {
 445    // TODO: indirect resource/sampler index
 446    return true;
 447 }
 448
 449 bool
 450 NVC0LoweringPass::handleWRSV(Instruction *i)
 451 {
 452    Instruction *st;
 453    Symbol *sym;
 454    uint32_t addr;
 455
 456    // must replace, $sreg are not writeable
 457    addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
 458    if (addr >= 0x400)
 459       return false;
 460    sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);
 461
 462    st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0),
 463                     i->getSrc(1));
 464    st->perPatch = i->perPatch;
 465
 466    bld.getBB()->remove(i);
 467    return true;
 468 }
 469
 470 void
 471 NVC0LoweringPass::readTessCoord(LValue *dst, int c)
 472 {
 473    Value *laneid = bld.getSSA();
 474    Value *x, *y;
 475
 476    bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0));
 477
 478    if (c == 0) {
 479       x = dst;
 480       y = NULL;
 481    } else
 482    if (c == 1) {
 483       x = NULL;
 484       y = dst;
 485    } else {
 486       assert(c == 2);
 487       x = bld.getSSA();
 488       y = bld.getSSA();
 489    }
 490    if (x)
 491       bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid);
 492    if (y)
 493       bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid);
 494
 495    if (c == 2) {
 496       bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y);
 497       bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst);
 498    }
 499 }
 500
 501 bool
 502 NVC0LoweringPass::handleRDSV(Instruction *i)
 503 {
 504    Symbol *sym = i->getSrc(0)->asSym();
 505    Value *vtx = NULL;
 506    Instruction *ld;
 507    uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
 508
 509    if (addr >= 0x400) // mov $sreg
 510       return true;
 511
 512    switch (i->getSrc(0)->reg.data.sv.sv) {
 513    case SV_POSITION:
 514       assert(prog->getType() == Program::TYPE_FRAGMENT);
 515       bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
 516       break;
 517    case SV_FACE:
 518    {
 519       Value *face = i->getDef(0);
 520       bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL);
 521       if (i->dType == TYPE_F32) {
 522          bld.mkOp2(OP_AND, TYPE_U32, face, face, bld.mkImm(0x80000000));
 523          bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000));
 524       }
 525    }
 526       break;
 527    case SV_TESS_COORD:
 528       assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
 529       readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
 530       break;
 531    default:
 532       if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
 533          vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
 534       ld = bld.mkFetch(i->getDef(0), i->dType,
 535                        FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
 536       ld->perPatch = i->perPatch;
 537       break;
 538    }
 539    bld.getBB()->remove(i);
 540    return true;
 541 }
 542
 543 bool
 544 NVC0LoweringPass::handleDIV(Instruction *i)
 545 {
 546    if (!isFloatType(i->dType))
 547       return true;
 548    bld.setPosition(i, false);
 549    Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
 550    i->op = OP_MUL;
 551    i->setSrc(1, rcp->getDef(0));
 552    return true;
 553 }
 554
 555 bool
 556 NVC0LoweringPass::handleMOD(Instruction *i)
 557 {
 558    if (i->dType != TYPE_F32)
 559       return true;
 560    LValue *value = bld.getScratch();
 561    bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1));
 562    bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value);
 563    bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
 564    bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value);
 565    i->op = OP_SUB;
 566    i->setSrc(1, value);
 567    return true;
 568 }
 569
 570 bool
 571 NVC0LoweringPass::handleSQRT(Instruction *i)
 572 {
 573    Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
 574                                 bld.getSSA(), i->getSrc(0));
 575    i->op = OP_MUL;
 576    i->setSrc(1, rsq->getDef(0));
 577
 578    return true;
 579 }
 580
 581 bool
 582 NVC0LoweringPass::handlePOW(Instruction *i)
 583 {
 584    LValue *val = bld.getScratch();
 585
 586    bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
 587    bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
 588    bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
 589
 590    i->op = OP_EX2;
 591    i->setSrc(0, val);
 592    i->setSrc(1, NULL);
 593
 594    return true;
 595 }
 596
 597 bool
 598 NVC0LoweringPass::handleEXPORT(Instruction *i)
 599 {
 600    if (prog->getType() == Program::TYPE_FRAGMENT) {
 601       int id = i->getSrc(0)->reg.data.offset / 4;
 602
 603       if (i->src[0].isIndirect(0)) // TODO, ugly
 604          return false;
 605       i->op = OP_MOV;
 606       i->src[0].set(i->src[1]);
 607       i->setSrc(1, NULL);
 608       i->setDef(0, new_LValue(func, FILE_GPR));
 609       i->getDef(0)->reg.data.id = id;
 610
 611       prog->maxGPR = MAX2(prog->maxGPR, id);
 612    } else
 613    if (prog->getType() == Program::TYPE_GEOMETRY) {
 614       i->setIndirect(0, 1, gpEmitAddress);
 615    }
 616    return true;
 617 }
 618
 619 bool
 620 NVC0LoweringPass::handleOUT(Instruction *i)
 621 {
 622    if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) {
 623       i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
 624       delete_Instruction(prog, i);
 625    } else {
 626       assert(gpEmitAddress);
 627       i->setDef(0, gpEmitAddress);
 628       if (i->srcExists(0))
 629          i->setSrc(1, i->getSrc(0));
 630       i->setSrc(0, gpEmitAddress);
 631    }
 632    return true;
 633 }
 634
 635 // Generate a binary predicate if an instruction is predicated by
 636 // e.g. an f32 value.
 637 void
 638 NVC0LoweringPass::checkPredicate(Instruction *insn)
 639 {
 640    Value *pred = insn->getPredicate();
 641    Value *pdst;
 642
 643    if (!pred || pred->reg.file == FILE_PREDICATE)
 644       return;
 645    pdst = new_LValue(func, FILE_PREDICATE);
 646
 647    // CAUTION: don't use pdst->getInsn, the definition might not be unique,
 648    //  delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
 649
 650    bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
 651
 652    insn->setPredicate(insn->cc, pdst);
 653 }
 654
 655 //
 656 // - add quadop dance for texturing
 657 // - put FP outputs in GPRs
 658 // - convert instruction sequences
 659 //
 660 bool
 661 NVC0LoweringPass::visit(Instruction *i)
 662 {
 663    bld.setPosition(i, false);
 664
 665    if (i->cc != CC_ALWAYS)
 666       checkPredicate(i);
 667
 668    switch (i->op) {
 669    case OP_TEX:
 670    case OP_TXB:
 671    case OP_TXL:
 672    case OP_TXF:
 673    case OP_TXG:
 674       return handleTEX(i->asTex());
 675    case OP_TXD:
 676       return handleTXD(i->asTex());
 677    case OP_TXQ:
 678      return handleTXQ(i->asTex());
 679    case OP_EX2:
 680       bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
 681       i->setSrc(0, i->getDef(0));
 682       break;
 683    case OP_POW:
 684       return handlePOW(i);
 685    case OP_DIV:
 686       return handleDIV(i);
 687    case OP_MOD:
 688       return handleMOD(i);
 689    case OP_SQRT:
 690       return handleSQRT(i);
 691    case OP_EXPORT:
 692       return handleEXPORT(i);
 693    case OP_EMIT:
 694    case OP_RESTART:
 695       return handleOUT(i);
 696    case OP_RDSV:
 697       return handleRDSV(i);
 698    case OP_WRSV:
 699       return handleWRSV(i);
 700    case OP_LOAD:
 701       if (i->src[0].getFile() == FILE_SHADER_INPUT) {
 702          i->op = OP_VFETCH;
 703          assert(prog->getType() != Program::TYPE_FRAGMENT);
 704       }
 705       break;
 706    default:
 707       break;
 708    }
 709    return true;
 710 }
 711
 712 bool
 713 TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
 714 {
 715    if (stage == CG_STAGE_PRE_SSA) {
 716       NVC0LoweringPass pass(prog);
 717       return pass.run(prog, false, true);
 718    } else
 719    if (stage == CG_STAGE_POST_RA) {
 720       NVC0LegalizePostRA pass;
 721       return pass.run(prog, false, true);
 722    } else
 723    if (stage == CG_STAGE_SSA) {
 724       NVC0LegalizeSSA pass;
 725       return pass.run(prog, false, true);
 726    }
 727    return false;
 728 }
 729
 730 } // namespace nv50_ir