src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
  19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 #include "nv50/codegen/nv50_ir.h"
  24 #include "nv50/codegen/nv50_ir_build_util.h"
  25
  26 #include "nv50_ir_target_nvc0.h"
  27
  28 namespace nv50_ir {
  29
  30 #define QOP_ADD  0
  31 #define QOP_SUBR 1
  32 #define QOP_SUB  2
  33 #define QOP_MOV2 3
  34
  35 #define QUADOP(q, r, s, t)                      \
  36    ((QOP_##q << 0) | (QOP_##r << 2) |           \
  37     (QOP_##s << 4) | (QOP_##t << 6))
  38
  39 class NVC0LegalizeSSA : public Pass
  40 {
  41 private:
  42    virtual bool visit(BasicBlock *);
  43    virtual bool visit(Function *);
  44
  45    // we want to insert calls to the builtin library only after optimization
  46    void handleDIV(Instruction *); // integer division, modulus
  47    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
  48
  49 private:
  50    BuildUtil bld;
  51 };
  52
  53 void
  54 NVC0LegalizeSSA::handleDIV(Instruction *i)
  55 {
  56    FlowInstruction *call;
  57    int builtin;
  58    Value *def[2];
  59
  60    bld.setPosition(i, false);
  61    def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
  62    def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);
  63    switch (i->dType) {
  64    case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
  65    case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
  66    default:
  67       return;
  68    }
  69    call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
  70    bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
  71    bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
  72    bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);
  73
  74    call->fixed = 1;
  75    call->absolute = call->builtin = 1;
  76    call->target.builtin = builtin;
  77    delete_Instruction(prog, i);
  78 }
  79
  80 void
  81 NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
  82 {
  83    // TODO
  84 }
  85
  86 bool
  87 NVC0LegalizeSSA::visit(Function *fn)
  88 {
  89    bld.setProgram(fn->getProgram());
  90    return true;
  91 }
  92
  93 bool
  94 NVC0LegalizeSSA::visit(BasicBlock *bb)
  95 {
  96    Instruction *next;
  97    for (Instruction *i = bb->getEntry(); i; i = next) {
  98       next = i->next;
  99       if (i->dType == TYPE_F32)
 100          continue;
 101       switch (i->op) {
 102       case OP_DIV:
 103       case OP_MOD:
 104          handleDIV(i);
 105          break;
 106       case OP_RCP:
 107       case OP_RSQ:
 108          if (i->dType == TYPE_F64)
 109             handleRCPRSQ(i);
 110          break;
 111       default:
 112          break;
 113       }
 114    }
 115    return true;
 116 }
 117
 118 class NVC0LegalizePostRA : public Pass
 119 {
 120 private:
 121    virtual bool visit(Function *);
 122    virtual bool visit(BasicBlock *);
 123
 124    void replaceZero(Instruction *);
 125    void split64BitOp(Instruction *);
 126    bool tryReplaceContWithBra(BasicBlock *);
 127    void propagateJoin(BasicBlock *);
 128
 129    LValue *r63;
 130 };
 131
 132 bool
 133 NVC0LegalizePostRA::visit(Function *fn)
 134 {
 135    r63 = new_LValue(fn, FILE_GPR);
 136    r63->reg.data.id = 63;
 137    return true;
 138 }
 139
 140 void
 141 NVC0LegalizePostRA::replaceZero(Instruction *i)
 142 {
 143    for (int s = 0; i->srcExists(s); ++s) {
 144       ImmediateValue *imm = i->getSrc(s)->asImm();
 145       if (imm && imm->reg.data.u64 == 0)
 146          i->setSrc(s, r63);
 147    }
 148 }
 149
 150 void
 151 NVC0LegalizePostRA::split64BitOp(Instruction *i)
 152 {
 153    if (i->dType == TYPE_F64) {
 154       if (i->op == OP_MAD)
 155          i->op = OP_FMA;
 156       if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA ||
 157           i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX ||
 158           i->op == OP_SET)
 159          return;
 160       i->dType = i->sType = TYPE_U32;
 161
 162       i->bb->insertAfter(i, i->clone(true)); // deep cloning
 163    }
 164 }
 165
 166 // replace CONT with BRA for single unconditional continue
 167 bool
 168 NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
 169 {
 170    if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT)
 171       return false;
 172    Graph::EdgeIterator ei = bb->cfg.incident();
 173    if (ei.getType() != Graph::Edge::BACK)
 174       ei.next();
 175    if (ei.getType() != Graph::Edge::BACK)
 176       return false;
 177    BasicBlock *contBB = BasicBlock::get(ei.getNode());
 178
 179    if (!contBB->getExit() || contBB->getExit()->op != OP_CONT ||
 180        contBB->getExit()->getPredicate())
 181       return false;
 182    contBB->getExit()->op = OP_BRA;
 183    bb->remove(bb->getEntry()); // delete PRECONT
 184
 185    ei.next();
 186    assert(ei.end() || ei.getType() != Graph::Edge::BACK);
 187    return true;
 188 }
 189
 190 // replace branches to join blocks with join ops
 191 void
 192 NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
 193 {
 194    if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit)
 195       return;
 196    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
 197       BasicBlock *in = BasicBlock::get(ei.getNode());
 198       Instruction *exit = in->getExit();
 199       if (!exit) {
 200          in->insertTail(new FlowInstruction(func, OP_JOIN, bb));
 201          // there should always be a terminator instruction
 202          WARN("inserted missing terminator in BB:%i\n", in->getId());
 203       } else
 204       if (exit->op == OP_BRA) {
 205          exit->op = OP_JOIN;
 206          exit->asFlow()->limit = 1; // must-not-propagate marker
 207       }
 208    }
 209    bb->remove(bb->getEntry());
 210 }
 211
 212 bool
 213 NVC0LegalizePostRA::visit(BasicBlock *bb)
 214 {
 215    Instruction *i, *next;
 216
 217    // remove pseudo operations and non-fixed no-ops, split 64 bit operations
 218    for (i = bb->getFirst(); i; i = next) {
 219       next = i->next;
 220       if (i->op == OP_EMIT || i->op == OP_RESTART) {
 221          if (!i->getDef(0)->refCount())
 222             i->setDef(0, NULL);
 223          if (i->src[0].getFile() == FILE_IMMEDIATE)
 224             i->setSrc(0, r63); // initial value must be 0
 225       } else
 226       if (i->isNop()) {
 227          bb->remove(i);
 228       } else {
 229          if (i->op != OP_MOV && i->op != OP_PFETCH)
 230             replaceZero(i);
 231          if (typeSizeof(i->dType) == 8)
 232             split64BitOp(i);
 233       }
 234    }
 235    if (!bb->getEntry())
 236       return true;
 237
 238    if (!tryReplaceContWithBra(bb))
 239       propagateJoin(bb);
 240
 241    return true;
 242 }
 243
 244 class NVC0LoweringPass : public Pass
 245 {
 246 public:
 247    NVC0LoweringPass(Program *);
 248
 249 private:
 250    virtual bool visit(Function *);
 251    virtual bool visit(BasicBlock *);
 252    virtual bool visit(Instruction *);
 253
 254    bool handleRDSV(Instruction *);
 255    bool handleWRSV(Instruction *);
 256    bool handleEXPORT(Instruction *);
 257    bool handleOUT(Instruction *);
 258    bool handleDIV(Instruction *);
 259    bool handleMOD(Instruction *);
 260    bool handleSQRT(Instruction *);
 261    bool handlePOW(Instruction *);
 262    bool handleTEX(TexInstruction *);
 263    bool handleTXD(TexInstruction *);
 264    bool handleManualTXD(TexInstruction *);
 265
 266    void checkPredicate(Instruction *);
 267
 268    void readTessCoord(LValue *dst, int c);
 269
 270 private:
 271    const Target *const targ;
 272
 273    BuildUtil bld;
 274
 275    LValue *gpEmitAddress;
 276 };
 277
 278 NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
 279 {
 280    bld.setProgram(prog);
 281 }
 282
 283 bool
 284 NVC0LoweringPass::visit(Function *fn)
 285 {
 286    if (prog->getType() == Program::TYPE_GEOMETRY) {
 287       assert(!strncmp(fn->getName(), "MAIN", 4));
 288       // TODO: when we generate actual functions pass this value along somehow
 289       bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false);
 290       gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
 291    }
 292    return true;
 293 }
 294
 295 bool
 296 NVC0LoweringPass::visit(BasicBlock *bb)
 297 {
 298    return true;
 299 }
 300
 301 // move array source to first slot, convert to u16, add indirections
 302 bool
 303 NVC0LoweringPass::handleTEX(TexInstruction *i)
 304 {
 305    const int dim = i->tex.target.getDim();
 306    const int arg = i->tex.target.getDim() + i->tex.target.isArray();
 307
 308    // generate and move the tsc/tic/array source to the front
 309    if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
 310       LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
 311
 312       Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(dim) : NULL;
 313       for (int s = dim; s >= 1; --s)
 314          i->setSrc(s, i->getSrc(s - 1));
 315       i->setSrc(0, arrayIndex);
 316
 317       Value *ticRel = i->getIndirectR();
 318       Value *tscRel = i->getIndirectS();
 319
 320       if (arrayIndex)
 321          bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, arrayIndex);
 322       else
 323          bld.loadImm(src, 0);
 324
 325       if (ticRel) {
 326          i->setSrc(i->tex.rIndirectSrc, NULL);
 327          bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
 328       }
 329       if (tscRel) {
 330          i->setSrc(i->tex.sIndirectSrc, NULL);
 331          bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
 332       }
 333
 334       i->setSrc(0, src);
 335    }
 336
 337    // offset is last source (lod 1st, dc 2nd)
 338    if (i->tex.useOffsets) {
 339       uint32_t value = 0;
 340       int n, c;
 341       int s = i->srcCount(0xff);
 342       for (n = 0; n < i->tex.useOffsets; ++n)
 343          for (c = 0; c < 3; ++c)
 344             value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
 345       i->setSrc(s, bld.loadImm(NULL, value));
 346    }
 347
 348    return true;
 349 }
 350
 351 bool
 352 NVC0LoweringPass::handleManualTXD(TexInstruction *i)
 353 {
 354    static const uint8_t qOps[4][2] =
 355    {
 356       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
 357       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
 358       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
 359       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
 360    };
 361    Value *def[4][4];
 362    Value *crd[3];
 363    Instruction *tex;
 364    Value *zero = bld.loadImm(bld.getSSA(), 0);
 365    int l, c;
 366    const int dim = i->tex.target.getDim();
 367
 368    i->op = OP_TEX; // no need to clone dPdx/dPdy later
 369
 370    for (c = 0; c < dim; ++c)
 371       crd[c] = bld.getScratch();
 372
 373    bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
 374    for (l = 0; l < 4; ++l) {
 375       // mov coordinates from lane l to all lanes
 376       for (c = 0; c < dim; ++c)
 377          bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
 378       // add dPdx from lane l to lanes dx
 379       for (c = 0; c < dim; ++c)
 380          bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
 381       // add dPdy from lane l to lanes dy
 382       for (c = 0; c < dim; ++c)
 383          bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
 384       // texture
 385       bld.insert(tex = i->clone(true));
 386       for (c = 0; c < dim; ++c)
 387          tex->setSrc(c, crd[c]);
 388       // save results
 389       for (c = 0; i->defExists(c); ++c) {
 390          Instruction *mov;
 391          def[c][l] = bld.getSSA();
 392          mov = bld.mkMov(def[c][l], tex->getDef(c));
 393          mov->fixed = 1;
 394          mov->lanes = 1 << l;
 395       }
 396    }
 397    bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
 398
 399    for (c = 0; i->defExists(c); ++c) {
 400       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
 401       for (l = 0; l < 4; ++l)
 402          u->setSrc(l, def[c][l]);
 403    }
 404
 405    i->bb->remove(i);
 406    return true;
 407 }
 408
 409 bool
 410 NVC0LoweringPass::handleTXD(TexInstruction *txd)
 411 {
 412    int dim = txd->tex.target.getDim();
 413    int arg = txd->tex.target.getDim() + txd->tex.target.isArray();
 414
 415    handleTEX(txd);
 416    if (txd->src[arg].exists())
 417       ++arg;
 418
 419    if (dim > 2 || txd->tex.target.isShadow())
 420       return handleManualTXD(txd);
 421
 422    // at most s/t/array, x, y, offset
 423    assert(arg <= 4 && !txd->src[arg].exists());
 424
 425    for (int c = 0; c < dim; ++c) {
 426       txd->src[arg + c * 2 + 0].set(txd->dPdx[c]);
 427       txd->src[arg + c * 2 + 1].set(txd->dPdy[c]);
 428       txd->dPdx[c] = NULL;
 429       txd->dPdy[c] = NULL;
 430    }
 431    return true;
 432 }
 433
 434 bool
 435 NVC0LoweringPass::handleWRSV(Instruction *i)
 436 {
 437    Instruction *st;
 438    Symbol *sym;
 439    uint32_t addr;
 440
 441    // must replace, $sreg are not writeable
 442    addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
 443    if (addr >= 0x400)
 444       return false;
 445    sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);
 446
 447    st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0),
 448                     i->getSrc(1));
 449    st->perPatch = i->perPatch;
 450
 451    bld.getBB()->remove(i);
 452    return true;
 453 }
 454
 455 void
 456 NVC0LoweringPass::readTessCoord(LValue *dst, int c)
 457 {
 458    Value *laneid = bld.getSSA();
 459    Value *x, *y;
 460
 461    bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0));
 462
 463    if (c == 0) {
 464       x = dst;
 465       y = NULL;
 466    } else
 467    if (c == 1) {
 468       x = NULL;
 469       y = dst;
 470    } else {
 471       assert(c == 2);
 472       x = bld.getSSA();
 473       y = bld.getSSA();
 474    }
 475    if (x)
 476       bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid);
 477    if (y)
 478       bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid);
 479
 480    if (c == 2) {
 481       bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y);
 482       bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst);
 483    }
 484 }
 485
 486 bool
 487 NVC0LoweringPass::handleRDSV(Instruction *i)
 488 {
 489    Symbol *sym = i->getSrc(0)->asSym();
 490    Value *vtx = NULL;
 491    Instruction *ld;
 492    uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
 493
 494    if (addr >= 0x400) // mov $sreg
 495       return true;
 496
 497    switch (i->getSrc(0)->reg.data.sv.sv) {
 498    case SV_POSITION:
 499       assert(prog->getType() == Program::TYPE_FRAGMENT);
 500       ld = new_Instruction(func, OP_LINTERP, TYPE_F32);
 501       ld->setDef(0, i->getDef(0));
 502       ld->setSrc(0, bld.mkSymbol(FILE_SHADER_INPUT, 0, TYPE_F32, addr));
 503       ld->setInterpolate(NV50_IR_INTERP_LINEAR);
 504       bld.getBB()->insertAfter(i, ld);
 505       break;
 506    case SV_TESS_COORD:
 507       assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
 508       readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
 509       break;
 510    default:
 511       if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
 512          vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
 513       ld = bld.mkFetch(i->getDef(0), i->dType,
 514                        FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
 515       ld->perPatch = i->perPatch;
 516       break;
 517    }
 518    bld.getBB()->remove(i);
 519    return true;
 520 }
 521
 522 bool
 523 NVC0LoweringPass::handleDIV(Instruction *i)
 524 {
 525    if (!isFloatType(i->dType))
 526       return true;
 527    Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
 528    i->op = OP_MUL;
 529    i->setSrc(1, rcp->getDef(0));
 530    return true;
 531 }
 532
 533 bool
 534 NVC0LoweringPass::handleMOD(Instruction *i)
 535 {
 536    if (i->dType != TYPE_F32)
 537       return true;
 538    LValue *value = bld.getScratch();
 539    bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1));
 540    bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value);
 541    bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
 542    bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value);
 543    i->op = OP_SUB;
 544    i->setSrc(1, value);
 545    return true;
 546 }
 547
 548 bool
 549 NVC0LoweringPass::handleSQRT(Instruction *i)
 550 {
 551    Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
 552                                 bld.getSSA(), i->getSrc(0));
 553    i->op = OP_MUL;
 554    i->setSrc(1, rsq->getDef(0));
 555
 556    return true;
 557 }
 558
 559 bool
 560 NVC0LoweringPass::handlePOW(Instruction *i)
 561 {
 562    LValue *val = bld.getScratch();
 563
 564    bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
 565    bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
 566    bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
 567
 568    i->op = OP_EX2;
 569    i->setSrc(0, val);
 570    i->setSrc(1, NULL);
 571
 572    return true;
 573 }
 574
 575 bool
 576 NVC0LoweringPass::handleEXPORT(Instruction *i)
 577 {
 578    if (prog->getType() == Program::TYPE_FRAGMENT) {
 579       int id = i->getSrc(0)->reg.data.offset / 4;
 580
 581       if (i->src[0].isIndirect(0)) // TODO, ugly
 582          return false;
 583       i->op = OP_MOV;
 584       i->src[0].set(i->src[1]);
 585       i->setSrc(1, NULL);
 586       i->setDef(0, new_LValue(func, FILE_GPR));
 587       i->getDef(0)->reg.data.id = id;
 588
 589       prog->maxGPR = MAX2(prog->maxGPR, id);
 590    } else
 591    if (prog->getType() == Program::TYPE_GEOMETRY) {
 592       i->setIndirect(0, 1, gpEmitAddress);
 593    }
 594    return true;
 595 }
 596
 597 bool
 598 NVC0LoweringPass::handleOUT(Instruction *i)
 599 {
 600    if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) {
 601       i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
 602       delete_Instruction(prog, i);
 603    } else {
 604       assert(gpEmitAddress);
 605       i->setDef(0, gpEmitAddress);
 606       if (i->srcExists(0))
 607          i->setSrc(1, i->getSrc(0));
 608       i->setSrc(0, gpEmitAddress);
 609    }
 610    return true;
 611 }
 612
 613 // Generate a binary predicate if an instruction is predicated by
 614 // e.g. an f32 value.
 615 void
 616 NVC0LoweringPass::checkPredicate(Instruction *insn)
 617 {
 618    Value *pred = insn->getPredicate();
 619    Value *pdst;
 620
 621    if (!pred || pred->reg.file == FILE_PREDICATE)
 622       return;
 623    pdst = new_LValue(func, FILE_PREDICATE);
 624
 625    // CAUTION: don't use pdst->getInsn, the definition might not be unique,
 626    //  delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
 627
 628    bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
 629
 630    insn->setPredicate(insn->cc, pdst);
 631 }
 632
 633 //
 634 // - add quadop dance for texturing
 635 // - put FP outputs in GPRs
 636 // - convert instruction sequences
 637 //
 638 bool
 639 NVC0LoweringPass::visit(Instruction *i)
 640 {
 641    if (i->prev)
 642       bld.setPosition(i->prev, true);
 643    else
 644    if (i->next)
 645       bld.setPosition(i->next, false);
 646    else
 647       bld.setPosition(i->bb, true);
 648
 649    if (i->cc != CC_ALWAYS)
 650       checkPredicate(i);
 651
 652    switch (i->op) {
 653    case OP_TEX:
 654    case OP_TXB:
 655    case OP_TXL:
 656    case OP_TXF:
 657    case OP_TXQ:
 658    case OP_TXG:
 659       return handleTEX(i->asTex());
 660    case OP_TXD:
 661       return handleTXD(i->asTex());
 662    case OP_EX2:
 663       bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
 664       i->setSrc(0, i->getDef(0));
 665       break;
 666    case OP_POW:
 667       return handlePOW(i);
 668    case OP_DIV:
 669       return handleDIV(i);
 670    case OP_MOD:
 671       return handleMOD(i);
 672    case OP_SQRT:
 673       return handleSQRT(i);
 674    case OP_EXPORT:
 675       return handleEXPORT(i);
 676    case OP_EMIT:
 677    case OP_RESTART:
 678       return handleOUT(i);
 679    case OP_RDSV:
 680       return handleRDSV(i);
 681    case OP_WRSV:
 682       return handleWRSV(i);
 683    case OP_LOAD:
 684       if (i->src[0].getFile() == FILE_SHADER_INPUT) {
 685          i->op = OP_VFETCH;
 686          assert(prog->getType() != Program::TYPE_FRAGMENT);
 687       }
 688       break;
 689    case OP_PINTERP:
 690       if (i->getSrc(0)->reg.data.offset >= 0x280 &&
 691           i->getSrc(0)->reg.data.offset <  0x2c0)
 692          i->setInterpolate(i->getSampleMode() | NV50_IR_INTERP_SC);
 693       break;
 694    case OP_LINTERP:
 695       if (i->getSrc(0)->reg.data.offset == 0x3fc) {
 696          Value *face = i->getDef(0);
 697          bld.setPosition(i, true);
 698          bld.mkOp2(OP_SHL, TYPE_U32, face, face, bld.mkImm(31));
 699          bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000));
 700       }
 701       break;
 702    default:
 703       break;
 704    }
 705    return true;
 706 }
 707
 708 bool
 709 TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
 710 {
 711    if (stage == CG_STAGE_PRE_SSA) {
 712       NVC0LoweringPass pass(prog);
 713       return pass.run(prog, false, true);
 714    } else
 715    if (stage == CG_STAGE_POST_RA) {
 716       NVC0LegalizePostRA pass;
 717       return pass.run(prog, false, true);
 718    } else
 719    if (stage == CG_STAGE_SSA) {
 720       NVC0LegalizeSSA pass;
 721       return pass.run(prog, false, true);
 722    }
 723    return false;
 724 }
 725
 726 } // namespace nv50_ir