#include "codegen/nv50_ir_build_util.h"
#include "codegen/nv50_ir_target_nvc0.h"
+#include "codegen/nv50_ir_lowering_nvc0.h"
#include <limits>
((QOP_##q << 6) | (QOP_##r << 4) | \
(QOP_##s << 2) | (QOP_##t << 0))
-class NVC0LegalizeSSA : public Pass
-{
-private:
- virtual bool visit(BasicBlock *);
- virtual bool visit(Function *);
-
- // we want to insert calls to the builtin library only after optimization
- void handleDIV(Instruction *); // integer division, modulus
- void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
-
-private:
- BuildUtil bld;
-};
-
void
NVC0LegalizeSSA::handleDIV(Instruction *i)
{
return true;
}
-class NVC0LegalizePostRA : public Pass
-{
-public:
- NVC0LegalizePostRA(const Program *);
-
-private:
- virtual bool visit(Function *);
- virtual bool visit(BasicBlock *);
-
- void replaceZero(Instruction *);
- bool tryReplaceContWithBra(BasicBlock *);
- void propagateJoin(BasicBlock *);
-
- struct TexUse
- {
- TexUse(Instruction *use, const Instruction *tex)
- : insn(use), tex(tex), level(-1) { }
- Instruction *insn;
- const Instruction *tex; // or split / mov
- int level;
- };
- struct Limits
- {
- Limits() { }
- Limits(int min, int max) : min(min), max(max) { }
- int min, max;
- };
- bool insertTextureBarriers(Function *);
- inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
- void findFirstUses(const Instruction *tex, const Instruction *def,
- std::list<TexUse>&);
- void findOverwritingDefs(const Instruction *tex, Instruction *insn,
- const BasicBlock *term,
- std::list<TexUse>&);
- void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
- const Instruction *recurseDef(const Instruction *);
-
-private:
- LValue *rZero;
- LValue *carry;
- const bool needTexBar;
-};
-
NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
: rZero(NULL),
carry(NULL),
i->setDef(0, NULL);
if (i->src(0).getFile() == FILE_IMMEDIATE)
i->setSrc(0, rZero); // initial value must be 0
+ replaceZero(i);
} else
if (i->isNop()) {
bb->remove(i);
return true;
}
-class NVC0LoweringPass : public Pass
-{
-public:
- NVC0LoweringPass(Program *);
-
-private:
- virtual bool visit(Function *);
- virtual bool visit(BasicBlock *);
- virtual bool visit(Instruction *);
-
- bool handleRDSV(Instruction *);
- bool handleWRSV(Instruction *);
- bool handleEXPORT(Instruction *);
- bool handleOUT(Instruction *);
- bool handleDIV(Instruction *);
- bool handleMOD(Instruction *);
- bool handleSQRT(Instruction *);
- bool handlePOW(Instruction *);
- bool handleTEX(TexInstruction *);
- bool handleTXD(TexInstruction *);
- bool handleTXQ(TexInstruction *);
- bool handleManualTXD(TexInstruction *);
- bool handleTXLQ(TexInstruction *);
- bool handleATOM(Instruction *);
- bool handleCasExch(Instruction *, bool needCctl);
- void handleSurfaceOpNVE4(TexInstruction *);
-
- void checkPredicate(Instruction *);
-
- void readTessCoord(LValue *dst, int c);
-
- Value *loadResInfo32(Value *ptr, uint32_t off);
- Value *loadMsInfo32(Value *ptr, uint32_t off);
- Value *loadTexHandle(Value *ptr, unsigned int slot);
-
- void adjustCoordinatesMS(TexInstruction *);
- void processSurfaceCoordsNVE4(TexInstruction *);
-
-private:
- const Target *const targ;
-
- BuildUtil bld;
-
- Symbol *gMemBase;
- LValue *gpEmitAddress;
-};
-
NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
{
bld.setProgram(prog);
if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
+ Value *ticRel = i->getIndirectR();
+ Value *tscRel = i->getIndirectS();
+
+ if (ticRel)
+ i->setSrc(i->tex.rIndirectSrc, NULL);
+ if (tscRel)
+ i->setSrc(i->tex.sIndirectSrc, NULL);
+
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
for (int s = dim; s >= 1; --s)
i->setSrc(s, i->getSrc(s - 1));
i->setSrc(0, arrayIndex);
- Value *ticRel = i->getIndirectR();
- Value *tscRel = i->getIndirectS();
-
if (arrayIndex) {
int sat = (i->op == OP_TXF) ? 1 : 0;
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
bld.loadImm(src, 0);
}
- if (ticRel) {
- i->setSrc(i->tex.rIndirectSrc, NULL);
+ if (ticRel)
bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
- }
- if (tscRel) {
- i->setSrc(i->tex.sIndirectSrc, NULL);
+ if (tscRel)
bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
- }
i->setSrc(0, src);
}
assert(chipset >= NVISA_GK104_CHIPSET ||
!i->tex.useOffsets || !i->tex.target.isMS());
- // offset is last source (lod 1st, dc 2nd)
+ // offset is between lod and dc
if (i->tex.useOffsets) {
- uint32_t value = 0;
int n, c;
int s = i->srcCount(0xff, true);
- if (i->srcExists(s)) // move potential predicate out of the way
- i->moveSources(s, 1);
+ if (i->op != OP_TXD || chipset < NVISA_GK104_CHIPSET) {
+ if (i->tex.target.isShadow())
+ s--;
+ if (i->srcExists(s)) // move potential predicate out of the way
+ i->moveSources(s, 1);
+ if (i->tex.useOffsets == 4 && i->srcExists(s + 1))
+ i->moveSources(s + 1, 1);
+ }
if (i->op == OP_TXG) {
- assert(i->tex.useOffsets == 1);
- for (c = 0; c < 3; ++c)
- value |= (i->tex.offset[0][c] & 0xff) << (c * 8);
+ // Either there is 1 offset, which goes into the 2 low bytes of the
+ // first source, or there are 4 offsets, which go into 2 sources (8
+ // values, 1 byte each).
+ Value *offs[2] = {NULL, NULL};
+ for (n = 0; n < i->tex.useOffsets; n++) {
+ for (c = 0; c < 2; ++c) {
+ if ((n % 2) == 0 && c == 0)
+ offs[n / 2] = i->offset[n][c].get();
+ else
+ bld.mkOp3(OP_INSBF, TYPE_U32,
+ offs[n / 2],
+ i->offset[n][c].get(),
+ bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)),
+ offs[n / 2]);
+ }
+ }
+ i->setSrc(s, offs[0]);
+ if (offs[1])
+ i->setSrc(s + 1, offs[1]);
} else {
- for (n = 0; n < i->tex.useOffsets; ++n)
- for (c = 0; c < 3; ++c)
- value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
+ unsigned imm = 0;
+ assert(i->tex.useOffsets == 1);
+ for (c = 0; c < 3; ++c) {
+    ImmediateValue val;
+    // Fetch the immediate outside of assert(): when NDEBUG is defined the
+    // assert expression is not evaluated, so val would never be filled in
+    // and the packed offset would be built from an unset value.
+    const bool isImm = i->offset[0][c].getImmediate(val);
+    assert(isImm && "non-TXG texture offsets must be immediates");
+    (void)isImm; // silence unused-variable warnings in NDEBUG builds
+    // pack each component as a 4-bit field: x in bits 0-3, y in 4-7, z in 8-11
+    imm |= (val.reg.data.u32 & 0xf) << (c * 4);
+ }
+ if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) {
+ // The offset goes into the upper 16 bits of the array index source
+ // (src 0). For array targets, INSBF it into the existing index;
+ // otherwise shift the coordinates up by one and create src 0 from it.
+ if (i->tex.target.isArray()) {
+ bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(0),
+ bld.loadImm(NULL, imm), bld.mkImm(0xc10),
+ i->getSrc(0));
+ } else {
+ for (int s = dim; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, bld.loadImm(NULL, imm << 16));
+ }
+ } else {
+ i->setSrc(s, bld.loadImm(NULL, imm));
+ }
}
- i->setSrc(s, bld.loadImm(NULL, value));
}
if (chipset >= NVISA_GK104_CHIPSET) {
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim();
+ const int array = i->tex.target.isArray();
i->op = OP_TEX; // no need to clone dPdx/dPdy later
for (l = 0; l < 4; ++l) {
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
- bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
+ bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
// add dPdx from lane l to lanes dx
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c, crd[c]);
+ tex->setSrc(c + array, crd[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
NVC0LoweringPass::handleTXD(TexInstruction *txd)
{
int dim = txd->tex.target.getDim();
- int arg = txd->tex.target.getArgCount();
+ unsigned arg = txd->tex.target.getArgCount();
+ unsigned expected_args = arg;
+ const int chipset = prog->getTarget()->getChipset();
+
+ if (chipset >= NVISA_GK104_CHIPSET) {
+ if (!txd->tex.target.isArray() && txd->tex.useOffsets)
+ expected_args++;
+ } else {
+ if (txd->tex.useOffsets)
+ expected_args++;
+ }
+
+ if (expected_args > 4 ||
+ dim > 2 ||
+ txd->tex.target.isShadow() ||
+ txd->tex.target.isCube())
+ txd->op = OP_TEX;
handleTEX(txd);
while (txd->srcExists(arg))
++arg;
txd->tex.derivAll = true;
- if (dim > 2 ||
- txd->tex.target.isCube() ||
- arg > 4 ||
- txd->tex.target.isShadow())
+ if (txd->op == OP_TEX)
return handleManualTXD(txd);
+ assert(arg == expected_args);
for (int c = 0; c < dim; ++c) {
txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]);
txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]);
off);
break;
}
+ case SV_SAMPLE_MASK:
+ ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
+ ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
+ break;
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
bool
NVC0LoweringPass::handleOUT(Instruction *i)
{
- if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) {
+ Instruction *prev = i->prev;
+ ImmediateValue stream, prevStream;
+
+ // Only merge if the stream ids match. Note that the previous EMIT has
+ // already been lowered, so its stream id is now in src(1), not src(0).
+ if (i->op == OP_RESTART && prev && prev->op == OP_EMIT &&
+ i->src(0).getImmediate(stream) &&
+ prev->src(1).getImmediate(prevStream) &&
+ stream.reg.data.u32 == prevStream.reg.data.u32) {
i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
delete_Instruction(prog, i);
} else {
assert(gpEmitAddress);
i->setDef(0, gpEmitAddress);
- if (i->srcExists(0))
- i->setSrc(1, i->getSrc(0));
+ i->setSrc(1, i->getSrc(0));
i->setSrc(0, gpEmitAddress);
}
return true;