void
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
{
- // TODO
+ assert(i->dType == TYPE_F64);
+ // Rewrite the 64-bit float instruction i in terms of its high 32 bits only:
+ // there are instructions that will compute the high 32 bits of the 64-bit
+ // float. We will just stick 0 in the bottom 32 bits.
+ bld.setPosition(i, false); // NOTE(review): presumably false == "insert before i" — confirm
+
+ // 1. Take the source and split it up.
+ Value *src[2], *dst[2], *def = i->getDef(0); // def: original destination, written by the merge in step 4
+ bld.mkSplit(src, 4, i->getSrc(0)); // NOTE(review): presumably 4-byte pieces, src[1] being the high one — confirm
+
+ // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
+ dst[0] = bld.loadImm(NULL, 0);
+ dst[1] = bld.getSSA(); // fresh value that i is made to define in step 3
+
+ // 3. The new version of the instruction takes the high 32 bits of the
+ // source and outputs the high 32 bits of the destination.
+ i->setSrc(0, src[1]);
+ i->setDef(0, dst[1]);
+ i->setType(TYPE_F32);
+ i->subOp = NV50_IR_SUBOP_RCPRSQ_64H;
+
+ // 4. Recombine the two dst pieces back into the original destination.
+ bld.setPosition(i, true); // the merge reads dst[1], which i defines; presumably true == "insert after i" — confirm
+ bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
+}
+
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+ // Sets i->ftz on eligible instructions. Only want to flush float inputs, so i must have an F32 source type.
+ assert(i->sType == TYPE_F32);
+
+ // If we're already flushing denorms (and NaN's) to zero, no need for this.
+ if (i->dnz)
+ return;
+
+ // Only certain classes of operations can flush
+ OpClass cls = prog->getTarget()->getOpClass(i->op);
+ if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+ cls != OPCLASS_CONVERT)
+ return; // every other op class is left untouched
+
+ i->ftz = true;
}
bool
Instruction *next;
for (Instruction *i = bb->getEntry(); i; i = next) {
next = i->next;
- if (i->dType == TYPE_F32)
+ if (i->sType == TYPE_F32) {
+ if (prog->getType() != Program::TYPE_COMPUTE)
+ handleFTZ(i);
continue;
+ }
switch (i->op) {
case OP_DIV:
case OP_MOD:
void
NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
- Instruction *usei, const Instruction *insn)
+ Instruction *usei, const Instruction *texi)
{
bool add = true;
for (std::list<TexUse>::iterator it = uses.begin();
++it;
}
if (add)
- uses.push_back(TexUse(usei, insn));
+ uses.push_back(TexUse(usei, texi));
}
void
while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
insn = insn->getSrc(0)->getUniqueInsn();
- if (!insn || !insn->bb->reachableBy(texi->bb, term))
+ // NOTE: the tex itself is, of course, not an overwriting definition
+ if (insn == texi || !insn->bb->reachableBy(texi->bb, term))
return;
switch (insn->op) {
for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
Instruction *usei = (*u)->getInsn();
- /* XXX HACK ALERT XXX
- *
- * This shouldn't have to be here, we should always be making forward
- * progress by looking at the uses. However this somehow does not
- * appear to be the case. Probably because this is being done right
- * after RA, when the defs/uses lists have been messed with by node
- * merging. This should probably be moved to being done right before
- * RA. But this will do for now.
- */
+ // NOTE: In case of a loop that overwrites a value but never uses
+ // it, it can happen that we have a cycle of uses that consists only
+ // of phis and no-op moves and will thus cause an infinite loop here
+ // since these are not considered actual uses.
+ // The most obvious (and perhaps the only) way to prevent this is to
+ // remember which instructions we've already visited.
+
if (visited.find(usei) != visited.end())
continue;
visited.insert(usei);
if (usei->op == OP_PHI || usei->op == OP_UNION) {
- // need a barrier before WAW cases
+ // need a barrier before WAW cases, like:
+ // %r0 = tex
+ // if ...
+ // texbar <- is required or tex might replace x again
+ // %r1 = x <- overwriting def
+ // %r2 = phi %r0, %r1
for (int s = 0; usei->srcExists(s); ++s) {
Instruction *defi = usei->getSrc(s)->getUniqueInsn();
if (defi && &usei->src(s) != *u)
usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
findFirstUses(texi, usei, uses, visited);
} else {
- addTexUse(uses, usei, insn);
+ addTexUse(uses, usei, texi);
}
}
}
}
}
delete[] uses;
- uses = NULL;
// insert the barriers
for (size_t i = 0; i < useVec.size(); ++i) {
}
}
- if (fn->getProgram()->optLevel < 3) {
- if (uses)
- delete[] uses;
+ if (fn->getProgram()->optLevel < 3)
return true;
- }
std::vector<Limits> limitT, limitB, limitS; // entry, exit, single
prev = i;
}
}
- if (uses)
- delete[] uses;
return true;
}
// lod bias
// depth compare
// offsets (same as fermi, except txd which takes it with array)
+ //
+ // Maxwell (tex):
+ // array
+ // coords
+ // indirect handle
+ // sample
+ // lod bias
+ // depth compare
+ // offsets
+ //
+ // Maxwell (txd):
+ // indirect handle
+ // coords
+ // array + offsets
+ // derivatives
if (chipset >= NVISA_GK104_CHIPSET) {
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
const int sat = (i->op == OP_TXF) ? 1 : 0;
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
- for (int s = dim; s >= 1; --s)
- i->setSrc(s, i->getSrc(s - 1));
- i->setSrc(0, layer);
+ if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) {
+ for (int s = dim; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, layer);
+ } else {
+ i->setSrc(dim, layer);
+ }
}
// Move the indirect reference to the first place
- if (i->tex.rIndirectSrc >= 0) {
+ if (i->tex.rIndirectSrc >= 0 && (
+ i->op == OP_TXD || chipset < NVISA_GM107_CHIPSET)) {
Value *hnd = i->getIndirectR();
i->setIndirectR(NULL);
assert(i->tex.useOffsets == 1);
for (c = 0; c < 3; ++c) {
ImmediateValue val;
- assert(i->offset[0][c].getImmediate(val));
+ if (!i->offset[0][c].getImmediate(val))
+ assert(!"non-immediate offset passed to non-TXG");
imm |= (val.reg.data.u32 & 0xf) << (c * 4);
}
if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) {
// create it if it's not already there, and INSBF it if it already
// is.
s = (i->tex.rIndirectSrc >= 0) ? 1 : 0;
+ if (chipset >= NVISA_GM107_CHIPSET)
+ s += dim;
if (i->tex.target.isArray()) {
- bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(0),
+ bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s),
bld.loadImm(NULL, imm), bld.mkImm(0xc10),
i->getSrc(s));
} else {
// Lowers an indirect resource reference on txq; the rewrite differs per chipset generation.
// Always returns true.
bool
NVC0LoweringPass::handleTXQ(TexInstruction *txq)
{
- // TODO: indirect resource/sampler index
+ if (txq->tex.rIndirectSrc < 0)
+ return true; // no indirect resource source: nothing to rewrite
+
+ Value *ticRel = txq->getIndirectR();
+ const int chipset = prog->getTarget()->getChipset();
+
+ txq->setIndirectS(NULL); // the indirect sampler reference is dropped unconditionally
+ txq->tex.sIndirectSrc = -1;
+
+ assert(ticRel);
+
+ if (chipset < NVISA_GK104_CHIPSET) {
+ LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
+
+ txq->setSrc(txq->tex.rIndirectSrc, NULL);
+ if (txq->tex.r) // non-zero static index: add it to the indirect value
+ ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+ ticRel, bld.mkImm(txq->tex.r));
+
+ bld.mkOp2(OP_SHL, TYPE_U32, src, ticRel, bld.mkImm(0x17)); // src = ticRel << 23
+
+ txq->moveSources(0, 1); // NOTE(review): presumably shifts sources from slot 0 up by one — confirm
+ txq->setSrc(0, src);
+ } else {
+ Value *hnd = loadTexHandle(
+ bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+ txq->getIndirectR(), bld.mkImm(2)),
+ txq->tex.r); // first argument is the indirect value << 2
+ txq->tex.r = 0xff;
+ txq->tex.s = 0x1f;
+
+ if (chipset < NVISA_GM107_CHIPSET) {
+ txq->setIndirectR(NULL);
+ txq->moveSources(0, 1);
+ txq->setSrc(0, hnd); // the loaded handle becomes source 0
+ txq->tex.rIndirectSrc = 0;
+ } else {
+ txq->setIndirectR(hnd); // GM107 and later: the handle stays in the indirect-R slot
+ }
+ }
+
return true;
}
Value *face = i->getDef(0);
bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL);
if (i->dType == TYPE_F32) {
- bld.mkOp2(OP_AND, TYPE_U32, face, face, bld.mkImm(0x80000000));
- bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000));
+ bld.mkOp2(OP_OR, TYPE_U32, face, face, bld.mkImm(0x00000001));
+ bld.mkOp1(OP_NEG, TYPE_S32, face, face);
+ bld.mkCvt(OP_CVT, TYPE_F32, face, TYPE_S32, face);
}
}
break;
if (!isFloatType(i->dType))
return true;
bld.setPosition(i, false);
- Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
+ Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1));
i->op = OP_MUL;
i->setSrc(1, rcp->getDef(0));
return true;
// Lowers a floating-point MOD by rewriting i into
//   src0 - src1 * trunc(src0 * rcp(src1))
// Non-float destination types are left untouched. Always returns true.
bool
NVC0LoweringPass::handleMOD(Instruction *i)
{
- if (i->dType != TYPE_F32)
+ if (!isFloatType(i->dType))
return true;
- LValue *value = bld.getScratch();
- bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1));
- bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value);
- bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
- bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value);
+ LValue *value = bld.getScratch(typeSizeof(i->dType)); // scratch sized from the instruction's type
+ bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1)); // value = rcp(src1)
+ bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value); // value = src0 * rcp(src1)
+ bld.mkOp1(OP_TRUNC, i->dType, value, value); // value = trunc(value)
+ bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value); // value = src1 * value
i->op = OP_SUB; // i now computes src0 - value
i->setSrc(1, value);
return true;
}
// Lowers SQRT by rewriting i into src0 * rsq(src0), with a predicated path
// that writes zero to the destination when src0 <= 0. Always returns true.
bool
NVC0LoweringPass::handleSQRT(Instruction *i)
{
- Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
- bld.getSSA(), i->getSrc(0));
+ Value *pred = bld.getSSA(1, FILE_PREDICATE);
+ Value *zero = bld.getSSA();
+ Instruction *rsq;
+
+ bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0));
+ if (i->dType == TYPE_F64) // for F64, build the zero by merging two copies of the 32-bit zero
+ zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero);
+ bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero); // pred = (src0 <= 0)
+ bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, pred); // if pred: dst = 0
+ rsq = bld.mkOp1(OP_RSQ, i->dType,
+ bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
+ rsq->setPredicate(CC_NOT_P, pred); // the RSQ only runs when pred is false
i->op = OP_MUL; // i now computes src0 * rsq(src0)
i->setSrc(1, rsq->getDef(0));
+ i->setPredicate(CC_NOT_P, pred);
+
return true;
}
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 0), bld.mkImm(4));
i->setIndirect(0, 0, ptr);
+ i->op = OP_VFETCH;
} else {
i->op = OP_VFETCH;
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP