return;
if (insn->src(1).getFile() != FILE_GPR)
return;
+ // This is the special OP_SET used for alphatesting, we can't reverse its
+ // arguments as that will confuse the fixup code.
+ if (insn->op == OP_SET && insn->subOp)
+ return;
Instruction *i0 = insn->getSrc(0)->getInsn();
Instruction *i1 = insn->getSrc(1)->getInsn();
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
+ void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
opnd(i, src1, 1);
+ if (i->srcExists(2) && i->src(2).getImmediate(src2))
+ opnd3(i, src2);
}
return true;
}
}
}
+void
+ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
+{
+ switch (i->op) {
+ case OP_MAD:
+ case OP_FMA:
+ if (imm2.isInteger(0)) {
+ i->op = OP_MUL;
+ i->setSrc(2, NULL);
+ foldCount++;
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+}
+
void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
void handleCVT_CVT(Instruction *);
void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
+ void handleNEG(Instruction *);
BuildUtil bld;
};
if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
return false;
- if (src->getInsn()->postFactor)
+ if (src->getInsn()->saturate || src->getInsn()->postFactor ||
+ src->getInsn()->dnz)
return false;
+
if (toOp == OP_SAD) {
ImmediateValue imm;
if (!src->getInsn()->src(2).getImmediate(imm))
insn->setSrc(0, add->getSrc(s));
}
+// NEG(AND(SET, 1)) -> SET
+void
+AlgebraicOpt::handleNEG(Instruction *i) {
+ Instruction *src = i->getSrc(0)->getInsn();
+ ImmediateValue imm;
+ int b;
+
+ if (isFloatType(i->sType) || !src || src->op != OP_AND)
+ return;
+
+ if (src->src(0).getImmediate(imm))
+ b = 1;
+ else if (src->src(1).getImmediate(imm))
+ b = 0;
+ else
+ return;
+
+ if (!imm.isInteger(1))
+ return;
+
+ Instruction *set = src->getSrc(b)->getInsn();
+ if ((set->op == OP_SET || set->op == OP_SET_AND ||
+ set->op == OP_SET_OR || set->op == OP_SET_XOR) &&
+ !isFloatType(set->dType)) {
+ i->def(0).replace(set->getDef(0), false);
+ }
+}
+
bool
AlgebraicOpt::visit(BasicBlock *bb)
{
case OP_SUCLAMP:
handleSUCLAMP(i);
break;
+ case OP_NEG:
+ handleNEG(i);
+ break;
default:
break;
}
if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) ||
((size == 0xc) && (MIN2(offLd, offRc) & 0xf)))
return false;
+ // for compute indirect loads are not guaranteed to be aligned
+ if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
+ return false;
assert(sizeRc + sizeLd <= 16 && offRc != offLd);
if (!prog->getTarget()->
isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size)))
return false;
+ // no unaligned stores
if (size == 8 && MIN2(offRc, offSt) & 0x7)
return false;
+ // for compute indirect stores are not guaranteed to be aligned
+ if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
+ return false;
st->takeExtraSources(0, extra); // save predicate and indirect address
// get non-replaced sources after values covered by st
for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s)
vals[k++] = ri->getSrc(s);
- assert((unsigned int)k <= Elements(vals));
+ assert((unsigned int)k <= ARRAY_SIZE(vals));
for (s = 0; s < k; ++s)
st->setSrc(s + 1, vals[s]);
st->setSrc(0, ri->getSrc(0));
}
} else
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
+ if (typeSizeof(ldst->dType) == 4 &&
+ ldst->src(1).getFile() == FILE_GPR &&
+ ldst->getSrc(1)->getInsn()->op == OP_NOP) {
+ delete_Instruction(prog, ldst);
+ continue;
+ }
isLoad = false;
} else {
// TODO: maybe have all fixed ops act as barrier ?
!isSurfaceOp(insn->op) && // not confirmed
insn->op != OP_LINTERP && // probably just nve4
insn->op != OP_PINTERP && // probably just nve4
- ((insn->op != OP_LOAD && insn->op != OP_STORE) ||
+ ((insn->op != OP_LOAD && insn->op != OP_STORE && insn->op != OP_ATOM) ||
(typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) &&
!insn->isNop()) {
insn->join = 1;
if (that->srcExists(s))
return false;
- if (op == OP_LOAD || op == OP_VFETCH) {
+ if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
ik = phi->getSrc(0)->getInsn();
if (!ik)
continue; // probably a function input
+ if (ik->defCount(0xff) > 1)
+ continue; // too painful to check if we can really push this forward
for (s = 1; phi->srcExists(s); ++s) {
if (phi->getSrc(s)->refCount() > 1)
break;
bool
DeadCodeElim::visit(BasicBlock *bb)
{
- Instruction *next;
+ Instruction *prev;
- for (Instruction *i = bb->getFirst(); i; i = next) {
- next = i->next;
+ for (Instruction *i = bb->getExit(); i; i = prev) {
+ prev = i->prev;
if (i->isDead()) {
++deadCount;
delete_Instruction(prog, i);
} else
- if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) {
+ if (i->defExists(1) &&
+ i->subOp == 0 &&
+ (i->op == OP_VFETCH || i->op == OP_LOAD)) {
checkSplitLoad(i);
} else
if (i->defExists(0) && !i->getDef(0)->refCount()) {
if (i->op == OP_ATOM ||
i->op == OP_SUREDP ||
- i->op == OP_SUREDB)
+ i->op == OP_SUREDB) {
i->setDef(0, NULL);
+ } else if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ i->setDef(0, i->getDef(1));
+ i->setDef(1, NULL);
+ }
}
}
return true;