// =============================================================================
// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
class LateAlgebraicOpt : public Pass
{
private:
virtual bool visit(Instruction *);
void handleADD(Instruction *);
+ void handleMULMAD(Instruction *);
bool tryADDToSHLADD(Instruction *);
+
+ BuildUtil bld;
};
void
return true;
}
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
+void
+LateAlgebraicOpt::handleMULMAD(Instruction *i)
+{
+ // TODO: handle NV50_IR_SUBOP_MUL_HIGH
+ if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32))
+ return;
+ if (isFloatType(i->dType) || typeSizeof(i->dType) != 4)
+ return;
+ if (i->subOp || i->usesFlags() || i->flagsDef >= 0)
+ return;
+
+ assert(!i->src(0).mod);
+ assert(!i->src(1).mod);
+ assert(i->op == OP_MUL ? 1 : !i->src(2).mod);
+
+ bld.setPosition(i, false);
+
+ Value *a = i->getSrc(0);
+ Value *b = i->getSrc(1);
+ Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2);
+
+ Value *tmp0 = bld.getSSA();
+ Value *tmp1 = bld.getSSA();
+
+ Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c);
+ insn->setPredicate(i->cc, i->getPredicate());
+
+ insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0));
+ insn->setPredicate(i->cc, i->getPredicate());
+ insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1);
+
+ Value *pred = i->getPredicate();
+ i->setPredicate(i->cc, NULL);
+
+ i->op = OP_XMAD;
+ i->setSrc(0, b);
+ i->setSrc(1, tmp1);
+ i->setSrc(2, tmp0);
+ i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC;
+ i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);
+
+ i->setPredicate(i->cc, pred);
+}
+
bool
LateAlgebraicOpt::visit(Instruction *i)
{
case OP_ADD:
handleADD(i);
break;
+ case OP_MUL:
+ case OP_MAD:
+ case OP_FMA:
+ handleMULMAD(i);
+ break;
default:
break;
}