nv50/ir: optimize imul/imad to xmads

author Rhys Perry <pendingchaos02@gmail.com>

Wed, 13 Jun 2018 15:25:23 +0000 (16:25 +0100)

committer Rhys Perry <pendingchaos02@gmail.com>

Mon, 27 Aug 2018 12:56:44 +0000 (13:56 +0100)
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 13 Jun 2018 15:25:23 +0000 (16:25 +0100)
committer Rhys Perry <pendingchaos02@gmail.com>
Mon, 27 Aug 2018 12:56:44 +0000 (13:56 +0100)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index 5b4a98d25cb7532968ba15a6153bee8c7e1c85e5..dc7bf24ba238e38b1eff3d63b032f163b4d63bb0 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2301,13 +2301,18 @@ AlgebraicOpt::visit(BasicBlock *bb)
  // =============================================================================
  
  // ADD(SHL(a, b), c) -> SHLADD(a, b, c)
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
  class LateAlgebraicOpt : public Pass
  {
  private:
     virtual bool visit(Instruction *);
  
     void handleADD(Instruction *);
+   void handleMULMAD(Instruction *); // rewrites integer MUL/MAD/FMA into a few XMADs when the target supports XMAD
     bool tryADDToSHLADD(Instruction *);
+
+   BuildUtil bld; // builder handleMULMAD uses to create the extra XMADs, SSA temporaries and immediates
  };
  
  void
@@ -2368,6 +2373,52 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
     return true;
  }
  
+// MUL(a, b) -> a few XMADs (c is immediate 0 in this case)
+// MAD/FMA(a, b, c) -> a few XMADs (non-float, 4-byte dType only; two XMADs are added and i becomes the third)
+void
+LateAlgebraicOpt::handleMULMAD(Instruction *i)
+{
+   // TODO: handle NV50_IR_SUBOP_MUL_HIGH
+   if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32)) // target has no XMAD: leave i untouched
+      return;
+   if (isFloatType(i->dType) || typeSizeof(i->dType) != 4) // only non-float results of exactly 4 bytes are rewritten
+      return;
+   if (i->subOp || i->usesFlags() || i->flagsDef >= 0) // any subOp (see TODO above) or flags use/def: skip
+      return;
+
+   assert(!i->src(0).mod); // the rewrite below does not carry source modifiers over, so none may be present
+   assert(!i->src(1).mod);
+   assert(i->op == OP_MUL ? 1 : !i->src(2).mod); // src 2 is only inspected when i is not a MUL
+
+   bld.setPosition(i, false); // new instructions are placed relative to i; NOTE(review): presumably before it - confirm
+
+   Value *a = i->getSrc(0);
+   Value *b = i->getSrc(1);
+   Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2); // MUL has no addend, so use immediate 0
+
+   Value *tmp0 = bld.getSSA(); // result of the first helper XMAD
+   Value *tmp1 = bld.getSSA(); // result of the second helper XMAD
+
+   Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c); // 1st XMAD: tmp0 = xmad(b, a, c), no subOp set here
+   insn->setPredicate(i->cc, i->getPredicate()); // helper runs under the same predicate/cc as i
+
+   insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0)); // 2nd XMAD: tmp1 = xmad(b, a, 0)
+   insn->setPredicate(i->cc, i->getPredicate());
+   insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1); // 2nd XMAD differs from the 1st by MRG and H1(1)
+
+   Value *pred = i->getPredicate(); // remember the predicate so it can be restored after the rewrite
+   i->setPredicate(i->cc, NULL); // NOTE(review): presumably detached so setSrc(2, ...) cannot clash with it - confirm
+
+   i->op = OP_XMAD; // i itself becomes the 3rd XMAD: xmad(b, tmp1, tmp0)
+   i->setSrc(0, b);
+   i->setSrc(1, tmp1);
+   i->setSrc(2, tmp0);
+   i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC; // replaces the old subOp, which the guard above ensured was 0
+   i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);
+
+   i->setPredicate(i->cc, pred); // re-attach the original predicate
+}
+
  bool
  LateAlgebraicOpt::visit(Instruction *i)
  {
@@ -2375,6 +2426,11 @@ LateAlgebraicOpt::visit(Instruction *i)
     case OP_ADD:
        handleADD(i);
        break;
+   case OP_MUL:
+   case OP_MAD:
+   case OP_FMA:
+      handleMULMAD(i);
+      break;
     default:
        break;
     }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp

index de07ad1de89e704a7073766d255f8dbb97231cbe..2e2e40770e137986dd41c970adf6a5ccc56032a3 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -170,7 +170,6 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
        }
        break;
     case OPCLASS_ARITH:
-      // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
        if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
            !isFloatType(insn->dType))
           return true;
author	Rhys Perry <pendingchaos02@gmail.com>
	Wed, 13 Jun 2018 15:25:23 +0000 (16:25 +0100)
committer	Rhys Perry <pendingchaos02@gmail.com>
	Mon, 27 Aug 2018 12:56:44 +0000 (13:56 +0100)
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp		patch \| blob \| history