nv50/ir: combine sequences of conversions

author Ilia Mirkin <imirkin@alum.mit.edu>

Thu, 10 Dec 2015 20:24:47 +0000 (15:24 -0500)

committer Ilia Mirkin <imirkin@alum.mit.edu>

Sat, 12 Dec 2015 23:10:16 +0000 (18:10 -0500)
author Ilia Mirkin <imirkin@alum.mit.edu>
Thu, 10 Dec 2015 20:24:47 +0000 (15:24 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 12 Dec 2015 23:10:16 +0000 (18:10 -0500)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index 59c5c1ccba1d7f596e4b71c3740634f07ce35e66..022626ccb8fb51063055e19c7c0d41339444f358 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1501,6 +1501,7 @@ private:
     void handleSLCT(Instruction *);
     void handleLOGOP(Instruction *);
     void handleCVT_NEG(Instruction *);
+   void handleCVT_CVT(Instruction *);
     void handleCVT_EXTBF(Instruction *);
     void handleSUCLAMP(Instruction *);
  
@@ -1792,6 +1793,47 @@ AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
     delete_Instruction(prog, cvt);
  }
  
+// Fold a rounding/conversion producer into the CVT that consumes it, e.g.
+// F2I(TRUNC(x)) can be expressed as a single CVT carrying the rounding mode.
+//
+// Only producers that keep the type unchanged (dType == sType) and whose
+// result type equals cvt's source type are merged. If the earlier op does a
+// type conversion this becomes trickier, as there might be range changes/etc.
+// Those could be handled in theory as long as the range was being reduced or
+// kept the same.
+//
+// On success cvt reads the producer's source directly, takes over its
+// rounding mode and source type, and the producer's source modifier is
+// composed into cvt's. The producer instruction itself is not removed here.
+void
+AlgebraicOpt::handleCVT_CVT(Instruction *cvt)
+{
+   Instruction *insn = cvt->getSrc(0)->getInsn();
+
+   // The source value may have no defining instruction, in which case
+   // getInsn() yields a null pointer and there is nothing to merge.
+   if (!insn)
+      return;
+
+   // Producers that saturate, carry a subOp, or change type are left alone,
+   // as are producers whose result type differs from what cvt consumes.
+   if (insn->saturate ||
+       insn->subOp ||
+       insn->dType != insn->sType ||
+       insn->dType != cvt->sType)
+      return;
+
+   // Start from the producer's own rounding mode (used as-is for OP_CVT) and
+   // override it for the dedicated rounding ops.
+   RoundMode rnd = insn->rnd;
+
+   switch (insn->op) {
+   case OP_CEIL:
+      rnd = ROUND_PI;
+      break;
+   case OP_FLOOR:
+      rnd = ROUND_MI;
+      break;
+   case OP_TRUNC:
+      rnd = ROUND_ZI;
+      break;
+   case OP_CVT:
+      break;
+   default:
+      // Any other producer is not something this pass knows how to fold.
+      return;
+   }
+
+   // Unless cvt's destination and the producer's source are both float, keep
+   // only the low two bits of the rounding mode.
+   // NOTE(review): presumably this strips the "round to integer" (..I)
+   // variant and leaves the plain rounding direction -- confirm against the
+   // RoundMode enum.
+   if (!isFloatType(cvt->dType) || !isFloatType(insn->sType))
+      rnd = (RoundMode)(rnd & 3);
+
+   cvt->rnd = rnd;
+   cvt->setSrc(0, insn->getSrc(0));
+   cvt->src(0).mod *= insn->src(0).mod;
+   cvt->sType = insn->sType;
+}
+
  // Some shaders extract packed bytes out of words and convert them to
  // e.g. float. The Fermi+ CVT instruction can extract those directly, as can
  // nv50 for word sizes.
@@ -1961,6 +2003,7 @@ AlgebraicOpt::visit(BasicBlock *bb)
           break;
        case OP_CVT:
           handleCVT_NEG(i);
+         handleCVT_CVT(i);
           if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
               handleCVT_EXTBF(i);
           break;
author	Ilia Mirkin <imirkin@alum.mit.edu>
	Thu, 10 Dec 2015 20:24:47 +0000 (15:24 -0500)
committer	Ilia Mirkin <imirkin@alum.mit.edu>
	Sat, 12 Dec 2015 23:10:16 +0000 (18:10 -0500)