nv50/ir/opt: improve post-multiply and check target for support

author Christoph Bumiller <e0425955@student.tuwien.ac.at>

Mon, 9 Apr 2012 18:34:24 +0000 (20:34 +0200)

committer Christoph Bumiller <e0425955@student.tuwien.ac.at>

Sat, 14 Apr 2012 19:54:00 +0000 (21:54 +0200)
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Mon, 9 Apr 2012 18:34:24 +0000 (20:34 +0200)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Sat, 14 Apr 2012 19:54:00 +0000 (21:54 +0200)
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp

index 9ffc75c685abcc02e8da4f96234ae112934d7853..bf648dddb84be56e2647f196ab660cabf58977af 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -216,6 +216,8 @@ private:
  
     void unary(Instruction *, const ImmediateValue&);
  
+   void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
+
     // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
     CmpInstruction *findOriginForTestWithZero(Value *);
  
@@ -527,6 +529,73 @@ ConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
     i->src[0].mod = Modifier(0);
  }
  
+/**
+ * Try to fold the F32 multiplication @mul2, whose source @s holds the
+ * immediate @imm2, into a neighbouring F32 multiplication.
+ *
+ * Three patterns are attempted, in this order:
+ *  1. a = mul r, imm1;  d = mul a, imm2  ->  d = mul r, (imm1 * imm2)
+ *  2. c = mul a, b;     d = mul c, imm   ->  d = mul_x_imm a, b
+ *  3. b = mul a, imm;   d = mul b, c     ->  d = mul_x_imm a, c
+ *
+ * Patterns 2 and 3 are only applied if the target reports, through
+ * isPostMultiplySupported(), that the immediate can be expressed as a
+ * post-multiply factor. The sign of the immediate is then folded into a NEG
+ * source modifier.
+ *
+ * @param mul2  the multiplication carrying the immediate (OP_MUL, TYPE_F32)
+ * @param s     index of the immediate source of @mul2
+ * @param imm2  the immediate value found in source @s
+ */
+void
+ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
+                                        const int s, ImmediateValue& imm2)
+{
+   const int t = s ? 0 : 1; // index of the non-immediate source of mul2
+   Instruction *insn;
+   Instruction *mul1 = NULL; // mul1 before mul2
+   int e = 0;
+   float f = imm2.reg.data.f32;
+
+   assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
+
+   // Patterns 1 and 2: the other operand is used only by mul2, so the
+   // instruction producing it may be rewritten freely.
+   if (mul2->getSrc(t)->refCount() == 1) {
+      insn = mul2->getSrc(t)->getInsn();
+      // Do not dereference a missing defining instruction.
+      if (insn && insn->op == OP_MUL && insn->dType == TYPE_F32)
+         mul1 = insn;
+      if (mul1) {
+         // Look for an immediate in either source of mul1.
+         int s1 = 0;
+         ImmediateValue *imm = mul1->src[s1].getImmediate();
+         if (!imm) {
+            s1 = 1;
+            imm = mul1->src[s1].getImmediate();
+         }
+         if (imm) {
+            bld.setPosition(mul1, false);
+            // a = mul r, imm1
+            // d = mul a, imm2 -> d = mul r, (imm1 * imm2)
+            ImmediateValue imm1(imm, TYPE_F32);
+            // Bake the source modifier into the constant before combining.
+            mul1->src[s1].mod.applyTo(imm1);
+            mul1->src[s1].mod = Modifier(0);
+            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
+            mul2->def[0].replace(mul1->getDef(0), false);
+         } else
+         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
+            // c = mul a, b
+            // d = mul c, imm   -> d = mul_x_imm a, b
+            mul1->postFactor = e;
+            mul2->def[0].replace(mul1->getDef(0), false);
+            // A negative factor is expressed by negating one operand.
+            if (f < 0)
+               mul1->src[0].mod = mul1->src[0].mod ^ Modifier(NV50_IR_MOD_NEG);
+         }
+         // A preceding MUL was found; pattern 3 is not attempted.
+         return;
+      }
+   }
+   // Pattern 3: the result of mul2 has a single use, which may be another
+   // MUL that can absorb the immediate as a post-multiply factor.
+   if (mul2->getDef(0)->refCount() == 1) {
+      // b = mul a, imm
+      // d = mul b, c   -> d = mul_x_imm a, c
+      int s2, t2;
+      insn = mul2->getDef(0)->uses->getInsn();
+      if (!insn)
+         return;
+      // From here on mul1 is the immediate multiply and mul2 its user.
+      mul1 = mul2;
+      mul2 = NULL;
+      s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1;
+      t2 = s2 ? 0 : 1;
+      // The user must be an F32 MUL whose other operand is not an immediate.
+      if (insn->op == OP_MUL && insn->dType == TYPE_F32)
+         if (!insn->src[t2].getImmediate())
+            mul2 = insn;
+      if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
+         mul2->postFactor = e;
+         // Bypass mul1: feed its non-immediate operand straight to the user.
+         mul2->setSrc(s2, mul1->src[t]);
+         if (f < 0)
+            mul2->src[s2].mod = mul2->src[s2].mod ^ Modifier(NV50_IR_MOD_NEG);
+      }
+   }
+}
+
  void
  ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
  {
@@ -539,41 +608,9 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
  
     switch (i->op) {
     case OP_MUL:
-      if (i->dType == TYPE_F32 && i->getSrc(t)->refCount() == 1) {
-         Instruction *si = i->getSrc(t)->getUniqueInsn();
-
-         if (si && si->op == OP_MUL) {
-            float f = imm.reg.data.f32;
-
-            if (si->src[1].getImmediate()) {
-               f *= si->src[1].getImmediate()->reg.data.f32;
-               si->setSrc(1, new_ImmediateValue(prog, f));
-               i->def[0].replace(i->getSrc(t), false);
-               break;
-            } else {
-               int fac;
-               if (f == 0.125f) fac = -3;
-               else
-               if (f == 0.250f) fac = -2;
-               else
-               if (f == 0.500f) fac = -1;
-               else
-               if (f == 2.000f) fac = +1;
-               else
-               if (f == 4.000f) fac = +2;
-               else
-               if (f == 8.000f) fac = +3;
-               else
-                  fac = 0;
-               if (fac) {
-                  // FIXME: allowed & modifier
-                  si->postFactor = fac;
-                  i->def[0].replace(i->getSrc(t), false);
-                  break;
-               }
-            }
-         }
-      }
+      if (i->dType == TYPE_F32)
+         tryCollapseChainedMULs(i, s, imm);
+
        if (imm.isInteger(0)) {
           i->op = OP_MOV;
           i->setSrc(0, i->getSrc(s));
@@ -905,6 +942,9 @@ AlgebraicOpt::handleADD(Instruction *add)
  
     src = add->getSrc(s);
  
+   if (src->getInsn()->postFactor)
+      return;
+
     mod[0] = add->src[0].mod;
     mod[1] = add->src[1].mod;
     mod[2] = src->getUniqueInsn()->src[0].mod;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h

index b685eca0f1d8ef5a01a2f41128b12dc7b6c556e9..6640198f09023790e0ea09d36f13a84f8f8c73e2 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
@@ -142,6 +142,8 @@ public:
     virtual bool isModSupported(const Instruction *,
                                 int s, Modifier) const = 0;
     virtual bool isSatSupported(const Instruction *) const = 0;
+   // Query whether multiplying the result of an instruction of kind op by
+   // the constant f can be folded into that instruction as a post-multiply
+   // factor. Overrides that return true are expected to store the factor's
+   // exponent in e, which callers copy into Instruction::postFactor.
+   // This default implementation reports no support and leaves e untouched.
+   virtual bool isPostMultiplySupported(operation op, float f,
+                                        int& e) const { return false; }
     virtual bool mayPredicate(const Instruction *,
                               const Value *) const = 0;
  
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp

index bcc2c43a55fd41bfe5e912171bb127e0662c80a0..6fe95c6693f4c551ae670c9c3d7bdd2cd557ea1e 100644 (file)
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -516,6 +516,18 @@ TargetNVC0::isSatSupported(const Instruction *insn) const
     return insn->dType == TYPE_F32;
  }
  
+/**
+ * Check whether a multiplication by the constant @f can be encoded as a
+ * post-multiply factor of an instruction of kind @op.
+ *
+ * Only OP_MUL is accepted, and only if |f| is an exact power of two in the
+ * range [2^-3, 2^3]. The sign of @f is ignored here; callers handle it
+ * separately.
+ *
+ * @param op  operation that would receive the post-multiply factor
+ * @param f   the constant factor
+ * @param e   receives log2(|f|), truncated, whenever |f| lies in the
+ *            accepted range; only meaningful if true is returned
+ * @return true if 2^e == |f| and the factor is encodable
+ */
+bool
+TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const
+{
+   if (op != OP_MUL)
+      return false;
+   f = fabsf(f);
+   // Reject 0, infinity and NaN (and everything else out of range) before
+   // the float-to-int conversion: log2f() of those values is not
+   // representable as an int, which makes the cast undefined behaviour.
+   if (!(f >= 0.125f && f <= 8.0f))
+      return false;
+   e = static_cast<int>(log2f(f));
+   // Truncation maps non-powers of two onto a smaller exponent; verify.
+   return f == exp2f(static_cast<float>(e));
+}
+
  // TODO: better values
  int TargetNVC0::getLatency(const Instruction *i) const
  {
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h

index 1a49f3699206eae8156ac3ac8e2cd7c63195a1ee..e4efe476de0544f9f3b89016ea4bab2cf53667a0 100644 (file)
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
@@ -48,6 +48,7 @@ public:
     virtual bool isAccessSupported(DataFile, DataType) const;
     virtual bool isModSupported(const Instruction *, int s, Modifier) const;
     virtual bool isSatSupported(const Instruction *) const;
+   virtual bool isPostMultiplySupported(operation, float, int& e) const;
     virtual bool mayPredicate(const Instruction *, const Value *) const;
  
     virtual int getLatency(const Instruction *) const;
author	Christoph Bumiller <e0425955@student.tuwien.ac.at>
	Mon, 9 Apr 2012 18:34:24 +0000 (20:34 +0200)
committer	Christoph Bumiller <e0425955@student.tuwien.ac.at>
	Sat, 14 Apr 2012 19:54:00 +0000 (21:54 +0200)
src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp		patch \| blob \| history
src/gallium/drivers/nv50/codegen/nv50_ir_target.h		patch \| blob \| history
src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp		patch \| blob \| history
src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h		patch \| blob \| history