nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg

author Ilia Mirkin <imirkin@alum.mit.edu>

Sat, 9 May 2015 03:46:53 +0000 (23:46 -0400)

committer Ilia Mirkin <imirkin@alum.mit.edu>

Fri, 22 May 2015 20:51:05 +0000 (16:51 -0400)
author Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 9 May 2015 03:46:53 +0000 (23:46 -0400)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Fri, 22 May 2015 20:51:05 +0000 (16:51 -0400)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index 82e81482b5107ce83b9ead5592099cd18369b0f7..72dd31efecc5c2f70927c2af36283c7007bfae08 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -278,7 +278,6 @@ private:
  
     void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
  
-   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
     CmpInstruction *findOriginForTestWithZero(Value *);
  
     unsigned int foldCount;
@@ -337,25 +336,33 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
        return NULL;
     Instruction *insn = value->getInsn();
  
-   while (insn && insn->op != OP_SET) {
-      Instruction *next = NULL;
-      switch (insn->op) {
-      case OP_NEG:
-      case OP_ABS:
-      case OP_CVT:
-         next = insn->getSrc(0)->getInsn();
-         if (insn->sType != next->dType)
+   if (insn->asCmp() && insn->op != OP_SLCT)
+      return insn->asCmp();
+
+   /* Sometimes mov's will sneak in as a result of other folding. This gets
+    * cleaned up later.
+    */
+   if (insn->op == OP_MOV)
+      return findOriginForTestWithZero(insn->getSrc(0));
+
+   /* Deal with AND 1.0 here since nv50 can't fold into boolean float */
+   if (insn->op == OP_AND) {
+      int s = 0;
+      ImmediateValue imm;
+      if (!insn->src(s).getImmediate(imm)) {
+         s = 1;
+         if (!insn->src(s).getImmediate(imm))
              return NULL;
-         break;
-      case OP_MOV:
-         next = insn->getSrc(0)->getInsn();
-         break;
-      default:
-         return NULL;
        }
-      insn = next;
+      if (imm.reg.data.f32 != 1.0f)
+         return NULL;
+      /* TODO: Come up with a way to handle the condition being inverted */
+      if (insn->src(!s).mod != Modifier(0))
+         return NULL;
+      return findOriginForTestWithZero(insn->getSrc(!s));
     }
-   return insn ? insn->asCmp() : NULL;
+
+   return NULL;
  }
  
  void
@@ -946,29 +953,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
  
     case OP_SET: // TODO: SET_AND,OR,XOR
     {
+      /* This optimizes the case where the output of a set is being compared
+       * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
+       * can be a lot cleverer in our comparison.
+       */
        CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
        CondCode cc, ccZ;
-      if (i->src(t).mod != Modifier(0))
-         return;
-      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
+      if (imm0.reg.data.u32 != 0 || !si)
           return;
        cc = si->setCond;
        ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+      // We do everything assuming var (cmp) 0, reverse the condition if 0 is
+      // first.
        if (s == 0)
           ccZ = reverseCondCode(ccZ);
+      // If there is a negative modifier, we need to undo that, by flipping
+      // the comparison to zero.
+      if (i->src(t).mod.neg())
+         ccZ = reverseCondCode(ccZ);
+      // If this is a signed comparison, we expect the input to be a regular
+      // boolean, i.e. 0/-1. However the rest of the logic assumes that true
+      // is positive, so just flip the sign.
+      if (i->sType == TYPE_S32) {
+         assert(!isFloatType(si->dType));
+         ccZ = reverseCondCode(ccZ);
+      }
        switch (ccZ) {
-      case CC_LT: cc = CC_FL; break;
-      case CC_GE: cc = CC_TR; break;
-      case CC_EQ: cc = inverseCondCode(cc); break;
-      case CC_LE: cc = inverseCondCode(cc); break;
-      case CC_GT: break;
-      case CC_NE: break;
+      case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
+      case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
+      case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
+      case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
+      case CC_GT: break; // bool > 0 -- bool
+      case CC_NE: break; // bool != 0 -- bool
        default:
           return;
        }
+
+      // Update the condition of this SET to be identical to the origin set,
+      // but with the updated condition code. The original SET should get
+      // DCE'd, ideally.
+      i->op = si->op;
        i->asCmp()->setCond = cc;
        i->setSrc(0, si->src(0));
        i->setSrc(1, si->src(1));
+      if (si->srcExists(2))
+         i->setSrc(2, si->src(2));
        i->sType = si->sType;
     }
        break;
author	Ilia Mirkin <imirkin@alum.mit.edu>
	Sat, 9 May 2015 03:46:53 +0000 (23:46 -0400)
committer	Ilia Mirkin <imirkin@alum.mit.edu>
	Fri, 22 May 2015 20:51:05 +0000 (16:51 -0400)