nv50/ir: optimize shl(shr(a, c), c) to and(a, ~((1 << c) - 1))
authorKarol Herbst <nouveau@karolherbst.de>
Wed, 27 Jan 2016 17:25:05 +0000 (18:25 +0100)
committerIlia Mirkin <imirkin@alum.mit.edu>
Thu, 28 Jan 2016 20:34:22 +0000 (15:34 -0500)
Following shader-db results on GK110:

total instructions in shared programs : 6141510 -> 6131491 (-0.16%)
total gprs used in shared programs    : 910187 -> 910157 (-0.00%)
total local used in shared programs   : 15328 -> 15328 (0.00%)

                local        gpr       inst      bytes
    helped           0          18         821         821
      hurt           0           0           0           0

Signed-off-by: Karol Herbst <nouveau@karolherbst.de>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index 95e9fdfc57da5fd5b7374af4475d421a743c7415..b2c9fdfc7a463ff6c9e1168434c8a9e4d2c72b5c 100644 (file)
@@ -1202,6 +1202,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
             i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
          }
          break;
+      case OP_SHR:
+         if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) {
+            bld.setPosition(i, false);
+            i->op = OP_AND;
+            i->setSrc(0, si->getSrc(0));
+            i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1)));
+         }
+         break;
       case OP_MUL:
          int muls;
          if (isFloatType(si->dType))