From 978ae28ca279354852a586b202e705db3d596041 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 27 Jan 2016 18:25:05 +0100 Subject: [PATCH] nv50/ir: optimize shl(shr(a, c), c) to and(a, ~((1 << c) - 1)) Following shader-db results on GK110: total instructions in shared programs : 6141510 -> 6131491 (-0.16%) total gprs used in shared programs : 910187 -> 910157 (-0.00%) total local used in shared programs : 15328 -> 15328 (0.00%) local gpr inst bytes helped 0 18 821 821 hurt 0 0 0 0 Signed-off-by: Karol Herbst Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 95e9fdfc57d..b2c9fdfc7a4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1202,6 +1202,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); } break; + case OP_SHR: + if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) { + bld.setPosition(i, false); + i->op = OP_AND; + i->setSrc(0, si->getSrc(0)); + i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1))); + } + break; case OP_MUL: int muls; if (isFloatType(si->dType)) -- 2.30.2