From 400a4eb964bc43c339ce213ee40b41e11c8d44dc Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Sat, 18 Aug 2018 15:06:01 +0100 Subject: [PATCH] nv50/ir: optimize near power-of-twos into shladd total instructions in shared programs : 5819319 -> 5796385 (-0.39%) total gprs used in shared programs : 670571 -> 670103 (-0.07%) total shared used in shared programs : 548832 -> 548832 (0.00%) total local used in shared programs : 21164 -> 21164 (0.00%) local shared gpr inst bytes helped 0 0 318 1758 1758 hurt 0 0 63 0 0 Signed-off-by: Rhys Perry Reviewed-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_peephole.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 30de9f5c82d..1ab743705a7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -958,6 +958,9 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2) bool ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c) { + const Target *target = prog->getTarget(); + int64_t absB = llabs(b); + //a * (2^shl) -> a << shl if (b >= 0 && util_is_power_of_two_or_zero64(b)) { int shl = util_logbase2_64(b); @@ -970,6 +973,30 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value * return true; } + //a * (2^shl + 1) -> a << shl + a + //a * -(2^shl + 1) -> -a << shl + a + //a * (2^shl - 1) -> a << shl - a + //a * -(2^shl - 1) -> -a << shl - a + if (typeSizeof(ty) == 4 && + (util_is_power_of_two_or_zero64(absB - 1) || + util_is_power_of_two_or_zero64(absB + 1)) && + target->isOpSupported(OP_SHLADD, TYPE_U32)) { + bool subA = util_is_power_of_two_or_zero64(absB + 1); + int shl = subA ? util_logbase2_64(absB + 1) : util_logbase2_64(absB - 1); + + Value *res = c ? bld.getSSA() : def; + Instruction *insn = bld.mkOp3(OP_SHLADD, TYPE_U32, res, a, bld.mkImm(shl), a); + if (b < 0) + insn->src(0).mod = Modifier(NV50_IR_MOD_NEG); + if (subA) + insn->src(2).mod = Modifier(NV50_IR_MOD_NEG); + + if (c) + bld.mkOp2(OP_ADD, TYPE_U32, def, res, c); + + return true; + } + return false; } -- 2.30.2