From 29d09f8747abea35f4deadced0196725d4ab89cf Mon Sep 17 00:00:00 2001
From: Karol Herbst
Date: Wed, 27 Jan 2016 18:25:08 +0100
Subject: [PATCH] nv50/ir: optimize mad/fma with third argument 0 to mul

Very modest effect, but it's clearly the right thing to do.

total instructions in shared programs : 6131491 -> 6131398 (-0.00%)
total gprs used in shared programs    : 910157 -> 910131 (-0.00%)
total local used in shared programs   : 15328 -> 15328 (0.00%)

                local        gpr       inst      bytes
    helped           0         55         85         85
      hurt           0         26         20         20

Signed-off-by: Karol Herbst
Reviewed-by: Ilia Mirkin
---
 .../nouveau/codegen/nv50_ir_peephole.cpp      | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index bda9c7d0c52..eb790d028f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -336,6 +336,7 @@ private:
    void expr(Instruction *, ImmediateValue&, ImmediateValue&);
    void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
    void opnd(Instruction *, ImmediateValue&, int s);
+   void opnd3(Instruction *, ImmediateValue&);
 
    void unary(Instruction *, const ImmediateValue&);
 
@@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb)
       else
       if (i->srcExists(1) && i->src(1).getImmediate(src1))
          opnd(i, src1, 1);
+      if (i->srcExists(2) && i->src(2).getImmediate(src2))
+         opnd3(i, src2);
    }
    return true;
 }
@@ -872,6 +875,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
    }
 }
 
+void
+ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
+{
+   switch (i->op) {
+   case OP_MAD:
+   case OP_FMA:
+      if (imm2.isInteger(0)) {
+         i->op = OP_MUL;
+         i->setSrc(2, NULL);
+         foldCount++;
+         return;
+      }
+      break;
+   default:
+      return;
+   }
+}
+
 void
 ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 {
-- 
2.30.2