From 0ca046d7e97564c0479cb8d42ba40c798c163bb5 Mon Sep 17 00:00:00 2001
From: Karol Herbst
Date: Sat, 4 Aug 2018 04:19:49 +0200
Subject: [PATCH] gm200/ir: optimize rcp(sqrt) to rsq

mitigates hurt shaders after adding sqrt:
total instructions in shared programs : 5456166 -> 5454825 (-0.02%)
total gprs used in shared programs    : 647522 -> 647551 (0.00%)
total shared used in shared programs  : 389120 -> 389120 (0.00%)
total local used in shared programs   : 21064 -> 21064 (0.00%)
total bytes used in shared programs   : 58288696 -> 58274448 (-0.02%)

                local     shared        gpr       inst      bytes
    helped          0          0          0        516        516
      hurt          0          0         27          2          2

Reviewed-by: Ilia Mirkin
Signed-off-by: Karol Herbst
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 16022e6f237..2f7cc206b84 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1848,15 +1848,24 @@ AlgebraicOpt::handleMINMAX(Instruction *minmax)
    }
 }
 
+// rcp(rcp(a)) = a
+// rcp(sqrt(a)) = rsq(a)
 void
 AlgebraicOpt::handleRCP(Instruction *rcp)
 {
    Instruction *si = rcp->getSrc(0)->getUniqueInsn();
 
-   if (si && si->op == OP_RCP) {
+   if (!si)
+      return;
+
+   if (si->op == OP_RCP) {
       Modifier mod = rcp->src(0).mod * si->src(0).mod;
       rcp->op = mod.getOp();
       rcp->setSrc(0, si->getSrc(0));
+   } else if (si->op == OP_SQRT) {
+      rcp->op = OP_RSQ;
+      rcp->setSrc(0, si->getSrc(0));
+      rcp->src(0).mod = rcp->src(0).mod * si->src(0).mod;
    }
 }
 
-- 
2.30.2