From: Christoph Bumiller
Date: Mon, 9 Apr 2012 18:34:24 +0000 (+0200)
Subject: nv50/ir/opt: improve post-multiply and check target for support
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=55f9bdb64e1f88c74754c8e090cd2cdbe62bba05;p=mesa.git

nv50/ir/opt: improve post-multiply and check target for support

[review: amended relative to the commit named above - NULL guard in
tryCollapseChainedMULs, range check before the int conversion in
TargetNVC0::isPostMultiplySupported, explanatory comments; hunk headers
adjusted accordingly, index lines left as in the original commit]
---

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
index 9ffc75c685a..bf648dddb84 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -216,6 +216,8 @@ private:
 
    void unary(Instruction *, const ImmediateValue&);
 
+   void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
+
    // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
    CmpInstruction *findOriginForTestWithZero(Value *);
 
@@ -527,6 +529,84 @@ ConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
    i->src[0].mod = Modifier(0);
 }
 
+// Try to fold the immediate factor of an f32 MUL into a neighbouring f32 MUL.
+//
+// mul2 is the MUL being visited, s the index of its immediate source and
+// imm2 that immediate. If the other source has refCount() 1 and is defined
+// by an f32 MUL, the two immediates are merged or, if the target supports
+// it, the factor becomes that MUL's postFactor. Otherwise, if the result has
+// refCount() 1 and its user is an f32 MUL whose other source is not an
+// immediate, the factor becomes the user's postFactor.
+// NOTE(review): mul2->src[t].mod is not consulted before mul2's result is
+// replaced - confirm that this is intended.
+void
+ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
+                                        const int s, ImmediateValue& imm2)
+{
+   const int t = s ? 0 : 1;
+   Instruction *insn;
+   Instruction *mul1 = NULL; // mul1 before mul2
+   int e = 0;
+   float f = imm2.reg.data.f32;
+
+   assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
+
+   if (mul2->getSrc(t)->refCount() == 1) {
+      insn = mul2->getSrc(t)->getInsn();
+      // guard against a missing defining insn, as the code this replaces did
+      if (insn && insn->op == OP_MUL && insn->dType == TYPE_F32)
+         mul1 = insn;
+      if (mul1) {
+         int s1 = 0;
+         ImmediateValue *imm = mul1->src[s1].getImmediate();
+         if (!imm) {
+            s1 = 1;
+            imm = mul1->src[s1].getImmediate();
+         }
+         if (imm) {
+            bld.setPosition(mul1, false);
+            // a = mul r, imm1
+            // d = mul a, imm2 -> d = mul r, (imm1 * imm2)
+            ImmediateValue imm1(imm, TYPE_F32);
+            mul1->src[s1].mod.applyTo(imm1);
+            mul1->src[s1].mod = Modifier(0);
+            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
+            mul2->def[0].replace(mul1->getDef(0), false);
+         } else
+         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
+            // c = mul a, b
+            // d = mul c, imm -> d = mul_x_imm a, b
+            mul1->postFactor = e;
+            mul2->def[0].replace(mul1->getDef(0), false);
+            if (f < 0)
+               mul1->src[0].mod = mul1->src[0].mod ^ Modifier(NV50_IR_MOD_NEG);
+         }
+         return;
+      }
+   }
+   if (mul2->getDef(0)->refCount() == 1) {
+      // b = mul a, imm
+      // d = mul b, c -> d = mul_x_imm a, c
+      int s2, t2;
+      insn = mul2->getDef(0)->uses->getInsn();
+      if (!insn)
+         return;
+      mul1 = mul2;
+      mul2 = NULL;
+      s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1;
+      t2 = s2 ? 0 : 1;
+      if (insn->op == OP_MUL && insn->dType == TYPE_F32)
+         if (!insn->src[t2].getImmediate())
+            mul2 = insn;
+      if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
+         mul2->postFactor = e;
+         mul2->setSrc(s2, mul1->src[t]);
+         if (f < 0)
+            mul2->src[s2].mod = mul2->src[s2].mod ^ Modifier(NV50_IR_MOD_NEG);
+      }
+   }
+}
+
 void
 ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
 {
@@ -539,41 +619,9 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
 
    switch (i->op) {
    case OP_MUL:
-      if (i->dType == TYPE_F32 && i->getSrc(t)->refCount() == 1) {
-         Instruction *si = i->getSrc(t)->getUniqueInsn();
-
-         if (si && si->op == OP_MUL) {
-            float f = imm.reg.data.f32;
-
-            if (si->src[1].getImmediate()) {
-               f *= si->src[1].getImmediate()->reg.data.f32;
-               si->setSrc(1, new_ImmediateValue(prog, f));
-               i->def[0].replace(i->getSrc(t), false);
-               break;
-            } else {
-               int fac;
-               if (f == 0.125f) fac = -3;
-               else
-               if (f == 0.250f) fac = -2;
-               else
-               if (f == 0.500f) fac = -1;
-               else
-               if (f == 2.000f) fac = +1;
-               else
-               if (f == 4.000f) fac = +2;
-               else
-               if (f == 8.000f) fac = +3;
-               else
-                  fac = 0;
-               if (fac) {
-                  // FIXME: allowed & modifier
-                  si->postFactor = fac;
-                  i->def[0].replace(i->getSrc(t), false);
-                  break;
-               }
-            }
-         }
-      }
+      if (i->dType == TYPE_F32)
+         tryCollapseChainedMULs(i, s, imm);
+
       if (imm.isInteger(0)) {
          i->op = OP_MOV;
          i->setSrc(0, i->getSrc(s));
@@ -905,6 +953,9 @@ AlgebraicOpt::handleADD(Instruction *add)
 
    src = add->getSrc(s);
 
+   if (src->getInsn()->postFactor)
+      return;
+
    mod[0] = add->src[0].mod;
    mod[1] = add->src[1].mod;
    mod[2] = src->getUniqueInsn()->src[0].mod;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
index b685eca0f1d..6640198f090 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
@@ -142,6 +142,8 @@ public:
    virtual bool isModSupported(const Instruction *,
                                int s, Modifier) const = 0;
    virtual bool isSatSupported(const Instruction *) const = 0;
+   virtual bool isPostMultiplySupported(operation op, float f,
+                                        int& e) const { return false; }
    virtual bool mayPredicate(const Instruction *,
                              const Value *) const = 0;
 
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
index bcc2c43a55f..6fe95c6693f 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -516,6 +516,24 @@ TargetNVC0::isSatSupported(const Instruction *insn) const
    return insn->dType == TYPE_F32;
 }
 
+// Tell whether scaling the result of op by f can be expressed as a
+// post-multiply: only for OP_MUL with |f| an exact power of two between
+// 2^-3 and 2^3. On success e receives the exponent; the sign of f is dropped
+// here, and e is only meaningful when true is returned.
+bool
+TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const
+{
+   if (op != OP_MUL)
+      return false;
+   f = fabsf(f);
+   // Range-check before converting: log2f() of 0, inf or NaN must not reach
+   // the int cast. Written as !(in range) so that NaN is rejected as well.
+   if (!(f >= 0.125f && f <= 8.0f))
+      return false;
+   e = static_cast<int>(log2f(f));
+   return f == exp2f(static_cast<float>(e));
+}
+
 // TODO: better values
 int TargetNVC0::getLatency(const Instruction *i) const
 {
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
index 1a49f369920..e4efe476de0 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
@@ -48,6 +48,7 @@ public:
    virtual bool isAccessSupported(DataFile, DataType) const;
    virtual bool isModSupported(const Instruction *, int s, Modifier) const;
    virtual bool isSatSupported(const Instruction *) const;
+   virtual bool isPostMultiplySupported(operation, float, int& e) const;
    virtual bool mayPredicate(const Instruction *, const Value *) const;
 
    virtual int getLatency(const Instruction *) const;