From a3722b81f534598f25d9d155a6d30bc59a6f4e59 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Wed, 2 Dec 2015 21:02:12 -0500
Subject: [PATCH] nv50/ir: fold fma/mad when all 3 args are immediates

This happens pretty rarely, but might as well do it when it does.

Signed-off-by: Ilia Mirkin
---
 .../nouveau/codegen/nv50_ir_peephole.cpp | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index bb7f4911c21..b79e465b4fa 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -670,6 +670,34 @@ ConstantFolding::expr(Instruction *i,
       res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask);
       break;
    }
+   case OP_MAD:
+   case OP_FMA: { /* fold a * b + c into a single immediate, selected by the instruction's dType */
+      switch (i->dType) {
+      case TYPE_F32: /* the product is scaled by 2^postFactor before c is added */
+         res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor) +
+            c->data.f32;
+         break;
+      case TYPE_F64: /* no postFactor scaling is applied on the f64 path */
+         res.data.f64 = a->data.f64 * b->data.f64 + c->data.f64;
+         break;
+      case TYPE_S32:
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { /* product widened to int64_t, shifted right by 32, then c added */
+            res.data.s32 = ((int64_t)a->data.s32 * b->data.s32 >> 32) + c->data.s32;
+            break;
+         }
+         /* fallthrough */
+      case TYPE_U32:
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { /* product widened to uint64_t, shifted right by 32, then c added */
+            res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32 >> 32) + c->data.u32;
+            break;
+         }
+         res.data.u32 = a->data.u32 * b->data.u32 + c->data.u32; /* plain multiply-add; TYPE_S32 without MUL_HIGH also lands here */
+         break;
+      default: /* any other dType returns here without computing a result */
+         return;
+      }
+      break;
+   }
    default:
       return;
    }
@@ -684,6 +712,8 @@ ConstantFolding::expr(Instruction *i,
    i->setSrc(2, NULL);
 
    i->getSrc(0)->reg.data = res.data;
+   i->getSrc(0)->reg.type = i->dType; /* source 0 now carries the instruction's dType */
+   i->getSrc(0)->reg.size = typeSizeof(i->dType); /* size set from the same dType via typeSizeof() */
 
    i->op = OP_MOV;
 }
-- 
2.30.2