From dffc2fb4e659f41b30e80e23ceb833d53417fb8e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 4 May 2012 18:00:40 +0200 Subject: [PATCH] nv50/ir: move expansion of IMUL to later stage and handle memory operands --- .../nv50/codegen/nv50_ir_build_util.cpp | 25 +++++++++++++++++++ .../drivers/nv50/codegen/nv50_ir_build_util.h | 2 ++ .../nv50/codegen/nv50_ir_lowering_nv50.cpp | 24 ++++++------------ .../nv50/codegen/nv50_ir_target_nv50.cpp | 17 ++++++++++++- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp index f7dac25c116..f713e6391c6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp @@ -278,6 +278,31 @@ BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc) return mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1); } +Instruction * +BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val) +{ + Instruction *insn = NULL; + + const DataType fTy = typeOfSize(halfSize * 2); + + if (val->reg.file == FILE_IMMEDIATE) + val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0); + + if (isMemoryFile(val->reg.file)) { + h[0] = cloneShallow(getFunction(), val); + h[1] = cloneShallow(getFunction(), val); + h[0]->reg.size = halfSize; + h[1]->reg.size = halfSize; + h[1]->reg.data.offset += halfSize; + } else { + h[0] = getSSA(halfSize, val->reg.file); + h[1] = getSSA(halfSize, val->reg.file); + insn = mkOp1(OP_SPLIT, fTy, h[0], val); + insn->setDef(1, h[1]); + } + return insn; +} + FlowInstruction * BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred) { diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h index 9ee04dbcd12..dd7e491cb5c 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h @@ -81,6 +81,8 @@ public: Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc); + Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *); + void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2); ImmediateValue *mkImm(float); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp index 011014eb5ba..16bba0e1723 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp @@ -57,15 +57,17 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) Instruction *i[9]; - Value *a[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) }; - Value *b[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) }; + bld->setPosition(mul, true); + + Value *a[2], *b[2]; Value *c[2]; Value *t[4]; for (int j = 0; j < 4; ++j) t[j] = bld->getSSA(fullSize); - (i[0] = bld->mkOp1(OP_SPLIT, fTy, a[0], mul->getSrc(0)))->setDef(1, a[1]); - (i[1] = bld->mkOp1(OP_SPLIT, fTy, b[0], mul->getSrc(1)))->setDef(1, b[1]); + // split sources into halves + i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0)); + i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1)); i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); @@ -96,7 +98,8 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) delete_Instruction(bld->getProgram(), mul); for (int j = 2; j <= (highResult ? 5 : 4); ++j) - i[j]->sType = hTy; + if (i[j]) + i[j]->sType = hTy; return true; } @@ -518,7 +521,6 @@ private: bool handleEXPORT(Instruction *); - bool handleMUL(Instruction *); bool handleDIV(Instruction *); bool handleSQRT(Instruction *); bool handlePOW(Instruction *); @@ -941,14 +943,6 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) return true; } -bool -NV50LoweringPreSSA::handleMUL(Instruction *i) -{ - if (!isFloatType(i->dType) && typeSizeof(i->sType) > 2) - return expandIntegerMUL(&bld, i); - return true; -} - bool NV50LoweringPreSSA::handleDIV(Instruction *i) { @@ -1069,8 +1063,6 @@ NV50LoweringPreSSA::visit(Instruction *i) return handleSELP(i); case OP_POW: return handlePOW(i); - case OP_MUL: - return handleMUL(i); case OP_DIV: return handleDIV(i); case OP_SQRT: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp index 5e541e514cb..8b11c6a2fdd 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp @@ -310,7 +310,22 @@ TargetNV50::insnCanLoad(const Instruction *i, int s, return false; } - if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * typeSizeof(ld->dType))) + uint8_t ldSize; + + if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) { + // 32-bit MUL will be split into 16-bit MULs + if (ld->src(0).isIndirect(0)) + return false; + if (sf == FILE_IMMEDIATE) + return false; + ldSize = 2; + } else { + ldSize = typeSizeof(ld->dType); + } + + if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access + return false; + if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize)) return false; if (ld->src(0).isIndirect(0)) { -- 2.30.2