X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_lowering_nv50.cpp;h=36ab837f6e2592bc4ae6613930eaacf93d73b1bf;hb=1b3b4196f08bf825d031cdf6bfcbc7dd3ccf3172;hp=75164ef064111394fea50f8626bae35afc13077b;hpb=af218217d71152df8562b7f087086197f28080fe;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 75164ef0641..36ab837f6e2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -44,6 +44,8 @@ static bool expandIntegerMUL(BuildUtil *bld, Instruction *mul) { const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH; + ImmediateValue src1; + bool src1imm = mul->src(1).getImmediate(src1); DataType fTy; // full type switch (mul->sType) { @@ -72,24 +74,41 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) for (int j = 0; j < 4; ++j) t[j] = bld->getSSA(fullSize); - s[0] = mul->getSrc(0); - s[1] = mul->getSrc(1); - if (isSignedType(mul->sType) && highResult) { s[0] = bld->getSSA(fullSize); s[1] = bld->getSSA(fullSize); bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0)); bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1)); + src1.reg.data.s32 = abs(src1.reg.data.s32); + } else { + s[0] = mul->getSrc(0); + s[1] = mul->getSrc(1); } // split sources into halves i[0] = bld->mkSplit(a, halfSize, s[0]); i[1] = bld->mkSplit(b, halfSize, s[1]); - i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); - i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); + if (src1imm && (src1.reg.data.u32 & 0xffff0000) == 0) { + i[2] = i[3] = bld->mkOp2(OP_MUL, fTy, t[1], a[1], + bld->mkImm(src1.reg.data.u32 & 0xffff)); + } else { + i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], + src1imm ? bld->mkImm(src1.reg.data.u32 >> 16) : b[1]); + if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) { + i[3] = i[2]; + t[1] = t[0]; + } else { + i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); + } + } i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8)); - i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); + if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) { + i[4] = i[3]; + t[3] = t[2]; + } else { + i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); + } if (highResult) { Value *c[2]; @@ -202,7 +221,8 @@ NV50LegalizePostRA::visit(Function *fn) Program *prog = fn->getProgram(); r63 = new_LValue(fn, FILE_GPR); - if (prog->maxGPR < 63) + // GPR units on nv50 are in half-regs + if (prog->maxGPR < 126) r63->reg.data.id = 63; else r63->reg.data.id = 127; @@ -293,8 +313,7 @@ NV50LegalizePostRA::visit(BasicBlock *bb) next = hi; } - if (i->op != OP_MOV && i->op != OP_PFETCH && - i->op != OP_BAR && + if (i->op != OP_PFETCH && i->op != OP_BAR && (!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS)) replaceZero(i); } @@ -353,7 +372,8 @@ NV50LegalizeSSA::propagateWriteToOutput(Instruction *st) return; for (int s = 0; di->srcExists(s); ++s) - if (di->src(s).getFile() == FILE_IMMEDIATE) + if (di->src(s).getFile() == FILE_IMMEDIATE || + di->src(s).getFile() == FILE_MEMORY_LOCAL) return; if (prog->getType() == Program::TYPE_GEOMETRY) { @@ -663,7 +683,7 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms, Value **ms_x, Value **ms_y) { // This loads the texture-indexed ms setting from the constant buffer Value *tmp = new_LValue(func, FILE_GPR); - uint8_t b = prog->driver->io.resInfoCBSlot; + uint8_t b = prog->driver->io.auxCBSlot; off += prog->driver->io.suInfoBase; if (prog->getType() > Program::TYPE_VERTEX) off += 16 * 2 * 4; @@ -705,6 +725,23 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i) const int dref = arg; const int lod = i->tex.target.isShadow() ? (arg + 1) : arg; + /* Only normalize in the non-explicit derivatives case. + */ + if (i->tex.target.isCube() && i->op != OP_TXD) { + Value *src[3], *val; + int c; + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c)); + val = bld.getScratch(); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + bld.mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) { + i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), + i->getSrc(c), val)); + } + } + // handle MS, which means looking up the MS params for this texture, and // adjusting the input coordinates to point at the right sample. if (i->tex.target.isMS()) { @@ -832,7 +869,7 @@ NV50LoweringPreSSA::handleTXB(TexInstruction *i) } Value *flags = bld.getScratch(1, FILE_FLAGS); bld.setPosition(cond, true); - bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0)); + bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0))->flagsDef = 0; Instruction *tex[4]; for (l = 0; l < 4; ++l) { @@ -911,16 +948,18 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i) Instruction *tex; Value *zero = bld.loadImm(bld.getSSA(), 0); int l, c; - const int dim = i->tex.target.getDim(); + const int dim = i->tex.target.getDim() + i->tex.target.isCube(); handleTEX(i); i->op = OP_TEX; // no need to clone dPdx/dPdy later + i->tex.derivAll = true; for (c = 0; c < dim; ++c) crd[c] = bld.getScratch(); bld.mkOp(OP_QUADON, TYPE_NONE, NULL); for (l = 0; l < 4; ++l) { + Value *src[3], *val; // mov coordinates from lane l to all lanes for (c = 0; c < dim; ++c) bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero); @@ -930,10 +969,24 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i) // add dPdy from lane l to lanes dy for (c = 0; c < dim; ++c) bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); + // normalize cube coordinates if necessary + if (i->tex.target.isCube()) { + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]); + val = bld.getScratch(); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + bld.mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val); + } else { + for (c = 0; c < dim; ++c) + src[c] = crd[c]; + } // texture bld.insert(tex = cloneForward(func, i)); for (c = 0; c < dim; ++c) - tex->setSrc(c, crd[c]); + tex->setSrc(c, src[c]); // save results for (c = 0; i->defExists(c); ++c) { Instruction *mov; @@ -1155,7 +1208,7 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) bld.mkLoad(TYPE_F32, def, bld.mkSymbol( - FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx), off); break; @@ -1184,10 +1237,9 @@ NV50LoweringPreSSA::handleDIV(Instruction *i) bool NV50LoweringPreSSA::handleSQRT(Instruction *i) { - Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32, - bld.getSSA(), i->getSrc(0)); - i->op = OP_MUL; - i->setSrc(1, rsq->getDef(0)); + bld.setPosition(i, true); + i->op = OP_RSQ; + bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0)); return true; } @@ -1225,7 +1277,7 @@ NV50LoweringPreSSA::handleEXPORT(Instruction *i) i->setDef(0, new_LValue(func, FILE_GPR)); i->getDef(0)->reg.data.id = id; - prog->maxGPR = MAX2(prog->maxGPR, id); + prog->maxGPR = MAX2(prog->maxGPR, id * 2); } } return true;