From: Karol Herbst Date: Mon, 13 Jul 2020 14:24:20 +0000 (+0200) Subject: gv100/ir: set ftz bit on floating point operations X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=38e3cbb639174db71eea9edc2628e4e0b1696fbc;p=mesa.git gv100/ir: set ftz bit on floating point operations Fixes Unigine Heavens ambient occlusion Signed-off-by: Karol Herbst Part-of: --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp index 953f082a06a..644d4928327 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp @@ -35,7 +35,7 @@ GV100LegalizeSSA::handleCMP(Instruction *i) Value *pred = bld.getSSA(1, FILE_PREDICATE); bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred, - i->sType, bld.mkImm(0), i->getSrc(2)); + i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz; bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); return true; } @@ -189,6 +189,7 @@ GV100LegalizeSSA::handleSET(Instruction *i) xsetp->src(0).mod = i->src(0).mod; xsetp->src(1).mod = i->src(1).mod; xsetp->setSrc(2, src2); + xsetp->ftz = i->ftz; i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred); i->src(2).mod = Modifier(NV50_IR_MOD_NOT); @@ -235,6 +236,7 @@ GV100LegalizeSSA::handleSUB(Instruction *i) bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1)); xadd->src(0).mod = i->src(0).mod; xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); + xadd->ftz = i->ftz; return true; } @@ -244,6 +246,9 @@ GV100LegalizeSSA::visit(Instruction *i) bool lowered = false; bld.setPosition(i, false); + if (i->sType == TYPE_F32 && i->dType != TYPE_F16 && + prog->getType() != Program::TYPE_COMPUTE) + handleFTZ(i); switch (i->op) { case OP_AND: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index a4925013ee4..8c99427d3c0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -64,13 +64,14 @@ private: void handleDIV(Instruction *); // integer division, modulus void handleRCPRSQLib(Instruction *, Value *[]); void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt - void handleFTZ(Instruction *); void handleSET(CmpInstruction *); void handleTEXLOD(TexInstruction *); void handleShift(Instruction *); void handleBREV(Instruction *); protected: + void handleFTZ(Instruction *); + BuildUtil bld; };