From b87b498b88c51fb8c80901b8f581331d3fbcd972 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 7 Jul 2014 00:04:19 -0400 Subject: [PATCH] nvc0/ir: fix lowering of RSQ/RCP/SQRT/MOD to work with F64 Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../nouveau/codegen/nv50_ir_emit_gk110.cpp | 4 +- .../nouveau/codegen/nv50_ir_emit_gm107.cpp | 4 +- .../nouveau/codegen/nv50_ir_emit_nvc0.cpp | 4 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 43 ++++++++++++++----- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 0ff5e5dc3b2..529dcb9bdc2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -175,6 +175,7 @@ enum operation #define NV50_IR_SUBOP_MOV_FINAL 1 #define NV50_IR_SUBOP_EXTBF_REV 1 #define NV50_IR_SUBOP_BFIND_SAMT 1 +#define NV50_IR_SUBOP_RCPRSQ_64H 1 #define NV50_IR_SUBOP_PERMT_F4E 1 #define NV50_IR_SUBOP_PERMT_B4E 2 #define NV50_IR_SUBOP_PERMT_RC8 3 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 204d911ae17..674be692f8b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1771,10 +1771,10 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) emitCVT(insn); break; case OP_RSQ: - emitSFnOp(insn, 5); + emitSFnOp(insn, 5 + 2 * insn->subOp); break; case OP_RCP: - emitSFnOp(insn, 4); + emitSFnOp(insn, 4 + 2 * insn->subOp); break; case OP_LG2: emitSFnOp(insn, 3); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 3e1da7ec084..ee0487f15d6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -1265,8 +1265,8 
@@ CodeEmitterGM107::emitMUFU() case OP_SIN: mufu = 1; break; case OP_EX2: mufu = 2; break; case OP_LG2: mufu = 3; break; - case OP_RCP: mufu = 4; break; - case OP_RSQ: mufu = 5; break; + case OP_RCP: mufu = 4 + 2 * insn->subOp; break; + case OP_RSQ: mufu = 5 + 2 * insn->subOp; break; default: assert(!"invalid mufu"); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index e38a3b806b6..1a4f6e09b75 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -2365,10 +2365,10 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) emitCVT(insn); break; case OP_RSQ: - emitSFnOp(insn, 5); + emitSFnOp(insn, 5 + 2 * insn->subOp); break; case OP_RCP: - emitSFnOp(insn, 4); + emitSFnOp(insn, 4 + 2 * insn->subOp); break; case OP_LG2: emitSFnOp(insn, 3); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 5dfb77745a6..8ac3b265893 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -70,7 +70,30 @@ NVC0LegalizeSSA::handleDIV(Instruction *i) void NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) { - // TODO + assert(i->dType == TYPE_F64); + // There are instructions that will compute the high 32 bits of the 64-bit + // float. We will just stick 0 in the bottom 32 bits. + + bld.setPosition(i, false); + + // 1. Take the source and split it up. + Value *src[2], *dst[2], *def = i->getDef(0); + bld.mkSplit(src, 4, i->getSrc(0)); + + // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. + dst[0] = bld.loadImm(NULL, 0); + dst[1] = bld.getSSA(); + + // 3. The new version of the instruction takes the high 32 bits of the + // source and outputs the high 32 bits of the destination. 
+ i->setSrc(0, src[1]); + i->setDef(0, dst[1]); + i->setType(TYPE_F32); + i->subOp = NV50_IR_SUBOP_RCPRSQ_64H; + + // 4. Recombine the two dst pieces back into the original destination. + bld.setPosition(i, true); + bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); } bool @@ -1520,7 +1543,7 @@ NVC0LoweringPass::handleDIV(Instruction *i) if (!isFloatType(i->dType)) return true; bld.setPosition(i, false); - Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1)); + Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1)); i->op = OP_MUL; i->setSrc(1, rcp->getDef(0)); return true; @@ -1529,13 +1552,13 @@ NVC0LoweringPass::handleDIV(Instruction *i) bool NVC0LoweringPass::handleMOD(Instruction *i) { - if (i->dType != TYPE_F32) + if (!isFloatType(i->dType)) return true; - LValue *value = bld.getScratch(); - bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1)); - bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value); - bld.mkOp1(OP_TRUNC, TYPE_F32, value, value); - bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value); + LValue *value = bld.getScratch(typeSizeof(i->dType)); + bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1)); + bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value); + bld.mkOp1(OP_TRUNC, i->dType, value, value); + bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value); i->op = OP_SUB; i->setSrc(1, value); return true; @@ -1544,8 +1567,8 @@ NVC0LoweringPass::handleMOD(Instruction *i) bool NVC0LoweringPass::handleSQRT(Instruction *i) { - Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32, - bld.getSSA(), i->getSrc(0)); + Instruction *rsq = bld.mkOp1(OP_RSQ, i->dType, + bld.getSSA(typeSizeof(i->dType)), i->getSrc(0)); i->op = OP_MUL; i->setSrc(1, rsq->getDef(0)); -- 2.30.2