From 854554c314e556c158b0e2aa3911a43f58fd6d34 Mon Sep 17 00:00:00 2001 From: Boyan Ding Date: Mon, 10 Apr 2017 22:55:57 +0800 Subject: [PATCH] gm107/ir: Emit third src 'bound' and optional predicate output of SHFL v2: Emit the original hard-coded 0x1c03 when OP_SHFL is used in gm107's lowering (Samuel Pitoiset) Signed-off-by: Boyan Ding Reviewed-by: Ilia Mirkin --- .../nouveau/codegen/nv50_ir_emit_gm107.cpp | 23 +++++++++++++++---- .../codegen/nv50_ir_lowering_gm107.cpp | 15 ++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index c3c0dcd9fc1..944563c93cf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -967,11 +967,26 @@ CodeEmitterGM107::emitSHFL() break; } - /*XXX: what is this arg? hardcode immediate for now */ - emitField(0x22, 13, 0x1c03); - type |= 2; + switch (insn->src(2).getFile()) { + case FILE_GPR: + emitGPR(0x27, insn->src(2)); + break; + case FILE_IMMEDIATE: + emitIMMD(0x22, 13, insn->src(2)); + type |= 2; + break; + default: + assert(!"invalid src2 file"); + break; + } + + if (!insn->defExists(1)) + emitPRED(0x30); + else { + assert(insn->def(1).getFile() == FILE_PREDICATE); + emitPRED(0x30, insn->def(1)); + } - emitPRED (0x30); emitField(0x1e, 2, insn->subOp); emitField(0x1c, 2, type); emitGPR (0x08, insn->src(0)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index 371ebae40c1..6b9edd48645 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -41,6 +41,8 @@ namespace nv50_ir { ((QOP_##q << 6) | (QOP_##r << 4) | \ (QOP_##s << 2) | (QOP_##t << 0)) +#define SHFL_BOUND_QUAD 0x1c03 + void GM107LegalizeSSA::handlePFETCH(Instruction *i) { @@ -120,7 +122,8 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i) // mov coordinates from lane l to all lanes bld.mkOp(OP_QUADON, TYPE_NONE, NULL); for (c = 0; c < dim; ++c) { - bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l)); + bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), + bld.mkImm(l), bld.mkImm(SHFL_BOUND_QUAD)); add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero); add->subOp = 0x00; add->lanes = 1; /* abused for .ndv */ @@ -128,7 +131,8 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i) // add dPdx from lane l to lanes dx for (c = 0; c < dim; ++c) { - bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l)); + bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l), + bld.mkImm(SHFL_BOUND_QUAD)); add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); add->subOp = qOps[l][0]; add->lanes = 1; /* abused for .ndv */ @@ -136,7 +140,8 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i) // add dPdy from lane l to lanes dy for (c = 0; c < dim; ++c) { - bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l)); + bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l), + bld.mkImm(SHFL_BOUND_QUAD)); add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); add->subOp = qOps[l][1]; add->lanes = 1; /* abused for .ndv */ @@ -203,8 +208,8 @@ GM107LoweringPass::handleDFDX(Instruction *insn) break; } - shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(), - insn->getSrc(0), bld.mkImm(xid)); + shfl = bld.mkOp3(OP_SHFL, TYPE_F32, bld.getScratch(), insn->getSrc(0), + bld.mkImm(xid), bld.mkImm(SHFL_BOUND_QUAD)); shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY; insn->op = OP_QUADOP; insn->subOp = qop; -- 2.30.2