From 8c764a2321ad51dacd5538b0a59f3ca446d80747 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 14 Jan 2017 18:55:25 -0500 Subject: [PATCH] nvc0: add support for MUL_ZERO_WINS property This sets the dnz flag on all the relevant multiplication operations. At emission time, this will only be supported by nvc0+, so nv50 will need a different solution. Signed-off-by: Ilia Mirkin --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 1 + .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 30 ++++++++++++++----- .../nouveau/codegen/nv50_ir_peephole.cpp | 1 + .../drivers/nouveau/nvc0/nvc0_screen.c | 2 +- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index a0388011aed..65d0904d0f1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -177,6 +177,7 @@ struct nv50_ir_prog_info uint8_t backFaceColor[2]; /* input/output indices of back face colour */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ bool fp64; /* program uses fp64 math */ + bool mul_zero_wins; /* program wants for x*0 = 0 */ bool nv50styleSurfaces; /* generate gX[] access for raw buffers */ uint16_t texBindBase; /* base address for tex handles (nve4) */ uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 64bfd084326..6320e529980 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1166,6 +1166,9 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL: info->prop.fp.earlyFragTests = prop->u[0].Data; break; + case TGSI_PROPERTY_MUL_ZERO_WINS: + info->io.mul_zero_wins = prop->u[0].Data; + break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); break; @@ -2058,12 +2061,14 @@ Converter::buildDot(int dim) Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0); Value *dotp = getScratch(); - mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); + mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1) + ->dnz = info->io.mul_zero_wins; for (int c = 1; c < dim; ++c) { src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); - mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp); + mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp) + ->dnz = info->io.mul_zero_wins; } return dotp; } @@ -3033,6 +3038,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src1 = fetchSrc(1, c); geni = mkOp2(op, dstTy, dst0[c], src0, src1); geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); + if (op == OP_MUL && dstTy == TYPE_F32) + geni->dnz = info->io.mul_zero_wins; } break; case TGSI_OPCODE_MAD: @@ -3043,7 +3050,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src0 = fetchSrc(0, c); src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); - mkOp3(op, dstTy, dst0[c], src0, src1, src2); + geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2); + if (dstTy == TYPE_F32) + geni->dnz = info->io.mul_zero_wins; } break; case TGSI_OPCODE_MOV: @@ -3142,7 +3151,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) if (dst0[1]) { mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); - mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); + mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0) + ->dnz = info->io.mul_zero_wins; } if (dst0[3]) loadImm(dst0[3], 1.0f); @@ -3175,7 +3185,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) if (dst0[1]) { src0 = fetchSrc(0, 1); src1 = fetchSrc(1, 1); - mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); + mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1) + ->dnz = info->io.mul_zero_wins; } if (dst0[2]) mkMov(dst0[2], fetchSrc(0, 2)); @@ -3188,7 +3199,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); mkOp3(OP_MAD, TYPE_F32, dst0[c], - mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); + mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2) + ->dnz = info->io.mul_zero_wins; } break; case TGSI_OPCODE_LIT: @@ -3200,12 +3212,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) val0 = getSSA(); src0 = fetchSrc(1, (c + 1) % 3); src1 = fetchSrc(0, (c + 2) % 3); - mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); + mkOp2(OP_MUL, TYPE_F32, val0, src0, src1) + ->dnz = info->io.mul_zero_wins; mkOp1(OP_NEG, TYPE_F32, val0, val0); src0 = fetchSrc(0, (c + 1) % 3); src1 = fetchSrc(1, (c + 2) % 3); - mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); + mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0) + ->dnz = info->io.mul_zero_wins; } else { loadImm(dst0[c], 1.0f); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 04b6af2471b..c99680613f1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1730,6 +1730,7 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) add->op = toOp; add->subOp = src->getInsn()->subOp; // potentially mul-high + add->dnz = src->getInsn()->dnz; add->dType = src->getInsn()->dType; // sign matters for imad hi add->sType = src->getInsn()->sType; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 7e9869dd4d1..47a6e6f89d9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -242,6 +242,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_TGSI_MUL_ZERO_WINS: return 1; case PIPE_CAP_COMPUTE: return (class_3d < GP100_3D_CLASS); @@ -279,7 +280,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: - case PIPE_CAP_TGSI_MUL_ZERO_WINS: return 0; case PIPE_CAP_VENDOR_ID: -- 2.30.2