From: Ilia Mirkin Date: Sat, 9 Apr 2016 16:00:54 +0000 (-0400) Subject: nvc0: fix gl_SampleMaskIn computation X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ba3f0b6d5920165c735d51500544da8c29b09060;p=mesa.git nvc0: fix gl_SampleMaskIn computation The SAMPLEMASK semantic should return only the bits covered by the current invocation. However, we were always retrieving the covmask, which returns the covered samples of the whole pixel. When not doing per-sample invocation, this is precisely what we want. However, when doing per-sample invocation, we have to select the sampleid'th bit and only return that. Furthermore, this means that we have to have a 1:1 correspondence between invocations and samples. This fixes most dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.* tests. A few failures remain due to disagreements about nr_samples==1 logic as well as what happens with MSAA x2 RTs when the shading fraction is 0.5. Signed-off-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 16dc1d12282..1f7de51e3f6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -146,6 +146,8 @@ struct nv50_ir_prog_info bool earlyFragTests; bool separateFragData; bool usesDiscard; + bool persampleInvocation; + bool usesSampleMaskIn; } fp; struct { uint32_t inputOffset; /* base address for user args */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 83009c5222e..6a5981daadf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1113,12 +1113,26 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i) } } +static void +selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +{ + int loc = entry->loc; + if 
(data.force_persample_interp) + code[loc + 1] |= 1 << 13; + else + code[loc + 1] &= ~(1 << 13); +} + void CodeEmitterGK110::emitSELP(const Instruction *i) { emitForm_21(i, 0x250, 0x050); if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13; + + if (i->subOp == 1) { + addInterp(0, 0, selpFlip); + } } void CodeEmitterGK110::emitTEXBAR(const Instruction *i) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 9dc2e309e04..a43d7b1296a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -894,6 +894,16 @@ CodeEmitterGM107::emitI2I() emitGPR (0x00, insn->def(0)); } +static void +selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +{ + int loc = entry->loc; + if (data.force_persample_interp) + code[loc + 1] |= 1 << 10; + else + code[loc + 1] &= ~(1 << 10); +} + void CodeEmitterGM107::emitSEL() { @@ -915,9 +925,14 @@ CodeEmitterGM107::emitSEL() break; } + emitINV (0x2a, insn->src(2)); emitPRED(0x27, insn->src(2)); emitGPR (0x08, insn->src(0)); emitGPR (0x00, insn->def(0)); + + if (insn->subOp == 1) { + addInterp(0, 0, selpFlip); + } } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 8819e3b3f5e..14f4be4eed9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1177,12 +1177,26 @@ CodeEmitterNVC0::emitSLCT(const CmpInstruction *i) code[0] |= 1 << 5; } +static void +selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +{ + int loc = entry->loc; + if (data.force_persample_interp) + code[loc + 1] |= 1 << 20; + else + code[loc + 1] &= ~(1 << 20); +} + void CodeEmitterNVC0::emitSELP(const Instruction *i) { emitForm_A(i, HEX64(20000000, 00000004)); if (i->src(2).mod & 
Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20; + + if (i->subOp == 1) { + addInterp(0, 0, selpFlip); + } } void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index d59950eb6f4..69e1a341bc3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1273,6 +1273,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_SEMANTIC_DRAWID: info->prop.vp.usesDrawParameters = true; break; + case TGSI_SEMANTIC_SAMPLEID: + case TGSI_SEMANTIC_SAMPLEPOS: + info->prop.fp.persampleInvocation = true; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->prop.fp.usesSampleMaskIn = true; + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 3bce9624ab6..1068c210f89 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -153,6 +153,7 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) : rZero(NULL), carry(NULL), + pOne(NULL), needTexBar(prog->getTarget()->getChipset() >= 0xe0) { } @@ -451,10 +452,12 @@ NVC0LegalizePostRA::visit(Function *fn) insertTextureBarriers(fn); rZero = new_LValue(fn, FILE_GPR); + pOne = new_LValue(fn, FILE_PREDICATE); carry = new_LValue(fn, FILE_FLAGS); rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR); carry->reg.data.id = 0; + pOne->reg.data.id = 7; return true; } @@ -466,8 +469,15 @@ NVC0LegalizePostRA::replaceZero(Instruction *i) if (s == 2 && i->op == OP_SUCLAMP) continue; ImmediateValue *imm = i->getSrc(s)->asImm(); - if (imm && imm->reg.data.u64 == 0) - i->setSrc(s, rZero); + if (imm) { + if (i->op == OP_SELP && s == 2) { + i->setSrc(s, pOne); + if 
(imm->reg.data.u64 == 0) + i->src(s).mod = i->src(s).mod ^ Modifier(NV50_IR_MOD_NOT); + } else if (imm->reg.data.u64 == 0) { + i->setSrc(s, rZero); + } + } } } @@ -2204,10 +2214,25 @@ NVC0LoweringPass::handleRDSV(Instruction *i) off); break; } - case SV_SAMPLE_MASK: + case SV_SAMPLE_MASK: { ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK; + Instruction *sampleid = + bld.mkOp1(OP_PIXLD, TYPE_U32, bld.getSSA(), bld.mkImm(0)); + sampleid->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; + Value *masked = + bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ld->getDef(0), + bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), + bld.loadImm(NULL, 1), sampleid->getDef(0))); + if (prog->driver->prop.fp.persampleInvocation) { + bld.mkMov(i->getDef(0), masked); + } else { + bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), ld->getDef(0), masked, + bld.mkImm(0)) + ->subOp = 1; + } break; + } case SV_BASEVERTEX: case SV_BASEINSTANCE: case SV_DRAWID: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 17883a9b8f6..c007e09439e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -79,6 +79,7 @@ private: private: LValue *rZero; LValue *carry; + LValue *pOne; const bool needTexBar; }; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 944efa042bf..9db45c0759a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -464,6 +464,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) fp->hdr[18] |= 0xf; fp->fp.early_z = info->prop.fp.earlyFragTests; + fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn; return 0; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 
bd852e27c36..08af3c823b8 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -48,6 +48,7 @@ struct nvc0_program { uint8_t early_z; uint8_t colors; uint8_t color_interp[2]; + bool sample_mask_in; bool force_persample_interp; bool flatshade; } fp; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index e8d41729392..4280db44bb6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -1,5 +1,6 @@ #include "util/u_format.h" +#include "util/u_framebuffer.h" #include "util/u_math.h" #include "nvc0/nvc0_context.h" @@ -551,8 +552,14 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0) int samples; samples = util_next_power_of_two(nvc0->min_samples); - if (samples > 1) + if (samples > 1) { + // If we're using the incoming sample mask and doing sample shading, we + // have to do sample shading "to the max", otherwise there's no way to + // tell which sets of samples are covered by the current invocation. 
+ if (nvc0->fragprog->fp.sample_mask_in) + samples = util_framebuffer_get_num_samples(&nvc0->framebuffer); samples |= NVC0_3D_SAMPLE_SHADING_ENABLE; + } IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples); } @@ -708,6 +715,9 @@ validate_list_3d[] = { { nvc0_tevlprog_validate, NVC0_NEW_3D_TEVLPROG }, { nvc0_validate_tess_state, NVC0_NEW_3D_TESSFACTOR }, { nvc0_gmtyprog_validate, NVC0_NEW_3D_GMTYPROG }, + { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES | + NVC0_NEW_3D_FRAGPROG | + NVC0_NEW_3D_FRAMEBUFFER }, { nvc0_fragprog_validate, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER }, { nvc0_validate_derived_1, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA | NVC0_NEW_3D_RASTERIZER }, @@ -726,7 +736,6 @@ validate_list_3d[] = { { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS }, { nvc0_idxbuf_validate, NVC0_NEW_3D_IDXBUF }, { nvc0_tfb_validate, NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG }, - { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES }, { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST }, };