From: mmenzyns Date: Tue, 30 Jun 2020 13:57:49 +0000 (+0200) Subject: nv50: Clear nv50_ir_prog_info of dead and codegen specific variables X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=66ed9792edb702c18eeb4d23a7b792e32b3188d4;p=mesa.git nv50: Clear nv50_ir_prog_info of dead and codegen specific variables These variables are either not used in the code, only assigned but never accessed, or only used inside codegen. Another reason is that this patch precedes the shader cache patch, and these variables are useless to cache. Removing or moving them should make things clearer, because it eliminates the case where something in the structure is not cached. Shader cache patch: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4264 Signed-off-by: Mark Menzynski Reviewed-by: Karol Herbst Part-of: --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index d4bed4e7720..1796334b918 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1254,14 +1254,12 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.cp.numThreads[1] = info->prop.cp.numThreads[2] = 1; } - info->io.pointSize = 0xff; info->io.instanceId = 0xff; info->io.vertexId = 0xff; info->io.edgeFlagIn = 0xff; info->io.edgeFlagOut = 0xff; info->io.fragDepth = 0xff; info->io.sampleMask = 0xff; - info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; } int diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index a0b545fd24d..4fb89fa3f5e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1341,6 +1341,7 @@ public: int maxGPR; bool fp64; + bool persampleInvocation; MemoryPool mem_Instruction; MemoryPool mem_CmpInstruction; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 
63ea7f5e7e8..53927893752 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -91,7 +91,6 @@ struct nv50_ir_prog_info struct { int16_t maxGPR; /* may be -1 if none used */ - int16_t maxOutput; uint32_t tlsSpace; /* required local memory per thread */ uint32_t smemSize; /* required shared memory per block */ uint32_t *code; @@ -113,17 +112,8 @@ struct nv50_ir_prog_info uint8_t numPatchConstants; /* also included in numInputs/numOutputs */ uint8_t numSysVals; - struct { - uint32_t *buf; /* for IMMEDIATE_ARRAY */ - uint16_t bufSize; /* size of immediate array */ - uint16_t count; /* count of inline immediates */ - uint32_t *data; /* inline immediate data */ - uint8_t *type; /* for each vec4 (128 bit) */ - } immd; - union { struct { - uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */ bool usesDrawParameters; } vp; struct { @@ -134,7 +124,6 @@ struct nv50_ir_prog_info uint8_t outputPrim; /* PIPE_PRIM_{TRIANGLES,LINES,POINTS} */ } tp; struct { - uint8_t inputPrim; uint8_t outputPrim; unsigned instanceCount; unsigned maxVertices; @@ -146,14 +135,12 @@ struct nv50_ir_prog_info bool postDepthCoverage; bool separateFragData; bool usesDiscard; - bool persampleInvocation; bool usesSampleMaskIn; bool readsFramebuffer; bool readsSampleLocations; } fp; struct { uint32_t inputOffset; /* base address for user args */ - uint32_t sharedOffset; /* reserved space in s[] */ uint32_t gridInfoBase; /* base address for NTID,NCTAID */ uint16_t numThreads[3]; /* max number of threads */ } cp; @@ -169,7 +156,6 @@ struct nv50_ir_prog_info uint16_t ucpBase; /* base address for UCPs */ uint16_t drawInfoBase; /* base address for draw parameters */ uint16_t alphaRefBase; /* base address for alpha test values */ - uint8_t pointSize; /* output index for PointSize */ uint8_t instanceId; /* system value index of InstanceID */ uint8_t vertexId; /* system value index of VertexID */ uint8_t edgeFlagIn; @@ 
-177,7 +163,6 @@ struct nv50_ir_prog_info int8_t viewportId; /* output index of ViewportIndex */ uint8_t fragDepth; /* output index of FragDepth */ uint8_t sampleMask; /* output index of SampleMask */ - uint8_t backFaceColor[2]; /* input/output indices of back face colour */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ bool fp64; /* program uses fp64 math */ bool mul_zero_wins; /* program wants for x*0 = 0 */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a972b51b623..c5b11726cec 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1260,7 +1260,7 @@ Converter::parseNIR() break; case Program::TYPE_FRAGMENT: info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; - info->prop.fp.persampleInvocation = + prog->persampleInvocation = (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) || (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; @@ -1271,7 +1271,6 @@ Converter::parseNIR() !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); break; case Program::TYPE_GEOMETRY: - info->prop.gp.inputPrim = nir->info.gs.input_primitive; info->prop.gp.instanceCount = nir->info.gs.invocations; info->prop.gp.maxVertices = nir->info.gs.vertices_out; info->prop.gp.outputPrim = nir->info.gs.output_primitive; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3fd76f64de0..f0bf2932e3b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -114,12 +114,12 @@ public: return SrcRegister(fsr->Indirect); } - uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const + uint32_t getValueU32(int c, const uint32_t *data) const { assert(reg.File 
== TGSI_FILE_IMMEDIATE); assert(!reg.Absolute); assert(!reg.Negate); - return info->immd.data[reg.Index * 4 + getSwizzle(c)]; + return data[reg.Index * 4 + getSwizzle(c)]; } private: @@ -986,7 +986,7 @@ bool Instruction::checkDstSrcAliasing() const class Source { public: - Source(struct nv50_ir_prog_info *); + Source(struct nv50_ir_prog_info *, nv50_ir::Program *); ~Source(); public: @@ -1034,7 +1034,13 @@ public: std::vector bufferAtomics; + struct { + uint16_t count; /* count of inline immediates */ + uint32_t *data; /* inline immediate data */ + } immd; + private: + nv50_ir::Program *prog; int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); bool scanInstruction(const struct tgsi_full_instruction *); @@ -1047,12 +1053,16 @@ private: inline bool isEdgeFlagPassthrough(const Instruction&) const; }; -Source::Source(struct nv50_ir_prog_info *prog) : info(prog) +Source::Source(struct nv50_ir_prog_info *info, nv50_ir::Program *prog) +: info(info), prog(prog) { tokens = (const struct tgsi_token *)info->bin.source; - if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) + if (info->dbgFlags & NV50_IR_DEBUG_BASIC) tgsi_dump(tokens, 0); + + immd.count = 0; + immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); } Source::~Source() @@ -1060,10 +1070,8 @@ Source::~Source() if (insns) FREE(insns); - if (info->immd.data) - FREE(info->immd.data); - if (info->immd.type) - FREE(info->immd.type); + if (immd.data) + FREE(immd.data); } bool Source::scanSource() @@ -1086,8 +1094,6 @@ bool Source::scanSource() memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1); - info->immd.bufSize = 0; - info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; @@ -1102,9 +1108,6 @@ bool Source::scanSource() info->io.viewportId = -1; - info->immd.data = (uint32_t 
*)MALLOC(scan.immediate_count * 16); - info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); - tgsi_parse_init(&parse, tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); @@ -1164,9 +1167,6 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_GS_OUTPUT_PRIM: info->prop.gp.outputPrim = prop->u[0].Data; break; - case TGSI_PROPERTY_GS_INPUT_PRIM: - info->prop.gp.inputPrim = prop->u[0].Data; - break; case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: info->prop.gp.maxVertices = prop->u[0].Data; break; @@ -1240,14 +1240,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop) void Source::scanImmediate(const struct tgsi_full_immediate *imm) { - const unsigned n = info->immd.count++; + const unsigned n = immd.count++; assert(n < scan.immediate_count); for (int c = 0; c < 4; ++c) - info->immd.data[n * 4 + c] = imm->u[c].Uint; - - info->immd.type[n] = imm->Immediate.DataType; + immd.data[n * 4 + c] = imm->u[c].Uint; } int Source::inferSysValDirection(unsigned sn) const @@ -1388,7 +1386,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) break; case TGSI_SEMANTIC_SAMPLEID: case TGSI_SEMANTIC_SAMPLEPOS: - info->prop.fp.persampleInvocation = true; + prog->persampleInvocation = true; break; case TGSI_SEMANTIC_SAMPLEMASK: info->prop.fp.usesSampleMaskIn = true; @@ -2020,7 +2018,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) switch (src.getFile()) { case TGSI_FILE_IMMEDIATE: assert(!ptr); - return loadImm(NULL, info->immd.data[idx * 4 + swz]); + return loadImm(NULL, code->immd.data[idx * 4 + swz]); case TGSI_FILE_CONSTANT: return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr)); case TGSI_FILE_INPUT: @@ -2376,7 +2374,7 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) tgsi.getOpcode() == TGSI_OPCODE_TXP)) texi->tex.levelZero = true; if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) - 
texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); + texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, code->immd.data); texi->tex.useOffsets = tgsi.getNumTexOffsets(); for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { @@ -2637,7 +2635,7 @@ Converter::handleLOAD(Value *dst0[4]) if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, - tgsi.getSrc(1).getValueU32(0, info) + + tgsi.getSrc(1).getValueU32(0, code->immd.data) + src0_component_offset); } else { // yzw are ignored for buffers @@ -2784,7 +2782,7 @@ Converter::handleSTORE() if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, - tgsi.getSrc(0).getValueU32(0, info) + 4 * c); + tgsi.getSrc(0).getValueU32(0, code->immd.data) + 4 * c); } else { // yzw are ignored for buffers off = fetchSrc(0, 0); @@ -2902,7 +2900,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) Value *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, - tgsi.getSrc(1).getValueU32(c, info)); + tgsi.getSrc(1).getValueU32(c, code->immd.data)); else sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0); if (subOp == NV50_IR_SUBOP_ATOM_CAS) @@ -3537,7 +3535,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_ENDPRIM: { // get vertex stream (must be immediate) - unsigned int stream = tgsi.getSrc(0).getValueU32(0, info); + unsigned int stream = tgsi.getSrc(0).getValueU32(0, code->immd.data); if (stream && op == OP_RESTART) break; if (info->prop.gp.maxVertices == 0) @@ -3732,7 +3730,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) break; case TGSI_OPCODE_MEMBAR: { - uint32_t level = tgsi.getSrc(0).getValueU32(0, info); + uint32_t level = tgsi.getSrc(0).getValueU32(0, code->immd.data); geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); geni->fixed = 1; if (!(level & 
~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED))) @@ -3794,8 +3792,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) val0 = getScratch(); if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE && tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) { - loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) | - tgsi.getSrc(1).getValueU32(c, info)); + loadImm(val0, (tgsi.getSrc(2).getValueU32(c, code->immd.data) << 8) | + tgsi.getSrc(1).getValueU32(c, code->immd.data)); } else { src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); @@ -4338,7 +4336,7 @@ namespace nv50_ir { bool Program::makeFromTGSI(struct nv50_ir_prog_info *info) { - tgsi::Source src(info); + tgsi::Source src(info, this); if (!src.scanSource()) return false; tlsSize = info->bin.tlsSpace; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 067f9abaca8..1bcfb054ffa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2965,7 +2965,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i) bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ld->getDef(0), bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), bld.loadImm(NULL, 1), sampleid->getDef(0))); - if (prog->driver->prop.fp.persampleInvocation) { + if (prog->persampleInvocation) { bld.mkMov(i->getDef(0), masked); } else { bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), ld->getDef(0), masked,