From f2924994bd3e0389446c2ca6bc23d4712a7742de Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 4 Jul 2019 16:02:09 +0200 Subject: [PATCH] nv50/ir: add nv50_ir_prog_info_out Split out the output relevant fields from the nv50_ir_prog_info struct in order to have a cleaner separation between the input and output of the compilation. Signed-off-by: Karol Herbst Part-of: --- .../drivers/nouveau/codegen/nv50_ir.cpp | 49 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 9 +- .../drivers/nouveau/codegen/nv50_ir_driver.h | 81 ++++-- .../nouveau/codegen/nv50_ir_from_common.cpp | 14 +- .../nouveau/codegen/nv50_ir_from_common.h | 3 +- .../nouveau/codegen/nv50_ir_from_nir.cpp | 176 ++++++------ .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 261 +++++++++--------- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 6 +- .../nouveau/codegen/nv50_ir_target.cpp | 2 +- .../drivers/nouveau/codegen/nv50_ir_target.h | 5 +- .../nouveau/codegen/nv50_ir_target_nv50.cpp | 17 +- .../nouveau/codegen/nv50_ir_target_nv50.h | 3 +- .../drivers/nouveau/nouveau_compiler.c | 9 +- .../drivers/nouveau/nv50/nv50_program.c | 55 ++-- .../drivers/nouveau/nvc0/nvc0_program.c | 87 +++--- 15 files changed, 412 insertions(+), 365 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 18419c1d7cb..af261381fbe 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1240,37 +1240,43 @@ void Program::releaseValue(Value *value) extern "C" { static void -nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) +nv50_ir_init_prog_info(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { + info_out->target = info->target; + info_out->type = info->type; if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { - info->prop.tp.domain = PIPE_PRIM_MAX; - info->prop.tp.outputPrim = PIPE_PRIM_MAX; + info_out->prop.tp.domain = PIPE_PRIM_MAX; + info_out->prop.tp.outputPrim = PIPE_PRIM_MAX; } if (info->type == PIPE_SHADER_GEOMETRY) { - info->prop.gp.instanceCount = 1; - info->prop.gp.maxVertices = 1; + info_out->prop.gp.instanceCount = 1; + info_out->prop.gp.maxVertices = 1; } if (info->type == PIPE_SHADER_COMPUTE) { info->prop.cp.numThreads[0] = info->prop.cp.numThreads[1] = info->prop.cp.numThreads[2] = 1; } - info->io.instanceId = 0xff; - info->io.vertexId = 0xff; - info->io.edgeFlagIn = 0xff; - info->io.edgeFlagOut = 0xff; - info->io.fragDepth = 0xff; - info->io.sampleMask = 0xff; + info_out->bin.smemSize = info->bin.smemSize; + info_out->io.genUserClip = info->io.genUserClip; + info_out->io.instanceId = 0xff; + info_out->io.vertexId = 0xff; + info_out->io.edgeFlagIn = 0xff; + info_out->io.edgeFlagOut = 0xff; + info_out->io.fragDepth = 0xff; + info_out->io.sampleMask = 0xff; } int -nv50_ir_generate_code(struct nv50_ir_prog_info *info) +nv50_ir_generate_code(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { int ret = 0; nv50_ir::Program::Type type; - nv50_ir_init_prog_info(info); + nv50_ir_init_prog_info(info, info_out); #define PROG_TYPE_CASE(a, b) \ case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break @@ -1298,15 +1304,16 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) return -1; } prog->driver = info; + prog->driver_out = info_out; prog->dbgFlags = info->dbgFlags; prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { case PIPE_SHADER_IR_NIR: - ret = prog->makeFromNIR(info) ? 0 : -2; + ret = prog->makeFromNIR(info, info_out) ? 0 : -2; break; case PIPE_SHADER_IR_TGSI: - ret = prog->makeFromTGSI(info) ? 0 : -2; + ret = prog->makeFromTGSI(info, info_out) ? 0 : -2; break; default: ret = -1; @@ -1317,7 +1324,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) prog->print(); - targ->parseDriverInfo(info); + targ->parseDriverInfo(info, info_out); prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); prog->convertToSSA(); @@ -1339,7 +1346,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->optimizePostRA(info->optLevel); - if (!prog->emitBinary(info)) { + if (!prog->emitBinary(info_out)) { ret = -5; goto out; } @@ -1347,10 +1354,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) out: INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); - info->bin.maxGPR = prog->maxGPR; - info->bin.code = prog->code; - info->bin.codeSize = prog->binSize; - info->bin.tlsSpace = prog->tlsSize; + info_out->bin.maxGPR = prog->maxGPR; + info_out->bin.code = prog->code; + info_out->bin.codeSize = prog->binSize; + info_out->bin.tlsSpace = prog->tlsSize; delete prog; nv50_ir::Target::destroy(targ); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f75a221e648..c523dccde75 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1312,13 +1312,15 @@ public: inline void del(Function *fn, int& id) { allFuncs.remove(id); } inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } - bool makeFromNIR(struct nv50_ir_prog_info *); - bool makeFromTGSI(struct nv50_ir_prog_info *); + bool makeFromNIR(struct nv50_ir_prog_info *, + struct nv50_ir_prog_info_out *); + bool makeFromTGSI(struct nv50_ir_prog_info *, + struct nv50_ir_prog_info_out *); bool convertToSSA(); bool optimizeSSA(int level); bool optimizePostRA(int level); bool registerAllocation(); - bool emitBinary(struct nv50_ir_prog_info *); + bool emitBinary(struct nv50_ir_prog_info_out *); const Target *getTarget() const { return target; } @@ -1355,6 +1357,7 @@ public: void *targetPriv; // e.g. to carry information between passes const struct nv50_ir_prog_info *driver; // for driver configuration + const struct nv50_ir_prog_info_out *driver_out; // for driver configuration void releaseInstruction(Instruction *); void releaseValue(Value *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 72b9b1c7b20..09079a4fb82 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -79,6 +79,9 @@ struct nv50_ir_prog_symbol #define NVISA_GM200_CHIPSET 0x120 #define NVISA_GV100_CHIPSET 0x140 +struct nv50_ir_prog_info_out; + +/* used for the input data and assignSlot interface */ struct nv50_ir_prog_info { uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */ @@ -89,6 +92,51 @@ struct nv50_ir_prog_info uint8_t dbgFlags; bool omitLineNum; /* only used for printing the prog when dbgFlags is set */ + struct { + uint32_t smemSize; /* required shared memory per block */ + uint8_t sourceRep; /* PIPE_SHADER_IR_* */ + const void *source; + } bin; + + union { + struct { + uint32_t inputOffset; /* base address for user args */ + uint32_t gridInfoBase; /* base address for NTID,NCTAID */ + uint16_t numThreads[3]; /* max number of threads */ + } cp; + } prop; + + struct { + int8_t genUserClip; /* request user clip planes for ClipVertex */ + uint8_t auxCBSlot; /* driver constant buffer slot */ + uint16_t ucpBase; /* base address for UCPs */ + uint16_t drawInfoBase; /* base address for draw parameters */ + uint16_t alphaRefBase; /* base address for alpha test values */ + int8_t viewportId; /* output index of ViewportIndex */ + bool mul_zero_wins; /* program wants for x*0 = 0 */ + bool nv50styleSurfaces; /* generate gX[] access for raw buffers */ + uint16_t texBindBase; /* base address for tex handles (nve4) */ + uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */ + uint16_t suInfoBase; /* base address for surface info (nve4) */ + uint16_t bindlessBase; /* base address for bindless image info (nve4) */ + uint16_t bufInfoBase; /* base address for buffer info */ + uint16_t sampleInfoBase; /* base address for sample positions */ + uint8_t msInfoCBSlot; /* cX[] used for multisample info */ + uint16_t msInfoBase; /* base address for multisample info */ + uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */ + } io; + + /* driver callback to assign input/output locations */ + int (*assignSlots)(struct nv50_ir_prog_info_out *); +}; + +/* the produced binary with metadata */ +struct nv50_ir_prog_info_out +{ + uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */ + + uint8_t type; /* PIPE_SHADER */ + struct { int16_t maxGPR; /* may be -1 if none used */ uint32_t tlsSpace; /* required local memory per thread */ @@ -96,8 +144,6 @@ struct nv50_ir_prog_info uint32_t *code; uint32_t codeSize; uint32_t instructions; - uint8_t sourceRep; /* PIPE_SHADER_IR_* */ - const void *source; void *relocData; void *fixupData; } bin; @@ -131,54 +177,30 @@ struct nv50_ir_prog_info bool writesDepth; bool earlyFragTests; bool postDepthCoverage; - bool separateFragData; bool usesDiscard; bool usesSampleMaskIn; bool readsFramebuffer; bool readsSampleLocations; + bool separateFragData; } fp; - struct { - uint32_t inputOffset; /* base address for user args */ - uint32_t gridInfoBase; /* base address for NTID,NCTAID */ - uint16_t numThreads[3]; /* max number of threads */ - } cp; } prop; - uint8_t numBarriers; - struct { uint8_t clipDistances; /* number of clip distance outputs */ uint8_t cullDistances; /* number of cull distance outputs */ int8_t genUserClip; /* request user clip planes for ClipVertex */ - uint8_t auxCBSlot; /* driver constant buffer slot */ - uint16_t ucpBase; /* base address for UCPs */ - uint16_t drawInfoBase; /* base address for draw parameters */ - uint16_t alphaRefBase; /* base address for alpha test values */ uint8_t instanceId; /* system value index of InstanceID */ uint8_t vertexId; /* system value index of VertexID */ uint8_t edgeFlagIn; uint8_t edgeFlagOut; - int8_t viewportId; /* output index of ViewportIndex */ uint8_t fragDepth; /* output index of FragDepth */ uint8_t sampleMask; /* output index of SampleMask */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ bool fp64; /* program uses fp64 math */ - bool mul_zero_wins; /* program wants for x*0 = 0 */ bool layer_viewport_relative; - bool nv50styleSurfaces; /* generate gX[] access for raw buffers */ - uint16_t texBindBase; /* base address for tex handles (nve4) */ - uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */ - uint16_t suInfoBase; /* base address for surface info (nve4) */ - uint16_t bindlessBase; /* base address for bindless image info (nve4) */ - uint16_t bufInfoBase; /* base address for buffer info */ - uint16_t sampleInfoBase; /* base address for sample positions */ - uint8_t msInfoCBSlot; /* cX[] used for multisample info */ - uint16_t msInfoBase; /* base address for multisample info */ - uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */ } io; - /* driver callback to assign input/output locations */ - int (*assignSlots)(struct nv50_ir_prog_info *); + uint8_t numBarriers; void *driverPriv; }; @@ -190,7 +212,8 @@ extern "C" { const struct nir_shader_compiler_options * nv50_ir_nir_shader_compiler_options(int chipset); -extern int nv50_ir_generate_code(struct nv50_ir_prog_info *); +extern int nv50_ir_generate_code(struct nv50_ir_prog_info *, + struct nv50_ir_prog_info_out *); extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, uint32_t codePos, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp index 0ad6087e588..0a70c6881cc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp @@ -24,9 +24,11 @@ namespace nv50_ir { -ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info) +ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info, + nv50_ir_prog_info_out *info_out) : BuildUtil(prog), - info(info) {} + info(info), + info_out(info_out) {} ConverterCommon::Subroutine * ConverterCommon::getSubroutine(unsigned ip) @@ -82,7 +84,7 @@ ConverterCommon::handleUserClipPlanes() int n, i, c; for (c = 0; c < 4; ++c) { - for (i = 0; i < info->io.genUserClip; ++i) { + for (i = 0; i < info_out->io.genUserClip; ++i) { Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, TYPE_F32, info->io.ucpBase + i * 16 + c * 4); Value *ucp = mkLoadv(TYPE_F32, sym, NULL); @@ -93,13 +95,13 @@ ConverterCommon::handleUserClipPlanes() } } - const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; + const int first = info_out->numOutputs - (info_out->io.genUserClip + 3) / 4; - for (i = 0; i < info->io.genUserClip; ++i) { + for (i = 0; i < info_out->io.genUserClip; ++i) { n = i / 4 + first; c = i % 4; Symbol *sym = - mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); + mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info_out->out[n].slot[c] * 4); mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h index e44eea86a90..a144ca23af0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h @@ -28,7 +28,7 @@ namespace nv50_ir { class ConverterCommon : public BuildUtil { public: - ConverterCommon(Program *, nv50_ir_prog_info *); + ConverterCommon(Program *, nv50_ir_prog_info *, nv50_ir_prog_info_out *); protected: struct Subroutine { @@ -50,6 +50,7 @@ protected: } sub; struct nv50_ir_prog_info *info; + struct nv50_ir_prog_info_out *info_out; Value *fragCoord[4]; Value *clipVtx[4]; Value *outBase; // base address of vertex out patch (for TCP) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index b293e5d60b6..b8c77112c01 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -74,7 +74,7 @@ function_temp_type_info(const struct glsl_type *type, unsigned *size, unsigned * class Converter : public ConverterCommon { public: - Converter(Program *, nir_shader *, nv50_ir_prog_info *); + Converter(Program *, nir_shader *, nv50_ir_prog_info *, nv50_ir_prog_info_out *); bool run(); private: @@ -192,8 +192,9 @@ private: }; }; -Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) - : ConverterCommon(prog, info), +Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info, + nv50_ir_prog_info_out *info_out) + : ConverterCommon(prog, info, info_out), nir(nir), curLoopDepth(0), curIfDepth(0), @@ -975,34 +976,34 @@ bool Converter::assignSlots() { unsigned index; info->io.viewportId = -1; - info->numInputs = 0; - info->numOutputs = 0; - info->numSysVals = 0; + info_out->numInputs = 0; + info_out->numOutputs = 0; + info_out->numSysVals = 0; for (uint8_t i = 0; i < SYSTEM_VALUE_MAX; ++i) { if (!(nir->info.system_values_read & 1ull << i)) continue; - info->sv[info->numSysVals].sn = tgsi_get_sysval_semantic(i); - info->sv[info->numSysVals].si = 0; - info->sv[info->numSysVals].input = 0; // TODO inferSysValDirection(sn); + info_out->sv[info_out->numSysVals].sn = tgsi_get_sysval_semantic(i); + info_out->sv[info_out->numSysVals].si = 0; + info_out->sv[info_out->numSysVals].input = 0; // TODO inferSysValDirection(sn); switch (i) { case SYSTEM_VALUE_INSTANCE_ID: - info->io.instanceId = info->numSysVals; + info_out->io.instanceId = info_out->numSysVals; break; case SYSTEM_VALUE_TESS_LEVEL_INNER: case SYSTEM_VALUE_TESS_LEVEL_OUTER: - info->sv[info->numSysVals].patch = 1; + info_out->sv[info_out->numSysVals].patch = 1; break; case SYSTEM_VALUE_VERTEX_ID: - info->io.vertexId = info->numSysVals; + info_out->io.vertexId = info_out->numSysVals; break; default: break; } - info->numSysVals += 1; + info_out->numSysVals += 1; } if (prog->getType() == Program::TYPE_COMPUTE) @@ -1021,7 +1022,7 @@ bool Converter::assignSlots() { tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true, &name, &index); for (uint16_t i = 0; i < slots; ++i) { - setInterpolate(&info->in[vary + i], var->data.interpolation, + setInterpolate(&info_out->in[vary + i], var->data.interpolation, var->data.centroid | var->data.sample, name); } break; @@ -1034,7 +1035,7 @@ bool Converter::assignSlots() { tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true, &name, &index); if (var->data.patch && name == TGSI_SEMANTIC_PATCH) - info->numPatchConstants = MAX2(info->numPatchConstants, index + slots); + info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots); break; case Program::TYPE_VERTEX: if (slot >= VERT_ATTRIB_GENERIC0) @@ -1042,7 +1043,7 @@ bool Converter::assignSlots() { vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index); switch (name) { case TGSI_SEMANTIC_EDGEFLAG: - info->io.edgeFlagIn = vary; + info_out->io.edgeFlagIn = vary; break; default: break; @@ -1054,14 +1055,14 @@ bool Converter::assignSlots() { } for (uint16_t i = 0u; i < slots; ++i, ++vary) { - nv50_ir_varying *v = &info->in[vary]; + nv50_ir_varying *v = &info_out->in[vary]; v->patch = var->data.patch; v->sn = name; v->si = index + i; v->mask |= getMaskForType(type, i) << var->data.location_frac; } - info->numInputs = std::max(info->numInputs, vary); + info_out->numInputs = std::max(info_out->numInputs, vary); } nir_foreach_shader_out_variable(var, nir) { @@ -1078,22 +1079,20 @@ bool Converter::assignSlots() { switch (name) { case TGSI_SEMANTIC_COLOR: if (!var->data.fb_fetch_output) - info->prop.fp.numColourResults++; - + info_out->prop.fp.numColourResults++; if (var->data.location == FRAG_RESULT_COLOR && nir->info.outputs_written & BITFIELD64_BIT(var->data.location)) - info->prop.fp.separateFragData = true; - + info_out->prop.fp.separateFragData = true; // sometimes we get FRAG_RESULT_DATAX with data.index 0 // sometimes we get FRAG_RESULT_DATA0 with data.index X index = index == 0 ? var->data.index : index; break; case TGSI_SEMANTIC_POSITION: - info->io.fragDepth = vary; - info->prop.fp.writesDepth = true; + info_out->io.fragDepth = vary; + info_out->prop.fp.writesDepth = true; break; case TGSI_SEMANTIC_SAMPLEMASK: - info->io.sampleMask = vary; + info_out->io.sampleMask = vary; break; default: break; @@ -1108,17 +1107,17 @@ bool Converter::assignSlots() { if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER && name != TGSI_SEMANTIC_TESSOUTER) - info->numPatchConstants = MAX2(info->numPatchConstants, index + slots); + info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots); switch (name) { case TGSI_SEMANTIC_CLIPDIST: - info->io.genUserClip = -1; + info_out->io.genUserClip = -1; break; case TGSI_SEMANTIC_CLIPVERTEX: clipVertexOutput = vary; break; case TGSI_SEMANTIC_EDGEFLAG: - info->io.edgeFlagOut = vary; + info_out->io.edgeFlagOut = vary; break; case TGSI_SEMANTIC_POSITION: if (clipVertexOutput < 0) @@ -1134,7 +1133,7 @@ bool Converter::assignSlots() { } for (uint16_t i = 0u; i < slots; ++i, ++vary) { - nv50_ir_varying *v = &info->out[vary]; + nv50_ir_varying *v = &info_out->out[vary]; v->patch = var->data.patch; v->sn = name; v->si = index + i; @@ -1143,24 +1142,24 @@ bool Converter::assignSlots() { if (nir->info.outputs_read & 1ull << slot) v->oread = 1; } - info->numOutputs = std::max(info->numOutputs, vary); + info_out->numOutputs = std::max(info_out->numOutputs, vary); } - if (info->io.genUserClip > 0) { - info->io.clipDistances = info->io.genUserClip; + if (info_out->io.genUserClip > 0) { + info_out->io.clipDistances = info_out->io.genUserClip; - const unsigned int nOut = (info->io.genUserClip + 3) / 4; + const unsigned int nOut = (info_out->io.genUserClip + 3) / 4; for (unsigned int n = 0; n < nOut; ++n) { - unsigned int i = info->numOutputs++; - info->out[i].id = i; - info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; - info->out[i].si = n; - info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4); + unsigned int i = info_out->numOutputs++; + info_out->out[i].id = i; + info_out->out[i].sn = TGSI_SEMANTIC_CLIPDIST; + info_out->out[i].si = n; + info_out->out[i].mask = ((1 << info_out->io.clipDistances) - 1) >> (n * 4); } } - return info->assignSlots(info) == 0; + return info->assignSlots(info_out) == 0; } uint32_t @@ -1210,7 +1209,7 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot) assert(!input || idx < PIPE_MAX_SHADER_INPUTS); assert(input || idx < PIPE_MAX_SHADER_OUTPUTS); - const nv50_ir_varying *vary = input ? info->in : info->out; + const nv50_ir_varying *vary = input ? info_out->in : info_out->out; return vary[idx].slot[slot] * 4; } @@ -1268,63 +1267,63 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, } mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0, - split[0])->perPatch = info->out[idx].patch; + split[0])->perPatch = info_out->out[idx].patch; mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0, - split[1])->perPatch = info->out[idx].patch; + split[1])->perPatch = info_out->out[idx].patch; } else { if (op == OP_EXPORT) src = mkMov(getSSA(size), src, ty)->getDef(0); mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0, - src)->perPatch = info->out[idx].patch; + src)->perPatch = info_out->out[idx].patch; } } bool Converter::parseNIR() { - info->bin.tlsSpace = nir->scratch_size; - info->io.clipDistances = nir->info.clip_distance_array_size; - info->io.cullDistances = nir->info.cull_distance_array_size; - info->io.layer_viewport_relative = nir->info.layer_viewport_relative; + info_out->bin.tlsSpace = nir->scratch_size; + info_out->io.clipDistances = nir->info.clip_distance_array_size; + info_out->io.cullDistances = nir->info.cull_distance_array_size; + info_out->io.layer_viewport_relative = nir->info.layer_viewport_relative; switch(prog->getType()) { case Program::TYPE_COMPUTE: info->prop.cp.numThreads[0] = nir->info.cs.local_size[0]; info->prop.cp.numThreads[1] = nir->info.cs.local_size[1]; info->prop.cp.numThreads[2] = nir->info.cs.local_size[2]; - info->bin.smemSize += nir->info.cs.shared_size; + info_out->bin.smemSize += nir->info.cs.shared_size; break; case Program::TYPE_FRAGMENT: - info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; + info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; prog->persampleInvocation = (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) || (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); - info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; - info->prop.fp.readsSampleLocations = + info_out->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; + info_out->prop.fp.readsSampleLocations = (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); - info->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote; - info->prop.fp.usesSampleMaskIn = + info_out->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote; + info_out->prop.fp.usesSampleMaskIn = !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); break; case Program::TYPE_GEOMETRY: - info->prop.gp.instanceCount = nir->info.gs.invocations; - info->prop.gp.maxVertices = nir->info.gs.vertices_out; - info->prop.gp.outputPrim = nir->info.gs.output_primitive; + info_out->prop.gp.instanceCount = nir->info.gs.invocations; + info_out->prop.gp.maxVertices = nir->info.gs.vertices_out; + info_out->prop.gp.outputPrim = nir->info.gs.output_primitive; break; case Program::TYPE_TESSELLATION_CONTROL: case Program::TYPE_TESSELLATION_EVAL: if (nir->info.tess.primitive_mode == GL_ISOLINES) - info->prop.tp.domain = GL_LINES; + info_out->prop.tp.domain = GL_LINES; else - info->prop.tp.domain = nir->info.tess.primitive_mode; - info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out; - info->prop.tp.outputPrim = + info_out->prop.tp.domain = nir->info.tess.primitive_mode; + info_out->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out; + info_out->prop.tp.outputPrim = nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES; - info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3; - info->prop.tp.winding = !nir->info.tess.ccw; + info_out->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3; + info_out->prop.tp.winding = !nir->info.tess.ccw; break; case Program::TYPE_VERTEX: - info->prop.vp.usesDrawParameters = + info_out->prop.vp.usesDrawParameters = (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) || (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) || (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)); @@ -1350,7 +1349,7 @@ Converter::visit(nir_function *function) setPosition(entry, true); - if (info->io.genUserClip > 0) { + if (info_out->io.genUserClip > 0) { for (int c = 0; c < 4; ++c) clipVtx[c] = getScratch(); } @@ -1383,7 +1382,7 @@ Converter::visit(nir_function *function) if ((prog->getType() == Program::TYPE_VERTEX || prog->getType() == Program::TYPE_TESSELLATION_EVAL) - && info->io.genUserClip > 0) + && info_out->io.genUserClip > 0) handleUserClipPlanes(); // TODO: for non main function this needs to be a OP_RETURN @@ -1646,7 +1645,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *src = getSrc(&insn->src[0], i); switch (prog->getType()) { case Program::TYPE_FRAGMENT: { - if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) { + if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) { // TGSI uses a different interface than NIR, TGSI stores that // value in the z component, NIR in X offset += 2; @@ -1657,7 +1656,7 @@ Converter::visit(nir_intrinsic_instr *insn) case Program::TYPE_GEOMETRY: case Program::TYPE_TESSELLATION_EVAL: case Program::TYPE_VERTEX: { - if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) { + if (info_out->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) { mkMov(clipVtx[i], src); src = clipVtx[i]; } @@ -1704,7 +1703,7 @@ Converter::visit(nir_intrinsic_instr *insn) texi->tex.r = 0xffff; texi->tex.s = 0xffff; - info->prop.fp.readsFramebuffer = true; + info_out->prop.fp.readsFramebuffer = true; break; } @@ -1715,7 +1714,7 @@ Converter::visit(nir_intrinsic_instr *insn) uint32_t mode = 0; uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect); - nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx]; + nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx]; // see load_barycentric_* handling if (prog->getType() == Program::TYPE_FRAGMENT) { @@ -1806,7 +1805,7 @@ Converter::visit(nir_intrinsic_instr *insn) } else if (op == nir_intrinsic_load_barycentric_pixel) { mode = NV50_IR_INTERP_DEFAULT; } else if (op == nir_intrinsic_load_barycentric_at_sample) { - info->prop.fp.readsSampleLocations = true; + info_out->prop.fp.readsSampleLocations = true; mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET; mode = NV50_IR_INTERP_OFFSET; } else { @@ -1940,7 +1939,7 @@ Converter::visit(nir_intrinsic_instr *insn) for (uint8_t i = 0u; i < dest_components; ++i) { uint32_t address = getSlotAddress(insn, idx, i); loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0, - indirectOffset, vtxBase, info->in[idx].patch); + indirectOffset, vtxBase, info_out->in[idx].patch); } break; } @@ -1963,12 +1962,12 @@ Converter::visit(nir_intrinsic_instr *insn) for (uint8_t i = 0u; i < dest_components; ++i) { uint32_t address = getSlotAddress(insn, idx, i); loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0, - indirectOffset, vtxBase, info->in[idx].patch); + indirectOffset, vtxBase, info_out->in[idx].patch); } break; } case nir_intrinsic_emit_vertex: { - if (info->io.genUserClip > 0) + if (info_out->io.genUserClip > 0) handleUserClipPlanes(); uint32_t idx = nir_intrinsic_stream_id(insn); mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; @@ -2020,7 +2019,7 @@ Converter::visit(nir_intrinsic_instr *insn) mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i)) ->setIndirect(0, 1, indirectBuffer); } - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; break; } case nir_intrinsic_load_ssbo: { @@ -2035,7 +2034,7 @@ Converter::visit(nir_intrinsic_instr *insn) loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i, indirectOffset, indirectBuffer); - info->io.globalAccess |= 0x1; + info_out->io.globalAccess |= 0x1; break; } case nir_intrinsic_shared_atomic_add: @@ -2086,7 +2085,7 @@ Converter::visit(nir_intrinsic_instr *insn) atom->setIndirect(0, 1, indirectBuffer); atom->subOp = getSubOp(op); - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; break; } case nir_intrinsic_global_atomic_add: @@ -2112,7 +2111,7 @@ Converter::visit(nir_intrinsic_instr *insn) atom->setIndirect(0, 0, address); atom->subOp = getSubOp(op); - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; break; } case nir_intrinsic_bindless_image_atomic_add: @@ -2182,7 +2181,7 @@ Converter::visit(nir_intrinsic_instr *insn) case nir_intrinsic_bindless_image_atomic_dec_wrap: ty = getDType(insn); bindless = true; - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; mask = 0x1; break; case nir_intrinsic_image_atomic_add: @@ -2199,25 +2198,27 @@ Converter::visit(nir_intrinsic_instr *insn) case nir_intrinsic_image_atomic_dec_wrap: ty = getDType(insn); bindless = false; - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; mask = 0x1; break; case nir_intrinsic_bindless_image_load: case nir_intrinsic_image_load: ty = TYPE_U32; bindless = op == nir_intrinsic_bindless_image_load; - info->io.globalAccess |= 0x1; + info_out->io.globalAccess |= 0x1; lod_src = 4; break; case nir_intrinsic_bindless_image_store: case nir_intrinsic_image_store: ty = TYPE_U32; + mask = 0xf; bindless = op == nir_intrinsic_bindless_image_store; - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; lod_src = 5; mask = 0xf; break; case nir_intrinsic_bindless_image_samples: + mask = 0x8; case nir_intrinsic_image_samples: ty = TYPE_U32; bindless = op == nir_intrinsic_bindless_image_samples; @@ -2301,7 +2302,7 @@ Converter::visit(nir_intrinsic_instr *insn) } case nir_intrinsic_control_barrier: { // TODO: add flag to shader_info - info->numBarriers = 1; + info_out->numBarriers = 1; Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); bar->fixed = 1; bar->subOp = NV50_IR_SUBOP_BAR_SYNC; @@ -2336,7 +2337,7 @@ Converter::visit(nir_intrinsic_instr *insn) for (auto i = 0u; i < dest_components; ++i) loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset); - info->io.globalAccess |= 0x1; + info_out->io.globalAccess |= 0x1; break; } case nir_intrinsic_store_global: { @@ -2360,7 +2361,7 @@ Converter::visit(nir_intrinsic_instr *insn) } } - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; break; } default: @@ -3181,16 +3182,17 @@ Converter::run() namespace nv50_ir { bool -Program::makeFromNIR(struct nv50_ir_prog_info *info) +Program::makeFromNIR(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { nir_shader *nir = (nir_shader*)info->bin.source; - Converter converter(this, nir, info); + Converter converter(this, nir, info, info_out); bool result = converter.run(); if (!result) return result; LoweringHelper lowering; lowering.run(this); - tlsSize = info->bin.tlsSpace; + tlsSize = info_out->bin.tlsSpace; return result; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 5d396035c1a..f799a4d5659 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -986,7 +986,7 @@ bool Instruction::checkDstSrcAliasing() const class Source { public: - Source(struct nv50_ir_prog_info *, nv50_ir::Program *); + Source(struct nv50_ir_prog_info *, struct nv50_ir_prog_info_out *, nv50_ir::Program *); ~Source(); public: @@ -998,6 +998,7 @@ public: struct tgsi_full_instruction *insns; const struct tgsi_token *tokens; struct nv50_ir_prog_info *info; + struct nv50_ir_prog_info_out *info_out; nv50_ir::DynArray tempArrays; nv50_ir::DynArray immdArrays; @@ -1053,8 +1054,9 @@ private: inline bool isEdgeFlagPassthrough(const Instruction&) const; }; -Source::Source(struct nv50_ir_prog_info *info, nv50_ir::Program *prog) -: info(info), prog(prog) +Source::Source(struct nv50_ir_prog_info *info, struct nv50_ir_prog_info_out *info_out, + nv50_ir::Program *prog) +: info(info), info_out(info_out), prog(prog) { tokens = (const struct tgsi_token *)info->bin.source; @@ -1094,16 +1096,16 @@ bool Source::scanSource() memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1); - info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; - info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; - info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; + info_out->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; + info_out->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; + info_out->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; if (info->type == PIPE_SHADER_FRAGMENT) { - info->prop.fp.writesDepth = scan.writes_z; - info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase; + info_out->prop.fp.writesDepth = scan.writes_z; + info_out->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase; } else if (info->type == PIPE_SHADER_GEOMETRY) { - info->prop.gp.instanceCount = 1; // default value + info_out->prop.gp.instanceCount = 1; // default value } info->io.viewportId = -1; @@ -1141,40 +1143,40 @@ bool Source::scanSource() indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first)); tempBase += info.second; } - info->bin.tlsSpace += tempBase * 16; + info_out->bin.tlsSpace += tempBase * 16; } - if (info->io.genUserClip > 0) { - info->io.clipDistances = info->io.genUserClip; + if (info_out->io.genUserClip > 0) { + info_out->io.clipDistances = info_out->io.genUserClip; - const unsigned int nOut = (info->io.genUserClip + 3) / 4; + const unsigned int nOut = (info_out->io.genUserClip + 3) / 4; for (unsigned int n = 0; n < nOut; ++n) { - unsigned int i = info->numOutputs++; - info->out[i].id = i; - info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; - info->out[i].si = n; - info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4); + unsigned int i = info_out->numOutputs++; + info_out->out[i].id = i; + info_out->out[i].sn = TGSI_SEMANTIC_CLIPDIST; + info_out->out[i].si = n; + info_out->out[i].mask = ((1 << info_out->io.clipDistances) - 1) >> (n * 4); } } - return info->assignSlots(info) == 0; + return info->assignSlots(info_out) == 0; } void Source::scanProperty(const struct tgsi_full_property *prop) { switch (prop->Property.PropertyName) { case TGSI_PROPERTY_GS_OUTPUT_PRIM: - info->prop.gp.outputPrim = prop->u[0].Data; + info_out->prop.gp.outputPrim = prop->u[0].Data; break; case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: - info->prop.gp.maxVertices = prop->u[0].Data; + info_out->prop.gp.maxVertices = prop->u[0].Data; break; case TGSI_PROPERTY_GS_INVOCATIONS: - info->prop.gp.instanceCount = prop->u[0].Data; + info_out->prop.gp.instanceCount = prop->u[0].Data; break; case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: - info->prop.fp.separateFragData = true; + info_out->prop.fp.separateFragData = true; break; case TGSI_PROPERTY_FS_COORD_ORIGIN: case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: @@ -1183,25 +1185,25 @@ void Source::scanProperty(const struct tgsi_full_property *prop) // we don't care break; case TGSI_PROPERTY_VS_PROHIBIT_UCPS: - info->io.genUserClip = -1; + info_out->io.genUserClip = -1; break; case TGSI_PROPERTY_TCS_VERTICES_OUT: - info->prop.tp.outputPatchSize = prop->u[0].Data; + info_out->prop.tp.outputPatchSize = prop->u[0].Data; break; case TGSI_PROPERTY_TES_PRIM_MODE: - info->prop.tp.domain = prop->u[0].Data; + info_out->prop.tp.domain = prop->u[0].Data; break; case TGSI_PROPERTY_TES_SPACING: - info->prop.tp.partitioning = prop->u[0].Data; + info_out->prop.tp.partitioning = prop->u[0].Data; break; case TGSI_PROPERTY_TES_VERTEX_ORDER_CW: - info->prop.tp.winding = prop->u[0].Data; + info_out->prop.tp.winding = prop->u[0].Data; break; case TGSI_PROPERTY_TES_POINT_MODE: if (prop->u[0].Data) - info->prop.tp.outputPrim = PIPE_PRIM_POINTS; + info_out->prop.tp.outputPrim = PIPE_PRIM_POINTS; else - info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */ + info_out->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */ break; case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH: info->prop.cp.numThreads[0] = prop->u[0].Data; @@ -1213,25 +1215,25 @@ void Source::scanProperty(const struct tgsi_full_property *prop) info->prop.cp.numThreads[2] = prop->u[0].Data; break; case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: - info->io.clipDistances = prop->u[0].Data; + info_out->io.clipDistances = prop->u[0].Data; break; case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: - info->io.cullDistances = prop->u[0].Data; + info_out->io.cullDistances = prop->u[0].Data; break; case TGSI_PROPERTY_NEXT_SHADER: /* Do not need to know the next shader stage. */ break; case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL: - info->prop.fp.earlyFragTests = prop->u[0].Data; + info_out->prop.fp.earlyFragTests = prop->u[0].Data; break; case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE: - info->prop.fp.postDepthCoverage = prop->u[0].Data; + info_out->prop.fp.postDepthCoverage = prop->u[0].Data; break; case TGSI_PROPERTY_MUL_ZERO_WINS: info->io.mul_zero_wins = prop->u[0].Data; break; case TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE: - info->io.layer_viewport_relative = prop->u[0].Data; + info_out->io.layer_viewport_relative = prop->u[0].Data; break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); @@ -1294,37 +1296,37 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) if (info->type == PIPE_SHADER_VERTEX) { // all vertex attributes are equal for (i = first; i <= last; ++i) { - info->in[i].sn = TGSI_SEMANTIC_GENERIC; - info->in[i].si = i; + info_out->in[i].sn = TGSI_SEMANTIC_GENERIC; + info_out->in[i].si = i; } } else { for (i = first; i <= last; ++i, ++si) { - info->in[i].id = i; - info->in[i].sn = sn; - info->in[i].si = si; + info_out->in[i].id = i; + info_out->in[i].sn = sn; + info_out->in[i].si = si; if (info->type == PIPE_SHADER_FRAGMENT) { // translate interpolation mode switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: - info->in[i].flat = 1; + info_out->in[i].flat = 1; break; case TGSI_INTERPOLATE_COLOR: - info->in[i].sc = 1; + info_out->in[i].sc = 1; break; case TGSI_INTERPOLATE_LINEAR: - info->in[i].linear = 1; + info_out->in[i].linear = 1; break; default: break; } if (decl->Interp.Location) - info->in[i].centroid = 1; + info_out->in[i].centroid = 1; } if (sn == TGSI_SEMANTIC_PATCH) - info->in[i].patch = 1; + info_out->in[i].patch = 1; if (sn == TGSI_SEMANTIC_PATCH) - info->numPatchConstants = MAX2(info->numPatchConstants, si + 1); + info_out->numPatchConstants = MAX2(info_out->numPatchConstants, si + 1); } } break; @@ -1333,77 +1335,77 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) switch (sn) { case TGSI_SEMANTIC_POSITION: if (info->type == PIPE_SHADER_FRAGMENT) - info->io.fragDepth = i; + info_out->io.fragDepth = i; else if (clipVertexOutput < 0) clipVertexOutput = i; break; case TGSI_SEMANTIC_COLOR: if (info->type == PIPE_SHADER_FRAGMENT) - info->prop.fp.numColourResults++; + info_out->prop.fp.numColourResults++; break; case TGSI_SEMANTIC_EDGEFLAG: - info->io.edgeFlagOut = i; + info_out->io.edgeFlagOut = i; break; case TGSI_SEMANTIC_CLIPVERTEX: clipVertexOutput = i; break; case TGSI_SEMANTIC_CLIPDIST: - info->io.genUserClip = -1; + info_out->io.genUserClip = -1; break; case TGSI_SEMANTIC_SAMPLEMASK: - info->io.sampleMask = i; + info_out->io.sampleMask = i; break; case TGSI_SEMANTIC_VIEWPORT_INDEX: info->io.viewportId = i; break; case TGSI_SEMANTIC_PATCH: - info->numPatchConstants = MAX2(info->numPatchConstants, si + 1); + info_out->numPatchConstants = MAX2(info_out->numPatchConstants, si + 1); /* fallthrough */ case TGSI_SEMANTIC_TESSOUTER: case TGSI_SEMANTIC_TESSINNER: - info->out[i].patch = 1; + info_out->out[i].patch = 1; break; default: break; } - info->out[i].id = i; - info->out[i].sn = sn; - info->out[i].si = si; + info_out->out[i].id = i; + info_out->out[i].sn = sn; + info_out->out[i].si = si; } break; case TGSI_FILE_SYSTEM_VALUE: switch (sn) { case TGSI_SEMANTIC_INSTANCEID: - info->io.instanceId = first; + info_out->io.instanceId = first; break; case TGSI_SEMANTIC_VERTEXID: - info->io.vertexId = first; + info_out->io.vertexId = first; break; case TGSI_SEMANTIC_BASEVERTEX: case TGSI_SEMANTIC_BASEINSTANCE: case TGSI_SEMANTIC_DRAWID: - info->prop.vp.usesDrawParameters = true; + info_out->prop.vp.usesDrawParameters = true; break; case TGSI_SEMANTIC_SAMPLEID: case TGSI_SEMANTIC_SAMPLEPOS: prog->persampleInvocation = true; break; case TGSI_SEMANTIC_SAMPLEMASK: - info->prop.fp.usesSampleMaskIn = true; + info_out->prop.fp.usesSampleMaskIn = true; break; default: break; } for (i = first; i <= last; ++i, ++si) { - info->sv[i].sn = sn; - info->sv[i].si = si; - info->sv[i].input = inferSysValDirection(sn); + info_out->sv[i].sn = sn; + info_out->sv[i].si = si; + info_out->sv[i].input = inferSysValDirection(sn); switch (sn) { case TGSI_SEMANTIC_TESSOUTER: case TGSI_SEMANTIC_TESSINNER: - info->sv[i].patch = 1; + info_out->sv[i].patch = 1; break; } } @@ -1453,7 +1455,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const { return insn.getOpcode() == TGSI_OPCODE_MOV && - insn.getDst(0).getIndex(0) == info->io.edgeFlagOut && + insn.getDst(0).getIndex(0) == info_out->io.edgeFlagOut && insn.getSrc(0).getFile() == TGSI_FILE_INPUT; } @@ -1469,22 +1471,22 @@ void Source::scanInstructionSrc(const Instruction& insn, if (src.isIndirect(0)) { // We don't know which one is accessed, just mark everything for // reading. This is an extremely unlikely occurrence. - for (unsigned i = 0; i < info->numOutputs; ++i) - info->out[i].oread = 1; + for (unsigned i = 0; i < info_out->numOutputs; ++i) + info_out->out[i].oread = 1; } else { - info->out[src.getIndex(0)].oread = 1; + info_out->out[src.getIndex(0)].oread = 1; } } if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) { - if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS) - info->prop.fp.readsSampleLocations = true; + if (info_out->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS) + info_out->prop.fp.readsSampleLocations = true; } if (src.getFile() != TGSI_FILE_INPUT) return; if (src.isIndirect(0)) { - for (unsigned i = 0; i < info->numInputs; ++i) - info->in[i].mask = 0xf; + for (unsigned i = 0; i < info_out->numInputs; ++i) + info_out->in[i].mask = 0xf; } else { const int i = src.getIndex(0); for (unsigned c = 0; c < 4; ++c) { @@ -1492,16 +1494,16 @@ void Source::scanInstructionSrc(const Instruction& insn, continue; int k = src.getSwizzle(c); if (k <= TGSI_SWIZZLE_W) - info->in[i].mask |= 1 << k; + info_out->in[i].mask |= 1 << k; } - switch (info->in[i].sn) { + switch (info_out->in[i].sn) { case TGSI_SEMANTIC_PSIZE: case TGSI_SEMANTIC_PRIMID: case TGSI_SEMANTIC_FOG: - info->in[i].mask &= 0x1; + info_out->in[i].mask &= 0x1; break; case TGSI_SEMANTIC_PCOORD: - info->in[i].mask &= 0x3; + info_out->in[i].mask &= 0x3; break; default: break; @@ -1514,48 +1516,47 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) Instruction insn(inst); if (insn.getOpcode() == TGSI_OPCODE_BARRIER) - info->numBarriers = 1; + info_out->numBarriers = 1; if (insn.getOpcode() == TGSI_OPCODE_FBFETCH) - info->prop.fp.readsFramebuffer = true; + info_out->prop.fp.readsFramebuffer = true; if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE) - info->prop.fp.readsSampleLocations = true; + info_out->prop.fp.readsSampleLocations = true; if (insn.getOpcode() == TGSI_OPCODE_DEMOTE) - info->prop.fp.usesDiscard = true; + info_out->prop.fp.usesDiscard = true; if (insn.dstCount()) { Instruction::DstRegister dst = insn.getDst(0); if (insn.getOpcode() == TGSI_OPCODE_STORE && dst.getFile() != TGSI_FILE_MEMORY) { - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; if (dst.getFile() == TGSI_FILE_INPUT) { // TODO: Handle indirect somehow? const int i = dst.getIndex(0); - info->in[i].mask |= 1; + info_out->in[i].mask |= 1; } } if (dst.getFile() == TGSI_FILE_OUTPUT) { if (dst.isIndirect(0)) - for (unsigned i = 0; i < info->numOutputs; ++i) - info->out[i].mask = 0xf; + for (unsigned i = 0; i < info_out->numOutputs; ++i) + info_out->out[i].mask = 0xf; else - info->out[dst.getIndex(0)].mask |= dst.getMask(); + info_out->out[dst.getIndex(0)].mask |= dst.getMask(); - if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || - info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || - info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER || - info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX || - info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_MASK || - info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) - info->out[dst.getIndex(0)].mask &= 1; + if (info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || + info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || + info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER || + info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX || + info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) + info_out->out[dst.getIndex(0)].mask &= 1; if (isEdgeFlagPassthrough(insn)) - info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); + info_out->io.edgeFlagIn = insn.getSrc(0).getIndex(0); } else if (dst.getFile() == TGSI_FILE_TEMPORARY) { if (dst.isIndirect(0)) @@ -1565,7 +1566,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) dst.getFile() == TGSI_FILE_IMAGE || (dst.getFile() == TGSI_FILE_MEMORY && memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { - info->io.globalAccess |= 0x2; + info_out->io.globalAccess |= 0x2; } } @@ -1588,7 +1589,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) case TGSI_OPCODE_ATOMDEC_WRAP: case TGSI_OPCODE_ATOMINC_WRAP: case TGSI_OPCODE_LOAD: - info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + info_out->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 0x1 : 0x2; break; } @@ -1633,7 +1634,7 @@ using namespace nv50_ir; class Converter : public ConverterCommon { public: - Converter(Program *, const tgsi::Source *); + Converter(Program *, const tgsi::Source *, nv50_ir_prog_info_out *); ~Converter(); bool run(); @@ -1792,13 +1793,13 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) if (idx >= 0) { if (sym->reg.file == FILE_SHADER_INPUT) - sym->setOffset(info->in[idx].slot[c] * 4); + sym->setOffset(info_out->in[idx].slot[c] * 4); else if (sym->reg.file == FILE_SHADER_OUTPUT) - sym->setOffset(info->out[idx].slot[c] * 4); + sym->setOffset(info_out->out[idx].slot[c] * 4); else if (sym->reg.file == FILE_SYSTEM_VALUE) - sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c); + sym->setSV(tgsi::translateSysVal(info_out->sv[idx].sn), c); else sym->setOffset(address); } else { @@ -1813,7 +1814,7 @@ Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr) operation op; // XXX: no way to know interpolation mode if we don't know what's accessed - const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 : + const uint8_t mode = translateInterpMode(&info_out->in[ptr ? 0 : src.getIndex(0)], op); Instruction *insn = new_Instruction(func, op, TYPE_F32); @@ -2025,12 +2026,12 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) case TGSI_FILE_INPUT: if (prog->getType() == Program::TYPE_FRAGMENT) { // don't load masked inputs, won't be assigned a slot - if (!ptr && !(info->in[idx].mask & (1 << swz))) + if (!ptr && !(info_out->in[idx].mask & (1 << swz))) return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f); return interpolate(src, c, shiftAddress(ptr)); } else if (prog->getType() == Program::TYPE_GEOMETRY) { - if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID) + if (!ptr && info_out->in[idx].sn == TGSI_SEMANTIC_PRIMID) return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0)); // XXX: This is going to be a problem with scalar arrays, i.e. when // we cannot assume that the address is given in units of vec4. @@ -2041,24 +2042,24 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); } ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr)); - ld->perPatch = info->in[idx].patch; + ld->perPatch = info_out->in[idx].patch; return ld->getDef(0); case TGSI_FILE_OUTPUT: assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr)); - ld->perPatch = info->out[idx].patch; + ld->perPatch = info_out->out[idx].patch; return ld->getDef(0); case TGSI_FILE_SYSTEM_VALUE: assert(!ptr); - if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID && + if (info_out->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID && info->prop.cp.numThreads[swz] == 1) return loadImm(NULL, 0u); - if (isSubGroupMask(info->sv[idx].sn) && swz > 0) + if (isSubGroupMask(info_out->sv[idx].sn) && swz > 0) return loadImm(NULL, 0u); - if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE) + if (info_out->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE) return loadImm(NULL, 32u); ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); - ld->perPatch = info->sv[idx].patch; + ld->perPatch = info_out->sv[idx].patch; return ld->getDef(0); case TGSI_FILE_TEMPORARY: { int arrayid = src.getArrayId(); @@ -2113,7 +2114,7 @@ Converter::storeDst(int d, int c, Value *val) if (dst.isIndirect(0)) ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL)); - if (info->io.genUserClip > 0 && + if (info_out->io.genUserClip > 0 && dst.getFile() == TGSI_FILE_OUTPUT && !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) { mkMov(clipVtx[c], val); @@ -2137,16 +2138,16 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, } else if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { - if (ptr || (info->out[idx].mask & (1 << c))) { + if (ptr || (info_out->out[idx].mask & (1 << c))) { /* Save the viewport index into a scratch register so that it can be exported at EMIT time */ - if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX && + if (info_out->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX && prog->getType() == Program::TYPE_GEOMETRY && viewport != NULL) mkOp1(OP_MOV, TYPE_U32, viewport, val); else mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch = - info->out[idx].patch; + info_out->out[idx].patch; } } else if (f == TGSI_FILE_TEMPORARY || @@ -3033,7 +3034,7 @@ Converter::handleINTERP(Value *dst[4]) // We can assume that the fixed index will point to an input of the same // interpolation type in case of an indirect. // TODO: Make use of ArrayID. - linear = info->in[src.getIndex(0)].linear; + linear = info_out->in[src.getIndex(0)].linear; if (linear) { op = OP_LINTERP; mode = NV50_IR_INTERP_LINEAR; @@ -3526,11 +3527,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) /* export the saved viewport index */ if (viewport != NULL) { Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32, - info->out[info->io.viewportId].slot[0] * 4); + info_out->out[info->io.viewportId].slot[0] * 4); mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport); } /* handle user clip planes for each emitted vertex */ - if (info->io.genUserClip > 0) + if (info_out->io.genUserClip > 0) handleUserClipPlanes(); /* fallthrough */ case TGSI_OPCODE_ENDPRIM: @@ -3539,7 +3540,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) unsigned int stream = tgsi.getSrc(0).getValueU32(0, code->immd.data); if (stream && op == OP_RESTART) break; - if (info->prop.gp.maxVertices == 0) + if (info_out->prop.gp.maxVertices == 0) break; src0 = mkImm(stream); mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; @@ -3708,7 +3709,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) exportOutputs(); if ((prog->getType() == Program::TYPE_VERTEX || prog->getType() == Program::TYPE_TESSELLATION_EVAL - ) && info->io.genUserClip > 0) + ) && info_out->io.genUserClip > 0) handleUserClipPlanes(); mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; } @@ -4154,9 +4155,9 @@ void Converter::exportOutputs() { if (info->io.alphaRefBase) { - for (unsigned int i = 0; i < info->numOutputs; ++i) { - if (info->out[i].sn != TGSI_SEMANTIC_COLOR || - info->out[i].si != 0) + for (unsigned int i = 0; i < info_out->numOutputs; ++i) { + if (info_out->out[i].sn != TGSI_SEMANTIC_COLOR || + info_out->out[i].si != 0) continue; const unsigned int c = 3; if (!oData.exists(sub.cur->values, i, c)) @@ -4175,15 +4176,15 @@ Converter::exportOutputs() } } - for (unsigned int i = 0; i < info->numOutputs; ++i) { + for (unsigned int i = 0; i < info_out->numOutputs; ++i) { for (unsigned int c = 0; c < 4; ++c) { if (!oData.exists(sub.cur->values, i, c)) continue; Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, - info->out[i].slot[c] * 4); + info_out->out[i].slot[c] * 4); Value *val = oData.load(sub.cur->values, i, c, NULL); if (val) { - if (info->out[i].sn == TGSI_SEMANTIC_POSITION) + if (info_out->out[i].sn == TGSI_SEMANTIC_POSITION) mkOp1(OP_SAT, TYPE_F32, val, val); mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); } @@ -4191,7 +4192,8 @@ Converter::exportOutputs() } } -Converter::Converter(Program *ir, const tgsi::Source *code) : ConverterCommon(ir, code->info), +Converter::Converter(Program *ir, const tgsi::Source *code, nv50_ir_prog_info_out *info_out) +: ConverterCommon(ir, code->info, info_out), code(code), tgsi(NULL), tData(this), lData(this), aData(this), oData(this) @@ -4292,7 +4294,7 @@ Converter::run() setPosition(entry, true); sub.cur = getSubroutine(prog->main); - if (info->io.genUserClip > 0) { + if (info_out->io.genUserClip > 0) { for (int c = 0; c < 4; ++c) clipVtx[c] = getScratch(); } @@ -4335,14 +4337,15 @@ Converter::run() namespace nv50_ir { bool -Program::makeFromTGSI(struct nv50_ir_prog_info *info) +Program::makeFromTGSI(struct nv50_ir_prog_info *info, + struct nv50_ir_prog_info_out *info_out) { - tgsi::Source src(info, this); + tgsi::Source src(info, info_out, this); if (!src.scanSource()) return false; - tlsSize = info->bin.tlsSpace; + tlsSize = info_out->bin.tlsSpace; - Converter builder(this, &src); + Converter builder(this, &src, info_out); return builder.run(); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 1bcfb054ffa..52106cb931f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2822,7 +2822,7 @@ NVC0LoweringPass::readTessCoord(LValue *dst, int c) y = dst; } else { assert(c == 2); - if (prog->driver->prop.tp.domain != PIPE_PRIM_TRIANGLES) { + if (prog->driver_out->prop.tp.domain != PIPE_PRIM_TRIANGLES) { bld.mkMov(dst, bld.loadImm(NULL, 0)); return; } @@ -2931,7 +2931,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i) ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; Value *offset = calculateSampleOffset(sampleID); - assert(prog->driver->prop.fp.readsSampleLocations); + assert(prog->driver_out->prop.fp.readsSampleLocations); if (targ->getChipset() >= NVISA_GM200_CHIPSET) { bld.mkLoad(TYPE_F32, @@ -3166,7 +3166,7 @@ NVC0LoweringPass::handlePIXLD(Instruction *i) if (targ->getChipset() < NVISA_GM200_CHIPSET) return; - assert(prog->driver->prop.fp.readsSampleLocations); + assert(prog->driver_out->prop.fp.readsSampleLocations); bld.mkLoad(TYPE_F32, i->getDef(0), diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index f2b3c37db7b..b827819075a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -352,7 +352,7 @@ CodeEmitter::prepareEmission(BasicBlock *bb) } bool -Program::emitBinary(struct nv50_ir_prog_info *info) +Program::emitBinary(struct nv50_ir_prog_info_out *info) { CodeEmitter *emit = target->getCodeEmitter(progType); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 0f7db116577..db8ae0ae99e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -173,8 +173,9 @@ public: // The address chosen is supplied to the relocation routine. virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0; - virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) { - if (info->type == PIPE_SHADER_COMPUTE) { + virtual void parseDriverInfo(const struct nv50_ir_prog_info *info, + const struct nv50_ir_prog_info_out *info_out) { + if (info_out->type == PIPE_SHADER_COMPUTE) { threads = info->prop.cp.numThreads[0] * info->prop.cp.numThreads[1] * info->prop.cp.numThreads[2]; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 0f041bac3c8..d0865b82990 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -584,15 +584,16 @@ recordLocation(uint16_t *locs, uint8_t *masks, } void -TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info) +TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info, + const struct nv50_ir_prog_info_out *info_out) { unsigned int i; - for (i = 0; i < info->numOutputs; ++i) - recordLocation(sysvalLocation, NULL, &info->out[i]); - for (i = 0; i < info->numInputs; ++i) - recordLocation(sysvalLocation, &wposMask, &info->in[i]); - for (i = 0; i < info->numSysVals; ++i) - recordLocation(sysvalLocation, NULL, &info->sv[i]); + for (i = 0; i < info_out->numOutputs; ++i) + recordLocation(sysvalLocation, NULL, &info_out->out[i]); + for (i = 0; i < info_out->numInputs; ++i) + recordLocation(sysvalLocation, &wposMask, &info_out->in[i]); + for (i = 0; i < info_out->numSysVals; ++i) + recordLocation(sysvalLocation, NULL, &info_out->sv[i]); if (sysvalLocation[SV_POSITION] >= 0x200) { // not assigned by driver, but we need it internally @@ -600,7 +601,7 @@ TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info) sysvalLocation[SV_POSITION] = 0; } - Target::parseDriverInfo(info); + Target::parseDriverInfo(info, info_out); } } // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h index 00243d7958e..caf66b269db 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h @@ -42,7 +42,8 @@ public: virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; - virtual void parseDriverInfo(const struct nv50_ir_prog_info *); + virtual void parseDriverInfo(const struct nv50_ir_prog_info *, + const struct nv50_ir_prog_info_out *); virtual bool insnCanLoad(const Instruction *insn, int s, const Instruction *ld) const; diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c index f2c175661be..cf4b0995145 100644 --- a/src/gallium/drivers/nouveau/nouveau_compiler.c +++ b/src/gallium/drivers/nouveau/nouveau_compiler.c @@ -75,7 +75,7 @@ nv30_codegen(int chipset, int type, struct tgsi_token tokens[], } static int -dummy_assign_slots(struct nv50_ir_prog_info *info) +dummy_assign_slots(struct nv50_ir_prog_info_out *info) { unsigned i, n, c; @@ -105,6 +105,7 @@ static int nouveau_codegen(int chipset, int type, struct tgsi_token tokens[], unsigned *size, unsigned **code) { struct nv50_ir_prog_info info = {0}; + struct nv50_ir_prog_info_out info_out = {0}; int ret; info.type = type; @@ -124,14 +125,14 @@ nouveau_codegen(int chipset, int type, struct tgsi_token tokens[], info.dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); info.omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0); - ret = nv50_ir_generate_code(&info); + ret = nv50_ir_generate_code(&info, &info_out); if (ret) { _debug_printf("Error compiling program: %d\n", ret); return ret; } - *size = info.bin.codeSize; - *code = info.bin.code; + *size = info_out.bin.codeSize; + *code = info_out.bin.code; return 0; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 58c0463f1a2..21ffd951c26 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -38,7 +38,7 @@ bitcount4(const uint32_t val) } static int -nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) +nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info) { struct nv50_program *prog = (struct nv50_program *)info->driverPriv; unsigned i, n, c; @@ -140,7 +140,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) } static int -nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) +nv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info) { struct nv50_program *prog = (struct nv50_program *)info->driverPriv; unsigned i, n, m, c; @@ -249,7 +249,7 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) } static int -nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) +nv50_program_assign_varying_slots(struct nv50_ir_prog_info_out *info) { switch (info->type) { case PIPE_SHADER_VERTEX: @@ -266,7 +266,7 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) } static struct nv50_stream_output_state * -nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, +nv50_program_create_strmout_state(const struct nv50_ir_prog_info_out *info, const struct pipe_stream_output_info *pso) { struct nv50_stream_output_state *so; @@ -326,6 +326,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, struct pipe_debug_callback *debug) { struct nv50_ir_prog_info *info; + struct nv50_ir_prog_info_out info_out = {}; int i, ret; const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80; @@ -376,7 +377,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, if (prog->type == PIPE_SHADER_COMPUTE) info->prop.cp.inputOffset = 0x10; - info->driverPriv = prog; + info_out.driverPriv = prog; #ifndef NDEBUG info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); @@ -386,39 +387,39 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->optLevel = 3; #endif - ret = nv50_ir_generate_code(info); + ret = nv50_ir_generate_code(info, &info_out); if (ret) { NOUVEAU_ERR("shader translation failed: %i\n", ret); goto out; } - prog->code = info->bin.code; - prog->code_size = info->bin.codeSize; - prog->fixups = info->bin.relocData; - prog->interps = info->bin.fixupData; - prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); - prog->tls_space = info->bin.tlsSpace; - prog->cp.smem_size = info->bin.smemSize; + prog->code = info_out.bin.code; + prog->code_size = info_out.bin.codeSize; + prog->fixups = info_out.bin.relocData; + prog->interps = info_out.bin.fixupData; + prog->max_gpr = MAX2(4, (info_out.bin.maxGPR >> 1) + 1); + prog->tls_space = info_out.bin.tlsSpace; + prog->cp.smem_size = info_out.bin.smemSize; prog->mul_zero_wins = info->io.mul_zero_wins; - prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + prog->vp.need_vertex_id = info_out.io.vertexId < PIPE_MAX_SHADER_INPUTS; - prog->vp.clip_enable = (1 << info->io.clipDistances) - 1; + prog->vp.clip_enable = (1 << info_out.io.clipDistances) - 1; prog->vp.cull_enable = - ((1 << info->io.cullDistances) - 1) << info->io.clipDistances; + ((1 << info_out.io.cullDistances) - 1) << info_out.io.clipDistances; prog->vp.clip_mode = 0; - for (i = 0; i < info->io.cullDistances; ++i) - prog->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); + for (i = 0; i < info_out.io.cullDistances; ++i) + prog->vp.clip_mode |= 1 << ((info_out.io.clipDistances + i) * 4); if (prog->type == PIPE_SHADER_FRAGMENT) { - if (info->prop.fp.writesDepth) { + if (info_out.prop.fp.writesDepth) { prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; prog->fp.flags[1] = 0x11; } - if (info->prop.fp.usesDiscard) + if (info_out.prop.fp.usesDiscard) prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; } else if (prog->type == PIPE_SHADER_GEOMETRY) { - switch (info->prop.gp.outputPrim) { + switch (info_out.prop.gp.outputPrim) { case PIPE_PRIM_LINE_STRIP: prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; break; @@ -427,22 +428,22 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; case PIPE_PRIM_POINTS: default: - assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS); + assert(info_out.prop.gp.outputPrim == PIPE_PRIM_POINTS); prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; break; } - prog->gp.vert_count = CLAMP(info->prop.gp.maxVertices, 1, 1024); + prog->gp.vert_count = CLAMP(info_out.prop.gp.maxVertices, 1, 1024); } if (prog->pipe.stream_output.num_outputs) - prog->so = nv50_program_create_strmout_state(info, + prog->so = nv50_program_create_strmout_state(&info_out, &prog->pipe.stream_output); pipe_debug_message(debug, SHADER_INFO, "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d", - prog->type, info->bin.tlsSpace, info->bin.smemSize, - prog->max_gpr, info->bin.instructions, - info->bin.codeSize); + prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize, + prog->max_gpr, info_out.bin.instructions, + info_out.bin.codeSize); out: if (info->bin.sourceRep == PIPE_SHADER_IR_NIR) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 59fa2359cb4..087493fe2e2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -90,7 +90,7 @@ nvc0_shader_output_address(unsigned sn, unsigned si) } static int -nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) +nvc0_vp_assign_input_slots(struct nv50_ir_prog_info_out *info) { unsigned i, c, n; @@ -114,7 +114,7 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) } static int -nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) +nvc0_sp_assign_input_slots(struct nv50_ir_prog_info_out *info) { unsigned offset; unsigned i, c; @@ -130,7 +130,7 @@ nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) } static int -nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) +nvc0_fp_assign_output_slots(struct nv50_ir_prog_info_out *info) { unsigned count = info->prop.fp.numColourResults * 4; unsigned i, c; @@ -163,7 +163,7 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) } static int -nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) +nvc0_sp_assign_output_slots(struct nv50_ir_prog_info_out *info) { unsigned offset; unsigned i, c; @@ -179,7 +179,7 @@ nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) } static int -nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info) +nvc0_program_assign_varying_slots(struct nv50_ir_prog_info_out *info) { int ret; @@ -211,7 +211,7 @@ nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot) /* Common part of header generation for VP, TCP, TEP and GP. */ static int -nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) +nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info_out *info) { unsigned i, c, a; @@ -278,7 +278,7 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) } static int -nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) +nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info_out *info) { vp->hdr[0] = 0x20061 | (1 << 10); vp->hdr[4] = 0xff000; @@ -287,7 +287,7 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) } static void -nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) +nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info_out *info) { if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) { tp->tp.tess_mode = ~0; @@ -341,7 +341,7 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) } static int -nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info) +nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info_out *info) { unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ @@ -372,7 +372,7 @@ nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info) } static int -nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) +nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info_out *info) { tep->hdr[0] = 0x20061 | (3 << 10); tep->hdr[4] = 0xff000; @@ -387,7 +387,7 @@ nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) } static int -nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) +nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info_out *info) { gp->hdr[0] = 0x20061 | (4 << 10); @@ -432,7 +432,7 @@ nvc0_hdr_interp_mode(const struct nv50_ir_varying *var) } static int -nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) +nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info_out *info) { unsigned i, c, a, m; @@ -509,7 +509,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) } static struct nvc0_transform_feedback_state * -nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, +nvc0_program_create_tfb_state(const struct nv50_ir_prog_info_out *info, const struct pipe_stream_output_info *pso) { struct nvc0_transform_feedback_state *tfb; @@ -573,6 +573,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, struct pipe_debug_callback *debug) { struct nv50_ir_prog_info *info; + struct nv50_ir_prog_info_out info_out = {}; int ret; info = CALLOC_STRUCT(nv50_ir_prog_info); @@ -633,45 +634,45 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->assignSlots = nvc0_program_assign_varying_slots; - ret = nv50_ir_generate_code(info); + ret = nv50_ir_generate_code(info, &info_out); if (ret) { NOUVEAU_ERR("shader translation failed: %i\n", ret); goto out; } - prog->code = info->bin.code; - prog->code_size = info->bin.codeSize; - prog->relocs = info->bin.relocData; - prog->fixups = info->bin.fixupData; - if (info->target >= NVISA_GV100_CHIPSET) - prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why? + prog->code = info_out.bin.code; + prog->code_size = info_out.bin.codeSize; + prog->relocs = info_out.bin.relocData; + prog->fixups = info_out.bin.fixupData; + if (info_out.target >= NVISA_GV100_CHIPSET) + prog->num_gprs = MIN2(info_out.bin.maxGPR + 5, 256); //XXX: why? else - prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); - prog->cp.smem_size = info->bin.smemSize; - prog->num_barriers = info->numBarriers; + prog->num_gprs = MAX2(4, (info_out.bin.maxGPR + 1)); + prog->cp.smem_size = info_out.bin.smemSize; + prog->num_barriers = info_out.numBarriers; - prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; - prog->vp.need_draw_parameters = info->prop.vp.usesDrawParameters; + prog->vp.need_vertex_id = info_out.io.vertexId < PIPE_MAX_SHADER_INPUTS; + prog->vp.need_draw_parameters = info_out.prop.vp.usesDrawParameters; - if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS) - info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ - prog->vp.edgeflag = info->io.edgeFlagIn; + if (info_out.io.edgeFlagOut < PIPE_MAX_ATTRIBS) + info_out.out[info_out.io.edgeFlagOut].mask = 0; /* for headergen */ + prog->vp.edgeflag = info_out.io.edgeFlagIn; switch (prog->type) { case PIPE_SHADER_VERTEX: - ret = nvc0_vp_gen_header(prog, info); + ret = nvc0_vp_gen_header(prog, &info_out); break; case PIPE_SHADER_TESS_CTRL: - ret = nvc0_tcp_gen_header(prog, info); + ret = nvc0_tcp_gen_header(prog, &info_out); break; case PIPE_SHADER_TESS_EVAL: - ret = nvc0_tep_gen_header(prog, info); + ret = nvc0_tep_gen_header(prog, &info_out); break; case PIPE_SHADER_GEOMETRY: - ret = nvc0_gp_gen_header(prog, info); + ret = nvc0_gp_gen_header(prog, &info_out); break; case PIPE_SHADER_FRAGMENT: - ret = nvc0_fp_gen_header(prog, info); + ret = nvc0_fp_gen_header(prog, &info_out); break; case PIPE_SHADER_COMPUTE: break; @@ -683,10 +684,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, if (ret) goto out; - if (info->bin.tlsSpace) { - assert(info->bin.tlsSpace < (1 << 24)); + if (info_out.bin.tlsSpace) { + assert(info_out.bin.tlsSpace < (1 << 24)); prog->hdr[0] |= 1 << 26; - prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */ + prog->hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */ prog->need_tls = true; } /* TODO: factor 2 only needed where joinat/precont is used, @@ -698,22 +699,22 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, prog->need_tls = true; } */ - if (info->io.globalAccess) + if (info_out.io.globalAccess) prog->hdr[0] |= 1 << 26; - if (info->io.globalAccess & 0x2) + if (info_out.io.globalAccess & 0x2) prog->hdr[0] |= 1 << 16; - if (info->io.fp64) + if (info_out.io.fp64) prog->hdr[0] |= 1 << 27; if (prog->pipe.stream_output.num_outputs) - prog->tfb = nvc0_program_create_tfb_state(info, + prog->tfb = nvc0_program_create_tfb_state(&info_out, &prog->pipe.stream_output); pipe_debug_message(debug, SHADER_INFO, "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d", - prog->type, info->bin.tlsSpace, info->bin.smemSize, - prog->num_gprs, info->bin.instructions, - info->bin.codeSize); + prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize, + prog->num_gprs, info_out.bin.instructions, + info_out.bin.codeSize); #ifndef NDEBUG if (debug_get_option("NV50_PROG_CHIPSET", NULL) && info->dbgFlags) -- 2.30.2