From: Christoph Bumiller Date: Fri, 22 Feb 2013 23:39:23 +0000 (+0100) Subject: nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c2dfcd7f0eaf4eda375eb320e0e87793a80ef92d;p=mesa.git nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE Squashed and (heavily) modified original patches by Francisco Jerez: nv50/ir/tgsi: Implement resource LOAD/STORE (wip). nv50/ir/tgsi: Emit SUST/SULD for surface access, and add CB LOAD/STORE support nv50/ir/tgsi: Fix/clean up the LOAD/STORE handling code. Left out for now: nv50/ir/tgsi: Resource indirect indexing Treating raw, read-only surfaces as constant buffers (CBs) was removed because CBs are limited to a size of 64 KiB which isn't desirable, and because this decision should probably be made by the state tracker. If we used a number of CB slots for surfaces, it might find that we cannot accommodate the advertised limit. --- diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index bdea48bbdf3..dd7ff90de96 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -871,9 +871,9 @@ public: struct { Target target; - uint8_t r; + uint16_t r; + uint16_t s; int8_t rIndirectSrc; - uint8_t s; int8_t sIndirectSrc; uint8_t mask; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp index 4448e8299d2..0af2c61b3e5 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp @@ -240,15 +240,17 @@ BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst, return insn; } -Instruction * -BuildUtil::mkTex(operation op, TexTarget targ, uint8_t tic, uint8_t tsc, - Value **def, Value **src) +TexInstruction * +BuildUtil::mkTex(operation op, TexTarget targ, + uint16_t tic, uint16_t tsc, + const std::vector &def, + const std::vector &src) { TexInstruction *tex = 
new_TexInstruction(func, op); - for (int d = 0; d < 4 && def[d]; ++d) + for (size_t d = 0; d < def.size() && def[d]; ++d) tex->setDef(d, def[d]); - for (int s = 0; s < 4 && src[s]; ++s) + for (size_t s = 0; s < src.size() && src[s]; ++s) tex->setSrc(s, src[s]); tex->setTexture(targ, tic, tsc); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h index 963c3505083..f48dbc21168 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h @@ -75,8 +75,10 @@ public: CmpInstruction *mkCmp(operation, CondCode, DataType, Value *, Value *, Value *, Value * = NULL); - Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc, - Value **def, Value **src); + TexInstruction *mkTex(operation, TexTarget, + uint16_t tic, uint16_t tsc, + const std::vector &def, + const std::vector &src); Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *); FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index deee60cd6a7..933a5e106ac 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -161,6 +161,10 @@ struct nv50_ir_prog_info boolean separateFragData; boolean usesDiscard; } fp; + struct { + uint32_t inputOffset; /* base address for user args */ + uint32_t sharedOffset; /* reserved space in s[] */ + } cp; } prop; struct { @@ -179,6 +183,7 @@ struct nv50_ir_prog_info uint8_t sampleMask; /* output index of SampleMask */ uint8_t backFaceColor[2]; /* input/output indices of back face colour */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ + boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */ uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */ uint16_t texBindBase; /* base address for 
tex handles (nve4) */ uint16_t suInfoBase; /* base address for surface info (nve4) */ diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 69c05c1464c..afbabfde23d 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -559,7 +559,6 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(USLT, SET); NV50_IR_OPCODE_CASE(USNE, SET); - NV50_IR_OPCODE_CASE(LOAD, TXF); NV50_IR_OPCODE_CASE(SAMPLE, TEX); NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); @@ -620,8 +619,17 @@ public: int clipVertexOutput; - uint8_t *samplerViewTargets; // TGSI_TEXTURE_* - unsigned samplerViewCount; + struct TextureView { + uint8_t target; // TGSI_TEXTURE_* + }; + std::vector textureViews; + + struct Resource { + uint8_t target; // TGSI_TEXTURE_* + bool raw; + uint8_t slot; // $surface index + }; + std::vector resources; private: int inferSysValDirection(unsigned sn) const; @@ -640,8 +648,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog) if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) tgsi_dump(tokens, 0); - samplerViewTargets = NULL; - mainTempsInLMem = FALSE; } @@ -654,9 +660,6 @@ Source::~Source() FREE(info->immd.data); if (info->immd.type) FREE(info->immd.type); - - if (samplerViewTargets) - delete[] samplerViewTargets; } bool Source::scanSource() @@ -673,8 +676,8 @@ bool Source::scanSource() clipVertexOutput = -1; - samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; - samplerViewTargets = new uint8_t[samplerViewCount]; + textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); + resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); info->immd.bufSize = 0; tempArrayCount = 0; @@ -899,9 +902,16 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) info->sv[i].input = inferSysValDirection(sn); } break; + case TGSI_FILE_RESOURCE: + for (i = 
first; i <= last; ++i) { + resources[i].target = decl->Resource.Resource; + resources[i].raw = decl->Resource.Raw; + resources[i].slot = i; + } + break; case TGSI_FILE_SAMPLER_VIEW: for (i = first; i <= last; ++i) - samplerViewTargets[i] = decl->SamplerView.Resource; + textureViews[i].target = decl->SamplerView.Resource; break; case TGSI_FILE_IMMEDIATE_ARRAY: { @@ -997,9 +1007,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) for (unsigned s = 0; s < insn.srcCount(); ++s) { Instruction::SrcRegister src = insn.getSrc(s); - if (src.getFile() == TGSI_FILE_TEMPORARY) + if (src.getFile() == TGSI_FILE_TEMPORARY) { if (src.isIndirect(0)) mainTempsInLMem = TRUE; + } else + if (src.getFile() == TGSI_FILE_RESOURCE) { + if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + 0x1 : 0x2; + } if (src.getFile() != TGSI_FILE_INPUT) continue; unsigned mask = insn.srcMask(s); @@ -1025,13 +1041,16 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) nv50_ir::TexInstruction::Target Instruction::getTexture(const tgsi::Source *code, int s) const { + // XXX: indirect access + unsigned int r; + switch (getSrc(s).getFile()) { - case TGSI_FILE_SAMPLER_VIEW: { - // XXX: indirect access - unsigned int r = getSrc(s).getIndex(0); - assert(r < code->samplerViewCount); - return translateTexture(code->samplerViewTargets[r]); - } + case TGSI_FILE_RESOURCE: + r = getSrc(s).getIndex(0); + return translateTexture(code->resources.at(r).target); + case TGSI_FILE_SAMPLER_VIEW: + r = getSrc(s).getIndex(0); + return translateTexture(code->textureViews.at(r).target); default: return translateTexture(insn->Texture.Texture); } @@ -1091,6 +1110,12 @@ private: void handleLIT(Value *dst0[4]); void handleUserClipPlanes(); + Symbol *getResourceBase(int r); + void getResourceCoords(std::vector&, int r, int s); + + void handleLOAD(Value *dst0[4]); + void handleSTORE(); + Value 
*interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); @@ -1710,6 +1735,236 @@ Converter::handleLIT(Value *dst0[4]) } } +static inline bool +isResourceSpecial(const int r) +{ + return (r == TGSI_RESOURCE_GLOBAL || + r == TGSI_RESOURCE_LOCAL || + r == TGSI_RESOURCE_PRIVATE || + r == TGSI_RESOURCE_INPUT); +} + +static inline bool +isResourceRaw(const struct tgsi::Source *code, const int r) +{ + return isResourceSpecial(r) || code->resources[r].raw; +} + +static inline nv50_ir::TexTarget +getResourceTarget(const struct tgsi::Source *code, int r) +{ + if (isResourceSpecial(r)) + return nv50_ir::TEX_TARGET_BUFFER; + return tgsi::translateTexture(code->resources.at(r).target); +} + +Symbol * +Converter::getResourceBase(const int r) +{ + Symbol *sym = NULL; + + switch (r) { + case TGSI_RESOURCE_GLOBAL: + sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15); + break; + case TGSI_RESOURCE_LOCAL: + assert(prog->getType() == Program::TYPE_COMPUTE); + sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32, + info->prop.cp.sharedOffset); + break; + case TGSI_RESOURCE_PRIVATE: + sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32, + info->bin.tlsSpace); + break; + case TGSI_RESOURCE_INPUT: + assert(prog->getType() == Program::TYPE_COMPUTE); + sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32, + info->prop.cp.inputOffset); + break; + default: + sym = new_Symbol(prog, + nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot); + break; + } + return sym; +} + +void +Converter::getResourceCoords(std::vector &coords, int r, int s) +{ + const int arg = + TexInstruction::Target(getResourceTarget(code, r)).getArgCount(); + + for (int c = 0; c < arg; ++c) + coords.push_back(fetchSrc(s, c)); + + // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk + if (r == TGSI_RESOURCE_LOCAL || + r == TGSI_RESOURCE_PRIVATE || + r == TGSI_RESOURCE_INPUT) + coords[0] = mkOp1v(OP_MOV, TYPE_U32, 
getScratch(4, FILE_ADDRESS), + coords[0]); +} + +static inline int +partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask) +{ + int n = 0; + + while (mask) { + if (mask & 1) { + size[n]++; + } else { + if (size[n]) + comp[n = 1] = size[0] + 1; + else + comp[n]++; + } + mask >>= 1; + } + if (size[0] == 3) { + n = 1; + size[0] = (comp[0] == 1) ? 1 : 2; + size[1] = 3 - size[0]; + comp[1] = comp[0] + size[0]; + } + return n + 1; +} + +// For raw loads, granularity is 4 byte. +// Usage of the texture read mask on OP_SULDP is not allowed. +void +Converter::handleLOAD(Value *dst0[4]) +{ + const int r = tgsi.getSrc(0).getIndex(0); + int c; + std::vector off, src, ldv, def; + + getResourceCoords(off, r, 1); + + if (isResourceRaw(code, r)) { + uint8_t mask = 0; + uint8_t comp[2] = { 0, 0 }; + uint8_t size[2] = { 0, 0 }; + + Symbol *base = getResourceBase(r); + + // determine the base and size of the at most 2 load ops + for (c = 0; c < 4; ++c) + if (!tgsi.getDst(0).isMasked(c)) + mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X); + + int n = partitionLoadStore(comp, size, mask); + + src = off; + + def.resize(4); // index by component, the ones we need will be non-NULL + for (c = 0; c < 4; ++c) { + if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c)) + def[c] = dst0[c]; + else + if (mask & (1 << c)) + def[c] = getScratch(); + } + + const bool useLd = isResourceSpecial(r) || + (info->io.nv50styleSurfaces && + code->resources[r].target == TGSI_TEXTURE_BUFFER); + + for (int i = 0; i < n; ++i) { + ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]); + + if (comp[i]) // adjust x component of source address if necessary + src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), + off[0], mkImm(comp[i] * 4)); + else + src[0] = off[0]; + + if (useLd) { + Instruction *ld = + mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]); + for (size_t c = 1; c < ldv.size(); ++c) + ld->setDef(c, ldv[c]); + } else { + mkTex(OP_SULDB, 
getResourceTarget(code, r), code->resources[r].slot, + 0, ldv, src)->dType = typeOfSize(size[i] * 4); + } + } + } else { + def.resize(4); + for (c = 0; c < 4; ++c) { + if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c)) + def[c] = getScratch(); + else + def[c] = dst0[c]; + } + + mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0, + def, off); + } + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + if (dst0[c] != def[c]) + mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); +} + +// For formatted stores, the write mask on OP_SUSTP can be used. +// Raw stores have to be split. +void +Converter::handleSTORE() +{ + const int r = tgsi.getDst(0).getIndex(0); + int c; + std::vector off, src, dummy; + + getResourceCoords(off, r, 0); + src = off; + const int s = src.size(); + + if (isResourceRaw(code, r)) { + uint8_t comp[2] = { 0, 0 }; + uint8_t size[2] = { 0, 0 }; + + int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask()); + + Symbol *base = getResourceBase(r); + + const bool useSt = isResourceSpecial(r) || + (info->io.nv50styleSurfaces && + code->resources[r].target == TGSI_TEXTURE_BUFFER); + + for (int i = 0; i < n; ++i) { + if (comp[i]) // adjust x component of source address if necessary + src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), + off[0], mkImm(comp[i] * 4)); + else + src[0] = off[0]; + + const DataType stTy = typeOfSize(size[i] * 4); + + if (useSt) { + Instruction *st = + mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i])); + for (c = 1; c < size[i]; ++c) + st->setSrc(1 + c, fetchSrc(1, comp[i] + c)); + st->setIndirect(0, 0, src[0]); + } else { + // attach values to be stored + src.resize(s + size[i]); + for (c = 0; c < size[i]; ++c) + src[s + c] = fetchSrc(1, comp[i] + c); + mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot, + 0, dummy, src)->setType(stTy); + } + } + } else { + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + src.push_back(fetchSrc(1, c)); + + mkTex(OP_SUSTP, 
getResourceTarget(code, r), code->resources[r].slot, 0, + dummy, src)->tex.mask = tgsi.getDst(0).getMask(); + } +} + Converter::Subroutine * Converter::getSubroutine(unsigned ip) { @@ -2072,7 +2327,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); break; case TGSI_OPCODE_TXF: - case TGSI_OPCODE_LOAD: handleTXF(dst0, 1); break; case TGSI_OPCODE_TXQ: @@ -2257,6 +2511,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) ERROR("switch/case opcode encountered, should have been lowered\n"); abort(); break; + case TGSI_OPCODE_LOAD: + handleLOAD(dst0); + break; + case TGSI_OPCODE_STORE: + handleSTORE(); + break; default: ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); assert(0); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp index db1306151ea..83f7201fc35 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp @@ -594,11 +594,13 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i) i->setSrc(arg - 1, src); if (i->tex.target.isCube()) { - Value *acube[4], *a2d[4]; + std::vector acube, a2d; int c; + acube.resize(4); for (c = 0; c < 4; ++c) acube[c] = i->getSrc(c); + a2d.resize(4); for (c = 0; c < 3; ++c) a2d[c] = new_LValue(func, FILE_GPR); a2d[3] = NULL; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp index 00a80544c17..a2b61104f6f 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp @@ -532,7 +532,9 @@ void Instruction::print() const if (perPatch) PRINT("patch "); if (asTex()) - PRINT("%s ", asTex()->tex.target.getName()); + PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(), + colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s, + colour[TXT_INSN]); if (postFactor) 
PRINT("x2^%i ", postFactor); PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]);