From c3083c70823d8f4bfdabcf38f98dfebeff0a2b2b Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 2 Jan 2016 22:25:31 -0500 Subject: [PATCH] nv50/ir: add support for BUFFER accesses This largely leaves the existing image logic alone. When image support is added this will have to be harmonized somehow. Signed-off-by: Ilia Mirkin --- .../drivers/nouveau/codegen/nv50_ir.cpp | 3 + .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 98 +++++++++++++++++-- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 50 +++++++++- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 2 + .../nouveau/codegen/nv50_ir_peephole.cpp | 2 +- .../drivers/nouveau/nvc0/nvc0_program.c | 3 + 6 files changed, 147 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 6ad9dd31681..75e5fd843c2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const case TYPE_S32: case TYPE_U32: return reg.data.s32 == i; // as if ... + case TYPE_S64: + case TYPE_U64: + return reg.data.s64 == i; // as if ... 
case TYPE_F32: return reg.data.f32 == static_cast<float>(i); case TYPE_F64: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 9c4a38f291b..81cd4e9584f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode); static nv50_ir::DataFile translateFile(uint file); static nv50_ir::TexTarget translateTexture(uint texTarg); static nv50_ir::SVSemantic translateSysVal(uint sysval); +static nv50_ir::CacheMode translateCacheMode(uint qualifier); class Instruction { @@ -213,6 +214,12 @@ public: nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; + nv50_ir::CacheMode getCacheMode() const { + if (!insn->Instruction.Memory) + return nv50_ir::CACHE_CA; + return translateCacheMode(insn->Memory.Qualifier); + } + inline uint getLabel() { return insn->Label.Label; } unsigned getSaturate() const { return insn->Instruction.Saturate; } @@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; - //case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: default: @@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex) } } +static nv50_ir::CacheMode translateCacheMode(uint qualifier) +{ + if (qualifier & TGSI_MEMORY_VOLATILE) + return nv50_ir::CACHE_CV; + if (qualifier & TGSI_MEMORY_COHERENT) + return nv50_ir::CACHE_CG; + return nv50_ir::CACHE_CA; +} + nv50_ir::DataType Instruction::inferSrcType() const { switch (getOpcode()) { @@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case 
TGSI_FILE_IMMEDIATE: case TGSI_FILE_PREDICATE: case TGSI_FILE_SAMPLER: + case TGSI_FILE_BUFFER: break; default: ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); @@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { if (insn.getDst(0).isIndirect(0)) indirectTempArrays.insert(insn.getDst(0).getArrayId()); + } else + if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) { + info->io.globalAccess |= 0x2; } } @@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (src.isIndirect(0)) indirectTempArrays.insert(src.getArrayId()); } else -/* - if (src.getFile() == TGSI_FILE_RESOURCE) { - if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) - info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + if (src.getFile() == TGSI_FILE_BUFFER) { + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 0x1 : 0x2; } else -*/ if (src.getFile() == TGSI_FILE_OUTPUT) { if (src.isIndirect(0)) { // We don't know which one is accessed, just mark everything for @@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c) int idx = dst.getIndex(0); int idx2d = dst.is2D() ? 
dst.getIndex(1) : 0; - if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/) + if (dst.isMasked(c) || f == TGSI_FILE_BUFFER) return NULL; if (dst.isIndirect(0) || @@ -2222,6 +2239,25 @@ Converter::handleLOAD(Value *dst0[4]) int c; std::vector<Value *> off, src, ldv, def; + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + for (c = 0; c < 4; ++c) { + if (!dst0[c]) + continue; + + Value *off = fetchSrc(1, c); + Symbol *sym; + if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { + off = NULL; + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c); + } else { + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + } + + mkLoad(TYPE_U32, dst0[c], sym, off)->cache = tgsi.getCacheMode(); + } + return; + } + getResourceCoords(off, r, 1); if (isResourceRaw(code, r)) { @@ -2298,6 +2334,28 @@ Converter::handleSTORE() int c; std::vector<Value *> off, src, dummy; + if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) { + for (c = 0; c < 4; ++c) { + if (!(tgsi.getDst(0).getMask() & (1 << c))) + continue; + + Symbol *sym; + Value *off; + if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { + off = NULL; + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, + tgsi.getSrc(0).getValueU32(0, info) + 4 * c); + } else { + off = fetchSrc(0, 0); + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + } + + mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)) + ->cache = tgsi.getCacheMode(); + } + return; + } + getResourceCoords(off, r, 0); src = off; const int s = src.size(); @@ -2359,6 +2417,32 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) std::vector<Value *> defv; LValue *dst = getScratch(); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + for (int c = 0; c < 4; ++c) { + if (!dst0[c]) + continue; + + Instruction *insn; + Value *off = fetchSrc(1, c); + Value *sym; + if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info)); + else + sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0); + 
insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c)); + if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE) + insn->setIndirect(0, 0, off); + insn->subOp = subOp; + if (subOp == NV50_IR_SUBOP_ATOM_CAS) + insn->setSrc(2, fetchSrc(3, 0)); + } + for (int c = 0; c < 4; ++c) + if (dst0[c]) + dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov + return; + } + + getResourceCoords(srcv, r, 1); if (isResourceSpecial(r)) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index dc1ab769b98..638cef921b6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1022,11 +1022,11 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i) return true; } - bool NVC0LoweringPass::handleATOM(Instruction *atom) { SVSemantic sv; + Value *ptr = atom->getIndirect(0, 0), *base; switch (atom->src(0).getFile()) { case FILE_MEMORY_LOCAL: @@ -1037,11 +1037,16 @@ NVC0LoweringPass::handleATOM(Instruction *atom) break; default: assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); + base = loadResInfo64(NULL, atom->getSrc(0)->reg.fileIndex * 16); + assert(base->reg.size == 8); + if (ptr) + base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); + assert(base->reg.size == 8); + atom->setIndirect(0, 0, base); return true; } - Value *base = + base = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0)); - Value *ptr = atom->getIndirect(0, 0); atom->setSrc(0, cloneShallow(func, atom->getSrc(0))); atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; @@ -1093,6 +1098,26 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); } +inline Value * +NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off) +{ + uint8_t b = prog->driver->io.resInfoCBSlot; + off += prog->driver->io.suInfoBase; + + return bld. 
+ mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr); +} + +inline Value * +NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off) +{ + uint8_t b = prog->driver->io.resInfoCBSlot; + off += prog->driver->io.suInfoBase; + + return bld. + mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr); +} + inline Value * NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) { @@ -1786,6 +1811,7 @@ NVC0LoweringPass::visit(Instruction *i) return handleRDSV(i); case OP_WRSV: return handleWRSV(i); + case OP_STORE: case OP_LOAD: if (i->src(0).getFile() == FILE_SHADER_INPUT) { if (prog->getType() == Program::TYPE_COMPUTE) { @@ -1820,6 +1846,24 @@ NVC0LoweringPass::visit(Instruction *i) } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); i->op = OP_VFETCH; + } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { + Value *ptr = loadResInfo64(NULL, i->getSrc(0)->reg.fileIndex * 16); + // XXX come up with a way not to do this for EVERY little access but + // rather to batch these up somehow. Unfortunately we've lost the + // information about the field width by the time we get here. 
+ Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType)); + Value *length = loadResLength32(NULL, i->getSrc(0)->reg.fileIndex * 16); + Value *pred = new_LValue(func, FILE_PREDICATE); + if (i->src(0).isIndirect(0)) { + bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); + bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0)); + } + i->setIndirect(0, 0, ptr); + bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); + i->setPredicate(CC_NOT_P, pred); + if (i->defExists(0)) { + bld.mkMov(i->getDef(0), bld.mkImm(0)); + } } break; case OP_ATOM: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index adb400a559a..874b81e82b8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -116,6 +116,8 @@ private: void readTessCoord(LValue *dst, int c); Value *loadResInfo32(Value *ptr, uint32_t off); + Value *loadResInfo64(Value *ptr, uint32_t off); + Value *loadResLength32(Value *ptr, uint32_t off); Value *loadMsInfo32(Value *ptr, uint32_t off); Value *loadTexHandle(Value *ptr, unsigned int slot); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index eb790d028f1..684998e817b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -3044,7 +3044,7 @@ Instruction::isResultEqual(const Instruction *that) const if (that->srcExists(s)) return false; - if (op == OP_LOAD || op == OP_VFETCH) { + if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) { switch (src(0).getFile()) { case FILE_MEMORY_CONST: case FILE_SHADER_INPUT: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index c3b53621630..93f211bd5fc 100644 --- 
a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, } info->io.resInfoCBSlot = 15; info->io.sampleInfoBase = 256 + 128; + info->io.suInfoBase = 512; info->io.msInfoCBSlot = 15; info->io.msInfoBase = 0; /* TODO */ } @@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, } */ if (info->io.globalAccess) + prog->hdr[0] |= 1 << 26; + if (info->io.globalAccess & 0x2) prog->hdr[0] |= 1 << 16; if (info->io.fp64) prog->hdr[0] |= 1 << 27; -- 2.30.2