From 7b9a77b905bda3003dc57efb99879499ebc4ba41 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 18 Jan 2016 23:34:01 -0500 Subject: [PATCH] nv50/ir: add support for indirect buffer loading Signed-off-by: Ilia Mirkin --- .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 19 +++++++++++----- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 22 ++++++++++++++----- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index f5448293e54..735e2891cf2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -2253,7 +2253,10 @@ Converter::handleLOAD(Value *dst0[4]) sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); } - mkLoad(TYPE_U32, dst0[c], sym, off)->cache = tgsi.getCacheMode(); + Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); + ld->cache = tgsi.getCacheMode(); + if (tgsi.getSrc(0).isIndirect(0)) + ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); } return; } @@ -2350,8 +2353,10 @@ Converter::handleSTORE() sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); } - mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)) - ->cache = tgsi.getCacheMode(); + Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)); + st->cache = tgsi.getCacheMode(); + if (tgsi.getDst(0).isIndirect(0)) + st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0)); } return; } @@ -2432,6 +2437,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c)); if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE) insn->setIndirect(0, 0, off); + if (tgsi.getSrc(0).isIndirect(0)) + insn->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); insn->subOp = subOp; if (subOp == NV50_IR_SUBOP_ATOM_CAS) insn->setSrc(2, fetchSrc(3, 0)); @@ -3200,8 +3207,10 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; case TGSI_OPCODE_RESQ: - mkOp1(OP_SUQ, TYPE_U32, dst0[0], - makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0)); + geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0], + makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0)); + if (tgsi.getSrc(0).isIndirect(0)) + geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); break; case TGSI_OPCODE_IBFE: case TGSI_OPCODE_UBFE: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 7f65b13b55f..0a77dce85c2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1026,7 +1026,10 @@ bool NVC0LoweringPass::handleSUQ(Instruction *suq) { suq->op = OP_MOV; - suq->setSrc(0, loadResLength32(NULL, suq->getSrc(0)->reg.fileIndex * 16)); + suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1), + suq->getSrc(0)->reg.fileIndex * 16)); + suq->setIndirect(0, 0, NULL); + suq->setIndirect(0, 1, NULL); return true; } @@ -1034,7 +1037,7 @@ bool NVC0LoweringPass::handleATOM(Instruction *atom) { SVSemantic sv; - Value *ptr = atom->getIndirect(0, 0), *base; + Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base; switch (atom->src(0).getFile()) { case FILE_MEMORY_LOCAL: @@ -1045,7 +1048,7 @@ NVC0LoweringPass::handleATOM(Instruction *atom) break; default: assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); - base = loadResInfo64(NULL, atom->getSrc(0)->reg.fileIndex * 16); + base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); assert(base->reg.size == 8); if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); @@ -1060,6 +1063,7 @@ NVC0LoweringPass::handleATOM(Instruction *atom) atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr); + atom->setIndirect(0, 1, NULL); atom->setIndirect(0, 0, base); return true; @@ -1112,6 +1116,9 @@ NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off) uint8_t b = prog->driver->io.resInfoCBSlot; off += prog->driver->io.suInfoBase; + if (ptr) + ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); + return bld. mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr); } @@ -1122,6 +1129,9 @@ NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off) uint8_t b = prog->driver->io.resInfoCBSlot; off += prog->driver->io.suInfoBase; + if (ptr) + ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); + return bld. mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr); } @@ -1855,17 +1865,19 @@ NVC0LoweringPass::visit(Instruction *i) assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); i->op = OP_VFETCH; } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { - Value *ptr = loadResInfo64(NULL, i->getSrc(0)->reg.fileIndex * 16); + Value *ind = i->getIndirect(0, 1); + Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); // XXX come up with a way not to do this for EVERY little access but // rather to batch these up somehow. Unfortunately we've lost the // information about the field width by the time we get here. Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType)); - Value *length = loadResLength32(NULL, i->getSrc(0)->reg.fileIndex * 16); + Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16); Value *pred = new_LValue(func, FILE_PREDICATE); if (i->src(0).isIndirect(0)) { bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0)); } + i->setIndirect(0, 1, NULL); i->setIndirect(0, 0, ptr); bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); i->setPredicate(CC_NOT_P, pred); -- 2.30.2