From 14a810e9d0dbf52e547ae6e16a68487d7cb92004 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 26 Jan 2016 01:32:23 +0100 Subject: [PATCH] nv50/ir: add atomics support on shared memory for Fermi Changes from v3: - move the previous OP_SELP change to the previous commit Changes from v2: - make sure the op is OP_SELP when emitting the predicate and add one assert - use bld.getSSA() for mkOp2() - add cross edge between tryLockAndSetBB and joinBB Signed-off-by: Samuel Pitoiset Acked-by: Ilia Mirkin --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 103 +++++++++++++++++- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index e7cb54bc426..21a6f1eebf6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq) return true; } +void +NVC0LoweringPass::handleSharedATOM(Instruction *atom) +{ + assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); + + BasicBlock *currBB = atom->bb; + BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false); + BasicBlock *joinBB = atom->bb->splitAfter(atom); + + bld.setPosition(currBB, true); + assert(!currBB->joinAt); + currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); + + bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL); + currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE); + + bld.setPosition(tryLockAndSetBB, true); + + Instruction *ld = + bld.mkLoad(TYPE_U32, atom->getDef(0), + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL); + ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); + ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; + + Value *stVal; + if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { + // Read the old value, and write the new one. + stVal = atom->getSrc(1); + } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { + CmpInstruction *set = + bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), + TYPE_U32, ld->getDef(0), atom->getSrc(1)); + set->setPredicate(CC_P, ld->getDef(1)); + + CmpInstruction *selp = + bld.mkCmp(OP_SELP, CC_NOT_P, TYPE_U32, bld.getSSA(4, FILE_ADDRESS), + TYPE_U32, ld->getDef(0), atom->getSrc(2), + set->getDef(0)); + selp->setPredicate(CC_P, ld->getDef(1)); + + stVal = selp->getDef(0); + } else { + operation op; + + switch (atom->subOp) { + case NV50_IR_SUBOP_ATOM_ADD: + op = OP_ADD; + break; + case NV50_IR_SUBOP_ATOM_AND: + op = OP_AND; + break; + case NV50_IR_SUBOP_ATOM_OR: + op = OP_OR; + break; + case NV50_IR_SUBOP_ATOM_XOR: + op = OP_XOR; + break; + case NV50_IR_SUBOP_ATOM_MIN: + op = OP_MIN; + break; + case NV50_IR_SUBOP_ATOM_MAX: + op = OP_MAX; + break; + default: + assert(0); + } + + Instruction *i = + bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0), + atom->getSrc(1)); + i->setPredicate(CC_P, ld->getDef(1)); + + stVal = i->getDef(0); + } + + Instruction *st = + bld.mkStore(OP_STORE, TYPE_U32, + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), + NULL, stVal); + st->setPredicate(CC_P, ld->getDef(1)); + st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; + + // Loop until the lock is acquired. + bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1)); + tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK); + tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS); + bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL); + + bld.remove(atom); + + bld.setPosition(joinBB, false); + bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; +} + bool NVC0LoweringPass::handleATOM(Instruction *atom) { @@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom) sv = SV_LBASE; break; case FILE_MEMORY_SHARED: - sv = SV_SBASE; - break; + handleSharedATOM(atom); + return true; default: assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); @@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom) bool NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) { + if (cas->src(0).getFile() == FILE_MEMORY_SHARED) { + // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM(). + return false; + } + if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) return false; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 09ec7e69ddc..6eb8aff3036 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -105,6 +105,7 @@ protected: bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); void handleSurfaceOpNVE4(TexInstruction *); + void handleSharedATOM(Instruction *); void checkPredicate(Instruction *); -- 2.30.2