nv50/ir: add atomics support on shared memory for Fermi
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 26 Jan 2016 00:32:23 +0000 (01:32 +0100)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Sun, 21 Feb 2016 09:42:32 +0000 (10:42 +0100)
Changes from v3:
 - move the previous OP_SELP change to the previous commit

Changes from v2:
 - make sure the op is OP_SELP when emitting the predicate and add one
   assert
 - use bld.getSSA() for mkOp2()
 - add cross edge between tryLockAndSetBB and joinBB

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h

index e7cb54bc426ec7e78e3a237766cba4cf5f072e8f..21a6f1eebf6f11d32eb4c670ae284f5d4dc14023 100644 (file)
@@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
    return true;
 }
 
+void
+NVC0LoweringPass::handleSharedATOM(Instruction *atom)
+{
+   assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
+
+   BasicBlock *currBB = atom->bb;
+   BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false);
+   BasicBlock *joinBB = atom->bb->splitAfter(atom);
+
+   bld.setPosition(currBB, true);
+   assert(!currBB->joinAt);
+   currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
+
+   bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL);
+   currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE);
+
+   bld.setPosition(tryLockAndSetBB, true);
+
+   Instruction *ld =
+      bld.mkLoad(TYPE_U32, atom->getDef(0),
+                 bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
+   ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
+   ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
+
+   Value *stVal;
+   if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
+      // Read the old value, and write the new one.
+      stVal = atom->getSrc(1);
+   } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+      CmpInstruction *set =
+         bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+                   TYPE_U32, ld->getDef(0), atom->getSrc(1));
+      set->setPredicate(CC_P, ld->getDef(1));
+
+      CmpInstruction *selp =
+         bld.mkCmp(OP_SELP, CC_NOT_P, TYPE_U32, bld.getSSA(4, FILE_ADDRESS),
+                   TYPE_U32, ld->getDef(0), atom->getSrc(2),
+                   set->getDef(0));
+      selp->setPredicate(CC_P, ld->getDef(1));
+
+      stVal = selp->getDef(0);
+   } else {
+      operation op;
+
+      switch (atom->subOp) {
+      case NV50_IR_SUBOP_ATOM_ADD:
+         op = OP_ADD;
+         break;
+      case NV50_IR_SUBOP_ATOM_AND:
+         op = OP_AND;
+         break;
+      case NV50_IR_SUBOP_ATOM_OR:
+         op = OP_OR;
+         break;
+      case NV50_IR_SUBOP_ATOM_XOR:
+         op = OP_XOR;
+         break;
+      case NV50_IR_SUBOP_ATOM_MIN:
+         op = OP_MIN;
+         break;
+      case NV50_IR_SUBOP_ATOM_MAX:
+         op = OP_MAX;
+         break;
+      default:
+         assert(0);
+      }
+
+      Instruction *i =
+         bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0),
+                   atom->getSrc(1));
+      i->setPredicate(CC_P, ld->getDef(1));
+
+      stVal = i->getDef(0);
+   }
+
+   Instruction *st =
+      bld.mkStore(OP_STORE, TYPE_U32,
+                  bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
+                  NULL, stVal);
+   st->setPredicate(CC_P, ld->getDef(1));
+   st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
+
+   // Loop until the lock is acquired.
+   bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1));
+   tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK);
+   tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS);
+   bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
+
+   bld.remove(atom);
+
+   bld.setPosition(joinBB, false);
+   bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
+}
+
 bool
 NVC0LoweringPass::handleATOM(Instruction *atom)
 {
@@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
       sv = SV_LBASE;
       break;
    case FILE_MEMORY_SHARED:
-      sv = SV_SBASE;
-      break;
+      handleSharedATOM(atom);
+      return true;
    default:
       assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
       base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
@@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
 bool
 NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
 {
+   if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
+      // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
+      return false;
+   }
+
    if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
        cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
       return false;
index 09ec7e69ddcc97f5ca431f251ebdb4daf3d16978..6eb8aff303605f151556744f041ba227e9b80956 100644 (file)
@@ -105,6 +105,7 @@ protected:
    bool handleATOM(Instruction *);
    bool handleCasExch(Instruction *, bool needCctl);
    void handleSurfaceOpNVE4(TexInstruction *);
+   void handleSharedATOM(Instruction *);
 
    void checkPredicate(Instruction *);