nvc0/ir: simplify predicate logic for GK104 atomic operations

[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_lowering_nvc0.cpp
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp

index 18955eb2e183250b3d7297e55a111af51dee197f..68f2b1544cba8c4a15451e6637ca5ec4a2011eb8 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -598,7 +598,6 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
  NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
  {
     bld.setProgram(prog);
-   gMemBase = NULL;
  }
  
  bool
@@ -751,6 +750,16 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
           i->tex.rIndirectSrc = 0;
           i->tex.sIndirectSrc = -1;
        }
+      // Move the indirect reference to right after the coords
+      else if (i->tex.rIndirectSrc >= 0 && chipset >= NVISA_GM107_CHIPSET) {
+         Value *hnd = i->getIndirectR();
+
+         i->setIndirectR(NULL);
+         i->moveSources(arg, 1);
+         i->setSrc(arg, hnd);
+         i->tex.rIndirectSrc = 0;
+         i->tex.sIndirectSrc = -1;
+      }
     } else
     // (nvc0) generate and move the tsc/tic/array source to the front
     if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
@@ -824,7 +833,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
           for (n = 0; n < i->tex.useOffsets; n++) {
              for (c = 0; c < 2; ++c) {
                 if ((n % 2) == 0 && c == 0)
-                  offs[n / 2] = i->offset[n][c].get();
+                  bld.mkMov(offs[n / 2] = bld.getScratch(), i->offset[n][c].get());
                 else
                    bld.mkOp3(OP_INSBF, TYPE_U32,
                              offs[n / 2],
@@ -1940,6 +1949,10 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
           bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]);
        }
     }
+
+   if (format->bgra) {
+      std::swap(typedDst[0], typedDst[2]);
+   }
  }
  
  void
@@ -1951,23 +1964,14 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
        convertSurfaceFormat(su);
  
     if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
-      Value *pred = su->getSrc(2);
-      CondCode cc = CC_NOT_P;
-      if (su->getPredicate()) {
-         pred = bld.getScratch(1, FILE_PREDICATE);
-         cc = su->cc;
-         if (cc == CC_NOT_P) {
-            bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2));
-         } else {
-            bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2));
-            pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT);
-         }
-      }
+      assert(su->getPredicate());
+      Value *pred =
+         bld.mkOp2v(OP_OR, TYPE_U8, bld.getScratch(1, FILE_PREDICATE),
+                    su->getPredicate(), su->getSrc(2));
+
        Instruction *red = bld.mkOp(OP_ATOM, su->dType, bld.getSSA());
        red->subOp = su->subOp;
-      if (!gMemBase)
-         gMemBase = bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0);
-      red->setSrc(0, gMemBase);
+      red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0));
        red->setSrc(1, su->getSrc(3));
        if (su->subOp == NV50_IR_SUBOP_ATOM_CAS)
           red->setSrc(2, su->getSrc(4));
@@ -1977,8 +1981,8 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
        // performed
        Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
  
-      assert(cc == CC_NOT_P);
-      red->setPredicate(cc, pred);
+      assert(su->cc == CC_NOT_P);
+      red->setPredicate(su->cc, pred);
        mov->setPredicate(CC_P, pred);
  
        bld.mkOp2(OP_UNION, TYPE_U32, su->getDef(0),
@@ -2108,6 +2112,78 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
     }
  }
  
+void
+NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
+{
+   const int slot = su->tex.r;
+   const int dim = su->tex.target.getDim();
+   const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
+   Value *ind = su->getIndirectR();
+   int pos = 0;
+
+   bld.setPosition(su, false);
+
+   // add texture handle
+   switch (su->op) {
+   case OP_SUSTP:
+      pos = 4;
+      break;
+   case OP_SUREDP:
+      pos = (su->subOp == NV50_IR_SUBOP_ATOM_CAS) ? 2 : 1;
+      break;
+   default:
+      assert(pos == 0);
+      break;
+   }
+   su->setSrc(arg + pos, loadTexHandle(ind, slot + 32));
+
+   // prevent read fault when the image is not actually bound
+   CmpInstruction *pred =
+      bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+                TYPE_U32, bld.mkImm(0),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+   if (su->op != OP_SUSTP && su->tex.format) {
+      const TexInstruction::ImgFormatDesc *format = su->tex.format;
+      int blockwidth = format->bits[0] + format->bits[1] +
+                       format->bits[2] + format->bits[3];
+
+      assert(format->components != 0);
+      // make sure that the format doesn't mismatch when it's not FMT_NONE
+      bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
+                TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
+                loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+                pred->getDef(0));
+   }
+   su->setPredicate(CC_NOT_P, pred->getDef(0));
+}
+
+void
+NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
+{
+   processSurfaceCoordsGM107(su);
+
+   if (su->op == OP_SULDP)
+      convertSurfaceFormat(su);
+
+   if (su->op == OP_SUREDP) {
+      Value *def = su->getDef(0);
+
+      su->op = OP_SUREDB;
+      su->setDef(0, bld.getSSA());
+
+      bld.setPosition(su, true);
+
+      // make sure to initialize dst value when the atomic operation is not
+      // performed
+      Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+
+      assert(su->cc == CC_NOT_P);
+      mov->setPredicate(CC_P, su->getPredicate());
+
+      bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0));
+   }
+}
+
  bool
  NVC0LoweringPass::handleWRSV(Instruction *i)
  {
@@ -2600,7 +2676,9 @@ NVC0LoweringPass::visit(Instruction *i)
     case OP_SUSTP:
     case OP_SUREDB:
     case OP_SUREDP:
-      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+      if (targ->getChipset() >= NVISA_GM107_CHIPSET)
+         handleSurfaceOpGM107(i->asTex());
+      else if (targ->getChipset() >= NVISA_GK104_CHIPSET)
           handleSurfaceOpNVE4(i->asTex());
        else
           handleSurfaceOpNVC0(i->asTex());
@@ -2617,13 +2695,30 @@ NVC0LoweringPass::visit(Instruction *i)
  
     /* Kepler+ has a special opcode to compute a new base address to be used
      * for indirect loads.
+    *
+    * Maxwell+ has an additional similar requirement for indirect
+    * interpolation ops in frag shaders.
      */
-   if (targ->getChipset() >= NVISA_GK104_CHIPSET && !i->perPatch &&
-       (i->op == OP_VFETCH || i->op == OP_EXPORT) && i->src(0).isIndirect(0)) {
+   bool doAfetch = false;
+   if (targ->getChipset() >= NVISA_GK104_CHIPSET &&
+       !i->perPatch &&
+       (i->op == OP_VFETCH || i->op == OP_EXPORT) &&
+       i->src(0).isIndirect(0)) {
+      doAfetch = true;
+   }
+   if (targ->getChipset() >= NVISA_GM107_CHIPSET &&
+       (i->op == OP_LINTERP || i->op == OP_PINTERP) &&
+       i->src(0).isIndirect(0)) {
+      doAfetch = true;
+   }
+
+   if (doAfetch) {
+      Value *addr = cloneShallow(func, i->getSrc(0));
        Instruction *afetch = bld.mkOp1(OP_AFETCH, TYPE_U32, bld.getSSA(),
-                                      cloneShallow(func, i->getSrc(0)));
+                                      i->getSrc(0));
        afetch->setIndirect(0, 0, i->getIndirect(0, 0));
-      i->src(0).get()->reg.data.offset = 0;
+      addr->reg.data.offset = 0;
+      i->setSrc(0, addr);
        i->setIndirect(0, 0, afetch->getDef(0));
     }