From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 22 Feb 2013 23:39:23 +0000 (+0100)
Subject: nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c2dfcd7f0eaf4eda375eb320e0e87793a80ef92d;p=mesa.git

nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE

Squashed and (heavily) modified original patches by Francisco Jerez:
nv50/ir/tgsi: Implement resource LOAD/STORE (wip).
nv50/ir/tgsi: Emit SUST/SULD for surface access, and add CB LOAD/STORE support
nv50/ir/tgsi: Fix/clean up the LOAD/STORE handling code.

Left out for now:
nv50/ir/tgsi: Resource indirect indexing

Treating raw, read-only surfaces as constant buffers (CBs) was removed
because CBs are limited to a size of 64 KiB, which isn't desirable, and
because this decision should probably be made by the state tracker.
If we used a number of CB slots for surfaces, it might find that we
cannot accommodate the advertised limit.
---

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index bdea48bbdf3..dd7ff90de96 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -871,9 +871,9 @@ public:
    struct {
       Target target;
 
-      uint8_t r;
+      uint16_t r;
+      uint16_t s;
       int8_t rIndirectSrc;
-      uint8_t s;
       int8_t sIndirectSrc;
 
       uint8_t mask;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
index 4448e8299d2..0af2c61b3e5 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
@@ -240,15 +240,17 @@ BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
    return insn;
 }
 
-Instruction *
-BuildUtil::mkTex(operation op, TexTarget targ, uint8_t tic, uint8_t tsc,
-                 Value **def, Value **src)
+TexInstruction *
+BuildUtil::mkTex(operation op, TexTarget targ,
+                 uint16_t tic, uint16_t tsc,
+                 const std::vector<Value *> &def,
+                 const std::vector<Value *> &src)
 {
    TexInstruction *tex = new_TexInstruction(func, op);
 
-   for (int d = 0; d < 4 && def[d]; ++d)
+   for (size_t d = 0; d < def.size() && def[d]; ++d)
       tex->setDef(d, def[d]);
-   for (int s = 0; s < 4 && src[s]; ++s)
+   for (size_t s = 0; s < src.size() && src[s]; ++s)
       tex->setSrc(s, src[s]);
 
    tex->setTexture(targ, tic, tsc);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
index 963c3505083..f48dbc21168 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -75,8 +75,10 @@ public:
    CmpInstruction *mkCmp(operation, CondCode, DataType,
 			 Value *,
 			 Value *, Value *, Value * = NULL);
-   Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc,
-                      Value **def, Value **src);
+   TexInstruction *mkTex(operation, TexTarget,
+                         uint16_t tic, uint16_t tsc,
+                         const std::vector<Value *> &def,
+                         const std::vector<Value *> &src);
    Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
 
    FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
index deee60cd6a7..933a5e106ac 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -161,6 +161,10 @@ struct nv50_ir_prog_info
          boolean separateFragData;
          boolean usesDiscard;
       } fp;
+      struct {
+         uint32_t inputOffset; /* base address for user args */
+         uint32_t sharedOffset; /* reserved space in s[] */
+      } cp;
    } prop;
 
    struct {
@@ -179,6 +183,7 @@ struct nv50_ir_prog_info
       uint8_t sampleMask;        /* output index of SampleMask */
       uint8_t backFaceColor[2];  /* input/output indices of back face colour */
       uint8_t globalAccess;      /* 1 for read, 2 for wr, 3 for rw */
+      boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
       uint8_t resInfoCBSlot;     /* cX[] used for tex handles, surface info */
       uint16_t texBindBase;      /* base address for tex handles (nve4) */
       uint16_t suInfoBase;       /* base address for surface info (nve4) */
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 69c05c1464c..afbabfde23d 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -559,7 +559,6 @@ static nv50_ir::operation translateOpcode(uint opcode)
    NV50_IR_OPCODE_CASE(USLT, SET);
    NV50_IR_OPCODE_CASE(USNE, SET);
 
-   NV50_IR_OPCODE_CASE(LOAD, TXF);
    NV50_IR_OPCODE_CASE(SAMPLE, TEX);
    NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
    NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
@@ -620,8 +619,17 @@ public:
 
    int clipVertexOutput;
 
-   uint8_t *samplerViewTargets; // TGSI_TEXTURE_*
-   unsigned samplerViewCount;
+   struct TextureView {
+      uint8_t target; // TGSI_TEXTURE_*
+   };
+   std::vector<TextureView> textureViews;
+
+   struct Resource {
+      uint8_t target; // TGSI_TEXTURE_*
+      bool raw;
+      uint8_t slot; // $surface index
+   };
+   std::vector<Resource> resources;
 
 private:
    int inferSysValDirection(unsigned sn) const;
@@ -640,8 +648,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
       tgsi_dump(tokens, 0);
 
-   samplerViewTargets = NULL;
-
    mainTempsInLMem = FALSE;
 }
 
@@ -654,9 +660,6 @@ Source::~Source()
       FREE(info->immd.data);
    if (info->immd.type)
       FREE(info->immd.type);
-
-   if (samplerViewTargets)
-      delete[] samplerViewTargets;
 }
 
 bool Source::scanSource()
@@ -673,8 +676,8 @@ bool Source::scanSource()
 
    clipVertexOutput = -1;
 
-   samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
-   samplerViewTargets = new uint8_t[samplerViewCount];
+   textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
+   resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
 
    info->immd.bufSize = 0;
    tempArrayCount = 0;
@@ -899,9 +902,16 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
          info->sv[i].input = inferSysValDirection(sn);
       }
       break;
+   case TGSI_FILE_RESOURCE:
+      for (i = first; i <= last; ++i) {
+         resources[i].target = decl->Resource.Resource;
+         resources[i].raw = decl->Resource.Raw;
+         resources[i].slot = i;
+      }
+      break;
    case TGSI_FILE_SAMPLER_VIEW:
       for (i = first; i <= last; ++i)
-         samplerViewTargets[i] = decl->SamplerView.Resource;
+         textureViews[i].target = decl->SamplerView.Resource;
       break;
    case TGSI_FILE_IMMEDIATE_ARRAY:
    {
@@ -997,9 +1007,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
 
    for (unsigned s = 0; s < insn.srcCount(); ++s) {
       Instruction::SrcRegister src = insn.getSrc(s);
-      if (src.getFile() == TGSI_FILE_TEMPORARY)
+      if (src.getFile() == TGSI_FILE_TEMPORARY) {
          if (src.isIndirect(0))
             mainTempsInLMem = TRUE;
+      } else
+      if (src.getFile() == TGSI_FILE_RESOURCE) {
+         if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
+            info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+               0x1 : 0x2;
+      }
       if (src.getFile() != TGSI_FILE_INPUT)
          continue;
       unsigned mask = insn.srcMask(s);
@@ -1025,13 +1041,16 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
 nv50_ir::TexInstruction::Target
 Instruction::getTexture(const tgsi::Source *code, int s) const
 {
+   // XXX: indirect access
+   unsigned int r;
+
    switch (getSrc(s).getFile()) {
-   case TGSI_FILE_SAMPLER_VIEW: {
-      // XXX: indirect access
-      unsigned int r = getSrc(s).getIndex(0);
-      assert(r < code->samplerViewCount);
-      return translateTexture(code->samplerViewTargets[r]);
-   }
+   case TGSI_FILE_RESOURCE:
+      r = getSrc(s).getIndex(0);
+      return translateTexture(code->resources.at(r).target);
+   case TGSI_FILE_SAMPLER_VIEW:
+      r = getSrc(s).getIndex(0);
+      return translateTexture(code->textureViews.at(r).target);
    default:
       return translateTexture(insn->Texture.Texture);
    }
@@ -1091,6 +1110,12 @@ private:
    void handleLIT(Value *dst0[4]);
    void handleUserClipPlanes();
 
+   Symbol *getResourceBase(int r);
+   void getResourceCoords(std::vector<Value *>&, int r, int s);
+
+   void handleLOAD(Value *dst0[4]);
+   void handleSTORE();
+
    Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
 
    void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
@@ -1710,6 +1735,236 @@ Converter::handleLIT(Value *dst0[4])
    }
 }
 
+static inline bool
+isResourceSpecial(const int r)
+{
+   return (r == TGSI_RESOURCE_GLOBAL ||
+           r == TGSI_RESOURCE_LOCAL ||
+           r == TGSI_RESOURCE_PRIVATE ||
+           r == TGSI_RESOURCE_INPUT);
+}
+
+static inline bool
+isResourceRaw(const struct tgsi::Source *code, const int r)
+{
+   return isResourceSpecial(r) || code->resources[r].raw;
+}
+
+static inline nv50_ir::TexTarget
+getResourceTarget(const struct tgsi::Source *code, int r)
+{
+   if (isResourceSpecial(r))
+      return nv50_ir::TEX_TARGET_BUFFER;
+   return tgsi::translateTexture(code->resources.at(r).target);
+}
+
+Symbol *
+Converter::getResourceBase(const int r)
+{
+   Symbol *sym = NULL;
+
+   switch (r) {
+   case TGSI_RESOURCE_GLOBAL:
+      sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
+      break;
+   case TGSI_RESOURCE_LOCAL:
+      assert(prog->getType() == Program::TYPE_COMPUTE);
+      sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
+                     info->prop.cp.sharedOffset);
+      break;
+   case TGSI_RESOURCE_PRIVATE:
+      sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
+                     info->bin.tlsSpace);
+      break;
+   case TGSI_RESOURCE_INPUT:
+      assert(prog->getType() == Program::TYPE_COMPUTE);
+      sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
+                     info->prop.cp.inputOffset);
+      break;
+   default:
+      sym = new_Symbol(prog,
+                       nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
+      break;
+   }
+   return sym;
+}
+
+void
+Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
+{
+   const int arg =
+      TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
+
+   for (int c = 0; c < arg; ++c)
+      coords.push_back(fetchSrc(s, c));
+
+   // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
+   if (r == TGSI_RESOURCE_LOCAL ||
+       r == TGSI_RESOURCE_PRIVATE ||
+       r == TGSI_RESOURCE_INPUT)
+      coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
+                         coords[0]);
+}
+
+static inline int
+partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
+{
+   int n = 0;
+
+   while (mask) {
+      if (mask & 1) {
+         size[n]++;
+      } else {
+         if (size[n])
+            comp[n = 1] = size[0] + 1;
+         else
+            comp[n]++;
+      }
+      mask >>= 1;
+   }
+   if (size[0] == 3) {
+      n = 1;
+      size[0] = (comp[0] == 1) ? 1 : 2;
+      size[1] = 3 - size[0];
+      comp[1] = comp[0] + size[0];
+   }
+   return n + 1;
+}
+
+// For raw loads, granularity is 4 bytes.
+// Usage of the texture read mask on OP_SULDP is not allowed.
+void
+Converter::handleLOAD(Value *dst0[4])
+{
+   const int r = tgsi.getSrc(0).getIndex(0);
+   int c;
+   std::vector<Value *> off, src, ldv, def;
+
+   getResourceCoords(off, r, 1);
+
+   if (isResourceRaw(code, r)) {
+      uint8_t mask = 0;
+      uint8_t comp[2] = { 0, 0 };
+      uint8_t size[2] = { 0, 0 };
+
+      Symbol *base = getResourceBase(r);
+
+      // determine the base and size of the at most 2 load ops
+      for (c = 0; c < 4; ++c)
+         if (!tgsi.getDst(0).isMasked(c))
+            mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
+
+      int n = partitionLoadStore(comp, size, mask);
+
+      src = off;
+
+      def.resize(4); // index by component, the ones we need will be non-NULL
+      for (c = 0; c < 4; ++c) {
+         if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
+            def[c] = dst0[c];
+         else
+         if (mask & (1 << c))
+            def[c] = getScratch();
+      }
+
+      const bool useLd = isResourceSpecial(r) ||
+         (info->io.nv50styleSurfaces &&
+          code->resources[r].target == TGSI_TEXTURE_BUFFER);
+
+      for (int i = 0; i < n; ++i) {
+         ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
+
+         if (comp[i]) // adjust x component of source address if necessary
+            src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
+                            off[0], mkImm(comp[i] * 4));
+         else
+            src[0] = off[0];
+
+         if (useLd) {
+            Instruction *ld =
+               mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
+            for (size_t c = 1; c < ldv.size(); ++c)
+               ld->setDef(c, ldv[c]);
+         } else {
+            mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
+                  0, ldv, src)->dType = typeOfSize(size[i] * 4);
+         }
+      }
+   } else {
+      def.resize(4);
+      for (c = 0; c < 4; ++c) {
+         if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
+            def[c] = getScratch();
+         else
+            def[c] = dst0[c];
+      }
+
+      mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
+            def, off);
+   }
+   FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+      if (dst0[c] != def[c])
+         mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
+}
+
+// For formatted stores, the write mask on OP_SUSTP can be used.
+// Raw stores have to be split.
+void
+Converter::handleSTORE()
+{
+   const int r = tgsi.getDst(0).getIndex(0);
+   int c;
+   std::vector<Value *> off, src, dummy;
+
+   getResourceCoords(off, r, 0);
+   src = off;
+   const int s = src.size();
+
+   if (isResourceRaw(code, r)) {
+      uint8_t comp[2] = { 0, 0 };
+      uint8_t size[2] = { 0, 0 };
+
+      int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
+
+      Symbol *base = getResourceBase(r);
+
+      const bool useSt = isResourceSpecial(r) ||
+         (info->io.nv50styleSurfaces &&
+          code->resources[r].target == TGSI_TEXTURE_BUFFER);
+
+      for (int i = 0; i < n; ++i) {
+         if (comp[i]) // adjust x component of source address if necessary
+            src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
+                            off[0], mkImm(comp[i] * 4));
+         else
+            src[0] = off[0];
+
+         const DataType stTy = typeOfSize(size[i] * 4);
+
+         if (useSt) {
+            Instruction *st =
+               mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
+            for (c = 1; c < size[i]; ++c)
+               st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
+            st->setIndirect(0, 0, src[0]);
+         } else {
+            // attach values to be stored
+            src.resize(s + size[i]);
+            for (c = 0; c < size[i]; ++c)
+               src[s + c] = fetchSrc(1, comp[i] + c);
+            mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
+                  0, dummy, src)->setType(stTy);
+         }
+      }
+   } else {
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+         src.push_back(fetchSrc(1, c));
+
+      mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
+            dummy, src)->tex.mask = tgsi.getDst(0).getMask();
+   }
+}
+
 Converter::Subroutine *
 Converter::getSubroutine(unsigned ip)
 {
@@ -2072,7 +2327,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
       break;
    case TGSI_OPCODE_TXF:
-   case TGSI_OPCODE_LOAD:
       handleTXF(dst0, 1);
       break;
    case TGSI_OPCODE_TXQ:
@@ -2257,6 +2511,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       ERROR("switch/case opcode encountered, should have been lowered\n");
       abort();
       break;
+   case TGSI_OPCODE_LOAD:
+      handleLOAD(dst0);
+      break;
+   case TGSI_OPCODE_STORE:
+      handleSTORE();
+      break;
    default:
       ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
       assert(0);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index db1306151ea..83f7201fc35 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -594,11 +594,13 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
       i->setSrc(arg - 1, src);
 
       if (i->tex.target.isCube()) {
-         Value *acube[4], *a2d[4];
+         std::vector<Value *> acube, a2d;
          int c;
 
+         acube.resize(4);
          for (c = 0; c < 4; ++c)
             acube[c] = i->getSrc(c);
+         a2d.resize(4);
          for (c = 0; c < 3; ++c)
             a2d[c] = new_LValue(func, FILE_GPR);
          a2d[3] = NULL;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index 00a80544c17..a2b61104f6f 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -532,7 +532,9 @@ void Instruction::print() const
       if (perPatch)
          PRINT("patch ");
       if (asTex())
-         PRINT("%s ", asTex()->tex.target.getName());
+         PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(),
+               colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s,
+               colour[TXT_INSN]);
       if (postFactor)
          PRINT("x2^%i ", postFactor);
       PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""),  DataTypeStr[dType]);