if (func_name == 'CreateInsertNUWNSWBinOp' or
func_name == 'CreateMaskedIntrinsic' or
func_name == 'CreateAlignmentAssumptionHelper' or
- func_name == 'CreateLoad'):
+ func_name == 'CreateGEP' or
+ func_name == 'CreateLoad' or
+ func_name == 'CreateMaskedLoad'):
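+ # these builders are wrapped manually (GEP/LOAD/MASKED_LOAD overrides in Builder/BuilderGfxMem), so don't auto-generate them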
ignore = True
# Convert CamelCase to CAMEL_CASE
}
int draw = 0;
- uint8_t *pIB = (uint8_t*)pState->indexBuffer.pIndices;
- pIB += (uint64_t)indexOffset * (uint64_t)indexSize;
+ gfxptr_t xpIB = pState->indexBuffer.xpIndices;
+ xpIB += (uint64_t)indexOffset * (uint64_t)indexSize;
pState->topology = topology;
pState->forceFront = false;
pDC->pState->pfnProcessPrims != nullptr);
pDC->FeWork.desc.draw.pDC = pDC;
pDC->FeWork.desc.draw.numIndices = numIndicesForDraw;
- pDC->FeWork.desc.draw.pIB = (int*)pIB;
+ pDC->FeWork.desc.draw.xpIB = xpIB;
pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
pDC->FeWork.desc.draw.numInstances = numInstances;
AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndicesForDraw, indexOffset, baseVertex,
numInstances, startInstance, pState->tsState.tsEnable, pState->gsState.gsEnable, pState->soState.soEnable, pState->gsState.outputTopology, draw));
- pIB += maxIndicesPerDraw * indexSize;
+ xpIB += maxIndicesPerDraw * indexSize;
remainingIndices -= numIndicesForDraw;
draw++;
}
};
union
{
- const int32_t* pIB; // DrawIndexed: App supplied indices
+ gfxptr_t xpIB; // DrawIndexed: App supplied int32 indices
uint32_t startVertex; // Draw: Starting vertex in VB to render from.
};
int32_t baseVertex;
uint32_t indexSize = 0;
uint32_t endVertex = work.numVerts;
- const int32_t* pLastRequestedIndex = nullptr;
+ gfxptr_t xpLastRequestedIndex = 0;
if (IsIndexedT::value)
{
switch (work.type)
{
case R32_UINT:
indexSize = sizeof(uint32_t);
- pLastRequestedIndex = &(work.pIB[endVertex]);
break;
case R16_UINT:
indexSize = sizeof(uint16_t);
- // nasty address offset to last index
- pLastRequestedIndex = (int32_t*)(&(((uint16_t*)work.pIB)[endVertex]));
break;
case R8_UINT:
indexSize = sizeof(uint8_t);
- // nasty address offset to last index
- pLastRequestedIndex = (int32_t*)(&(((uint8_t*)work.pIB)[endVertex]));
break;
default:
SWR_INVALID("Invalid work.type: %d", work.type);
}
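+ // compute the gfx address of the last requested index once, now that indexSize is known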
+ xpLastRequestedIndex = work.xpIB + endVertex * indexSize;
}
else
{
// if the entire index buffer isn't being consumed, set the last index
// so that fetches < a SIMD wide will be masked off
- fetchInfo_lo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
- if (pLastRequestedIndex < fetchInfo_lo.pLastIndex)
+ fetchInfo_lo.xpLastIndex = state.indexBuffer.xpIndices + state.indexBuffer.size;
+ if (xpLastRequestedIndex < fetchInfo_lo.xpLastIndex)
{
- fetchInfo_lo.pLastIndex = pLastRequestedIndex;
+ fetchInfo_lo.xpLastIndex = xpLastRequestedIndex;
}
}
else
if (IsIndexedT::value)
{
- fetchInfo_lo.pIndices = work.pIB;
- fetchInfo_hi.pIndices = (int32_t *)((uint8_t *)fetchInfo_lo.pIndices + KNOB_SIMD_WIDTH * indexSize); // 1/2 of KNOB_SIMD16_WIDTH
+ fetchInfo_lo.xpIndices = work.xpIB;
+ fetchInfo_hi.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH
}
else
{
vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
- fetchInfo_lo.pIndices = (const int32_t *)&vIndex;
- fetchInfo_hi.pIndices = (const int32_t *)&vIndex + KNOB_SIMD_WIDTH; // 1/2 of KNOB_SIMD16_WIDTH
+ fetchInfo_lo.xpIndices = (gfxptr_t)&vIndex;
+ fetchInfo_hi.xpIndices = (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH
}
fetchInfo_lo.CurInstance = instanceNum;
{
if (!IsIndexedT::value)
{
- fetchInfo_lo.pLastIndex = fetchInfo_lo.pIndices;
+ fetchInfo_lo.xpLastIndex = fetchInfo_lo.xpIndices;
uint32_t offset;
offset = std::min(endVertex-i, (uint32_t) KNOB_SIMD16_WIDTH);
#if USE_SIMD16_SHADERS
offset *= 4; // convert from index to address
- fetchInfo_lo.pLastIndex += offset;
+ fetchInfo_lo.xpLastIndex += offset;
#else
- fetchInfo_lo.pLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address
+ fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address
uint32_t offset2 = std::min(offset, (uint32_t) KNOB_SIMD16_WIDTH)-KNOB_SIMD_WIDTH;
assert(offset >= 0);
- fetchInfo_hi.pLastIndex = fetchInfo_hi.pIndices;
- fetchInfo_hi.pLastIndex += offset2 * 4; // * 4 for converting index to address
+ fetchInfo_hi.xpLastIndex = fetchInfo_hi.xpIndices;
+ fetchInfo_hi.xpLastIndex += offset2 * 4; // * 4 for converting index to address
#endif
}
// 1. Execute FS/VS for a single SIMD.
if (IsIndexedT::value)
{
- fetchInfo_lo.pIndices = (int32_t *)((uint8_t*)fetchInfo_lo.pIndices + KNOB_SIMD16_WIDTH * indexSize);
- fetchInfo_hi.pIndices = (int32_t *)((uint8_t*)fetchInfo_hi.pIndices + KNOB_SIMD16_WIDTH * indexSize);
+ fetchInfo_lo.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD16_WIDTH * indexSize;
+ fetchInfo_hi.xpIndices = fetchInfo_hi.xpIndices + KNOB_SIMD16_WIDTH * indexSize;
}
else
{
// if the entire index buffer isn't being consumed, set the last index
// so that fetches < a SIMD wide will be masked off
- fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
+ fetchInfo.xpLastIndex = state.indexBuffer.xpIndices + state.indexBuffer.size;
- if (pLastRequestedIndex < fetchInfo.pLastIndex)
+ if (xpLastRequestedIndex < fetchInfo.xpLastIndex)
{
- fetchInfo.pLastIndex = pLastRequestedIndex;
+ fetchInfo.xpLastIndex = xpLastRequestedIndex;
}
}
else
struct SWR_INDEX_BUFFER_STATE
{
- const void *pIndices;
+ gfxptr_t xpIndices;
// Format type for indices (e.g. UINT16, UINT32, etc.)
SWR_FORMAT format; // @llvm_enum
uint32_t size;
struct SWR_FETCH_CONTEXT
{
const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
- const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
- const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
+ gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
+ gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
uint32_t CurInstance; // IN: current instance
uint32_t BaseVertex; // IN: base vertex
uint32_t StartVertex; // IN: start vertex
BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) :
Builder(pJitMgr)
{
+ mpTranslationFuncTy = nullptr;
mpfnTranslateGfxAddress = nullptr;
mpParamSimDC = nullptr;
void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
{
- SWR_ASSERT(ptr->getType() == mInt64Ty, "GFX addresses must be gfxptr_t and not converted to system pointers.");
- SWR_ASSERT(usage != MEM_CLIENT_INTERNAL, "Internal memory should not go through the translation path and should not be gfxptr_t.");
+ SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t.");
}
//////////////////////////////////////////////////////////////////////////
return ADD(base, offset);
}
- LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
+ Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
{
- // the 64 bit gfx pointers are not yet propagated up the stack
- // so there is some casting in here and the test for type is not yet enabled
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ptr, Idx, nullptr, Name);
+ }
+
+ Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
+ {
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ty, Ptr, Idx, Name);
+ }
+
+ Value *BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*> &indexList, Type *Ty)
+ {
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ptr, indexList);
+ }
+
+ Value *BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
+ {
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::GEP(Ptr, indexList);
+ }
+
+ Value* BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty)
+ {
+ SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified.");
+
+ // the address may arrive as a 64-bit int (gfxptr_t); convert it to a typed pointer
+ if (Ptr->getType() == mInt64Ty)
+ {
+ Ptr = INT_TO_PTR(Ptr, Ty);
+ }
+
+ return Ptr;
+ }
+
+ LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
+ {
+ AssertGFXMemoryParams(Ptr, usage);
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::LOAD(Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
+ AssertGFXMemoryParams(Ptr, usage);
+
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::LOAD(Ptr, Name);
}
LoadInst* BuilderGfxMem::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
{
+ AssertGFXMemoryParams(Ptr, usage);
+
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::LOAD(Ty, Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
+ LoadInst* BuilderGfxMem::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
+ AssertGFXMemoryParams(Ptr, usage);
+
+ Ptr = TranslationHelper(Ptr, Ty);
return Builder::LOAD(Ptr, isVolatile, Name);
}
- LoadInst *BuilderGfxMem::LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name, JIT_MEM_CLIENT usage)
+ LoadInst *BuilderGfxMem::LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
{
- return Builder::LOAD(BasePtr, offset, name);
+ AssertGFXMemoryParams(BasePtr, usage);
+
+ // This call is just a pass-through to the base class. It needs to exist so
+ // that the combination of virtual overrides and signature overloads compiles.
+ // It does no translation itself: the base-class implementation calls another
+ // LOAD overload internally, where the actual per-offset translation takes
+ // place. Translating BasePtr once here would not be enough; each address
+ // needs individual translation.
+ return Builder::LOAD(BasePtr, offset, name, Ty, usage);
+ }
+
+ CallInst* BuilderGfxMem::MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+ {
+ AssertGFXMemoryParams(Ptr, usage);
+
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
}
Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress)
BuilderGfxMem(JitManager* pJitMgr);
virtual ~BuilderGfxMem() {}
- virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
+ virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
+ virtual Value *GEP(Value* Ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
+ virtual Value *GEP(Value* Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
+
+ virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+ virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+ Value* TranslationHelper(Value *Ptr, Type *Ty);
+
+ FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
+ Value* GetTranslationFunction() { return mpfnTranslateGfxAddress; }
+ Value* GetParamSimDC() { return mpParamSimDC; }
+
private:
-
+
+ FunctionType* mpTranslationFuncTy;
Value* mpfnTranslateGfxAddress;
Value* mpParamSimDC;
};
SWR_ASSERT(ptr->getType() != mInt64Ty, "Address appears to be GFX access. Requires translation through BuilderGfxMem.");
}
- Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
+ Value *Builder::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ptr, Idx, Name);
+ }
+
+ Value *Builder::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ty, Ptr, Idx, Name);
+ }
+
+ Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty)
{
std::vector<Value*> indices;
for (auto i : indexList)
return GEPA(ptr, indices);
}
- Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
+ Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
{
std::vector<Value*> indices;
for (auto i : indexList)
return GEPA(ptr, indices);
}
+ Value *Builder::GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ptr, IdxList, Name);
+ }
+
+ Value *Builder::GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+ {
+ return IRB()->CreateGEP(Ty, Ptr, IdxList, Name);
+ }
+
Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
{
std::vector<Value*> indices;
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
}
- LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, JIT_MEM_CLIENT usage)
+ LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
{
- AssertMemoryUsageParams(basePtr, usage);
std::vector<Value*> valIndices;
for (auto i : indices)
valIndices.push_back(C(i));
- return LOAD(GEPA(basePtr, valIndices), name);
+ return Builder::LOAD(GEPA(basePtr, valIndices), name);
}
LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list<Value*> &indices, const llvm::Twine& name)
public:
-Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
-Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
+virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
+virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
+virtual Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
+virtual Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
+
+Value *GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
+Value *GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
+
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
-virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL)
+{
+ return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
+}
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
streams->setName("pStreams");
- // SWR_FETCH_CONTEXT::pIndices
+ // SWR_FETCH_CONTEXT::xpIndices
- Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pIndices});
+ Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpIndices});
indices->setName("pIndices");
- // SWR_FETCH_CONTEXT::pLastIndex
+ // SWR_FETCH_CONTEXT::xpLastIndex
- Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pLastIndex});
+ Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpLastIndex});
pLastIndex->setName("pLastIndex");
Value* vIndices;
}
else
{
- pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
}
break;
case R16_UINT:
- indices = BITCAST(indices, Type::getInt16PtrTy(JM()->mContext, 0));
if(fetchState.bDisableIndexOOBCheck)
{
- vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
+ vIndices = LOAD(indices, "", PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0), GFX_MEM_CLIENT_FETCH);
}
else
{
- pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
}
break;
case R32_UINT:
- (fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(BITCAST(indices, PointerType::get(mSimdInt32Ty,0)),{(uint32_t)0})
+ (fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH)
: vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
break; // incoming type is already 32bit int
default:
}
}
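+// Helpers callable from JIT-compiled fetch code: pfnTranslate (with pdc) converts the
+// gfx virtual addresses to host pointers before the valid indices are written to outIndices.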
+typedef void*(*PFN_TRANSLATEGFXADDRESS_FUNC)(void* pdc, gfxptr_t va);
+extern "C" void GetSimdValid8bitIndicesGfx(gfxptr_t indices, gfxptr_t lastIndex, uint32_t vWidth, PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate, void* pdc, uint32_t* outIndices);
+extern "C" void GetSimdValid16bitIndicesGfx(gfxptr_t indices, gfxptr_t lastIndex, uint32_t vWidth, PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate, void* pdc, uint32_t* outIndices);
+
//////////////////////////////////////////////////////////////////////////
/// @brief Loads a simd of valid indices. OOB indices are set to 0
/// *Note* have to do 16bit index checking in scalar until we have AVX-512
/// @param pLastIndex - pointer to last valid index
Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex)
{
- // can fit 2 16 bit integers per vWidth lane
- Value* vIndices = VUNDEF_I();
+ SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters.");
- // store 0 index on stack to be used to conditionally load from if index address is OOB
- Value* pZeroIndex = ALLOCA(mInt8Ty);
- STORE(C((uint8_t)0), pZeroIndex);
+ Value* vIndices = VUNDEF_I();
- // Load a SIMD of index pointers
- for(int64_t lane = 0; lane < mVWidth; lane++)
{
- // Calculate the address of the requested index
- Value *pIndex = GEP(pIndices, C(lane));
+ // store 0 index on stack to be used to conditionally load from if index address is OOB
+ Value* pZeroIndex = ALLOCA(mInt8Ty);
+ STORE(C((uint8_t)0), pZeroIndex);
+
+ // convert the end-of-buffer gfx address to a pointer once, before the lane loop
+ pLastIndex = INT_TO_PTR(pLastIndex, mInt8PtrTy);
+
+ // Load a SIMD of index pointers
+ for (int64_t lane = 0; lane < mVWidth; lane++)
+ {
+ // Calculate the address of the requested index
+ Value *pIndex = GEP(pIndices, C(lane), mInt8PtrTy);
- // check if the address is less than the max index,
- Value* mask = ICMP_ULT(pIndex, pLastIndex);
- // if valid, load the index. if not, load 0 from the stack
- Value* pValid = SELECT(mask, pIndex, pZeroIndex);
- Value *index = LOAD(pValid, "valid index");
+ // check if the address is less than the max index,
+ Value* mask = ICMP_ULT(pIndex, pLastIndex);
- // zero extended index to 32 bits and insert into the correct simd lane
- index = Z_EXT(index, mInt32Ty);
- vIndices = VINSERT(vIndices, index, lane);
+ // if valid, load the index. if not, load 0 from the stack
+ Value* pValid = SELECT(mask, pIndex, pZeroIndex);
+ Value *index = LOAD(pValid, "valid index", PointerType::get(mInt8Ty, 0), GFX_MEM_CLIENT_FETCH);
+
+ // zero extended index to 32 bits and insert into the correct simd lane
+ index = Z_EXT(index, mInt32Ty);
+ vIndices = VINSERT(vIndices, index, lane);
+ }
}
+
return vIndices;
}
/// @param pLastIndex - pointer to last valid index
Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
{
- // can fit 2 16 bit integers per vWidth lane
- Value* vIndices = VUNDEF_I();
+ SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters.");
- // store 0 index on stack to be used to conditionally load from if index address is OOB
- Value* pZeroIndex = ALLOCA(mInt16Ty);
- STORE(C((uint16_t)0), pZeroIndex);
+ Value* vIndices = VUNDEF_I();
- // Load a SIMD of index pointers
- for(int64_t lane = 0; lane < mVWidth; lane++)
{
- // Calculate the address of the requested index
- Value *pIndex = GEP(pIndices, C(lane));
+ // store 0 index on stack to be used to conditionally load from if index address is OOB
+ Value* pZeroIndex = ALLOCA(mInt16Ty);
+ STORE(C((uint16_t)0), pZeroIndex);
- // check if the address is less than the max index,
- Value* mask = ICMP_ULT(pIndex, pLastIndex);
+ // convert the end-of-buffer gfx address to a pointer once, before the lane loop
+ pLastIndex = INT_TO_PTR(pLastIndex, mInt16PtrTy);
+
+ // Load a SIMD of index pointers
+ for (int64_t lane = 0; lane < mVWidth; lane++)
+ {
+ // Calculate the address of the requested index
+ Value *pIndex = GEP(pIndices, C(lane), mInt16PtrTy);
+
+ // check if the address is less than the max index,
+ Value* mask = ICMP_ULT(pIndex, pLastIndex);
- // if valid, load the index. if not, load 0 from the stack
- Value* pValid = SELECT(mask, pIndex, pZeroIndex);
- Value *index = LOAD(pValid, "valid index", GFX_MEM_CLIENT_FETCH);
+ // if valid, load the index. if not, load 0 from the stack
+ Value* pValid = SELECT(mask, pIndex, pZeroIndex);
+ Value *index = LOAD(pValid, "valid index", PointerType::get(mInt16Ty, 0), GFX_MEM_CLIENT_FETCH);
- // zero extended index to 32 bits and insert into the correct simd lane
- index = Z_EXT(index, mInt32Ty);
- vIndices = VINSERT(vIndices, index, lane);
+ // zero extended index to 32 bits and insert into the correct simd lane
+ index = Z_EXT(index, mInt32Ty);
+ vIndices = VINSERT(vIndices, index, lane);
+ }
}
+
return vIndices;
}
{
DataLayout dL(JM()->mpCurrentModule);
unsigned int ptrSize = dL.getPointerSize() * 8; // ptr size in bits
- Value* iLastIndex = PTR_TO_INT(pLastIndex, Type::getIntNTy(JM()->mContext, ptrSize));
- Value* iIndices = PTR_TO_INT(pIndices, Type::getIntNTy(JM()->mContext, ptrSize));
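+ // pIndices and pLastIndex already arrive as gfxptr_t integers, so no PTR_TO_INT is needed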
+ Value* iLastIndex = pLastIndex;
+ Value* iIndices = pIndices;
// get the number of indices left in the buffer (endPtr - curPtr) / sizeof(index)
Value* numIndicesLeft = SUB(iLastIndex,iIndices);
#endif
// outputElt * 4 = offsetting by the size of a simdvertex
// + c offsets to a 32bit x vWidth row within the current vertex
- Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), "destGEP");
+ Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), nullptr, "destGEP");
STORE(vVertexElements[c], dest);
}
}
SWR_INDEX_BUFFER_STATE swrIndexBuffer;
swrIndexBuffer.format = swr_convert_index_type(info.index_size);
- swrIndexBuffer.pIndices = p_data;
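+ // the index buffer is handed to the SWR core as a gfxptr_t (host pointer value cast to a gfx virtual address)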
+ swrIndexBuffer.xpIndices = (gfxptr_t) p_data;
swrIndexBuffer.size = size;
ctx->api.pfnSwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);