swr/rasterizer: enable size accumulation in mem stats
author: Jan Zielinski <jan.zielinski@intel.com>
Fri, 26 Jul 2019 14:43:58 +0000 (16:43 +0200)
committer: Jan Zielinski <jan.zielinski@intel.com>
Thu, 8 Aug 2019 08:16:20 +0000 (10:16 +0200)
A small refactoring is also performed (e.g. JIT_MEM_CLIENT is renamed to MEM_CLIENT).

Reviewed-by: Alok Hota <alok.hota@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/archrast/events.proto
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp

index 06e0f616f707e18d8018a9b91134f5c1b9cc54d5..ba99391ae76a8b7faefeab1d45dde06fe8a718ac 100644 (file)
@@ -98,6 +98,8 @@ namespace ArchRast
         {
             uint32_t accessCountRead;
             uint32_t accessCountWrite;
+            uint32_t totalSizeRead;
+            uint32_t totalSizeWrite;
             uint64_t tscMin;
             uint64_t tscMax;
         };
@@ -113,7 +115,7 @@ namespace ArchRast
         typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap;
         MemoryTrackerMap trackedMemory = {};
 
-        void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc)
+        void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size)
         {
             MemoryTrackerKey key;
             key.address = address;
@@ -126,10 +128,12 @@ namespace ArchRast
                 if (isRead)
                 {
                     i->second.accessCountRead++;
+                    i->second.totalSizeRead += size;
                 }
                 else
                 {
                     i->second.accessCountWrite++;
+                    i->second.totalSizeWrite += size;
                 }
                 i->second.tscMax = tsc;
             }
@@ -140,12 +144,16 @@ namespace ArchRast
                 if (isRead)
                 {
                     data.accessCountRead = 1;
+                    data.totalSizeRead = size;
                     data.accessCountWrite = 0;
+                    data.totalSizeWrite = 0;
                 }
                 else
                 {
                     data.accessCountRead = 0;
+                    data.totalSizeRead = 0;
                     data.accessCountWrite = 1;
+                    data.totalSizeWrite = size;
                 }
                 data.tscMin = tsc;
                 data.tscMax = tsc;
@@ -258,6 +266,7 @@ namespace ArchRast
                 mAddressMask = (mAddressMask << 1) | 1;
                 addressRangeBytes = addressRangeBytes >> 1;
             }
+            mMemGranularity = mAddressMask + 1;
             mAddressMask = ~mAddressMask;
         }
 
@@ -666,7 +675,19 @@ namespace ArchRast
 
         virtual void Handle(const MemoryAccessEvent& event)
         {
-            mMemoryStats.TrackMemoryAccess(event.data.ptr, mAddressMask, event.data.isRead, event.data.tsc);
+            uint64_t trackAddr = event.data.ptr;
+            uint64_t nextAddr = (trackAddr & mAddressMask);
+            uint32_t sizeTracked = 0;
+
+            while (sizeTracked < event.data.size)
+            {
+                nextAddr += mMemGranularity;
+                uint32_t size = nextAddr - trackAddr;
+                size = std::min(event.data.size, size);
+                mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size);
+                sizeTracked += size;
+                trackAddr = nextAddr;
+            }            
         }
 
         virtual void Handle(const MemoryStatsEndEvent& event)
@@ -678,6 +699,8 @@ namespace ArchRast
                                      i->first.address & mAddressMask, 
                                      i->second.accessCountRead, 
                                      i->second.accessCountWrite, 
+                                     i->second.totalSizeRead, 
+                                     i->second.totalSizeWrite, 
                                      i->second.tscMin, 
                                      i->second.tscMax);
                 EventHandlerFile::Handle(mse);
@@ -734,6 +757,7 @@ namespace ArchRast
 
         MemoryStats      mMemoryStats     = {};
         uint64_t         mAddressMask     = 0;
+        uint64_t         mMemGranularity  = 0;
 
     };
 
index 1618e5faa4a31a409a1733f349bd6eef644a86f6..471bd0e286a4b6e363c57744696b74cd7e55cb2d 100644 (file)
@@ -480,6 +480,8 @@ event MemoryStatsEvent
     uint64_t baseAddr;
     uint32_t accessCountRead;
     uint32_t accessCountWrite;
+       uint32_t totalSizeRead;
+       uint32_t totalSizeWrite;
     uint64_t tscMin;
     uint64_t tscMax;
 };
index a435fa359981152e7773f8d35f8d0f7b91dc996a..8cf508797260d99b994cf00434379e3831ff6cbe 100644 (file)
@@ -233,7 +233,17 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
                                                         numSamples);
             if (pHotTile)
             {
-                pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
+                HOTTILE_STATE newState = (HOTTILE_STATE)pDesc->newTileState;;
+                if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_CLEAR)
+                {
+                    if (newState == HOTTILE_INVALID)
+                    {
+                        // This is OK for APIs that explicitly allow discards
+                        // (for e.g. depth / stencil data)
+                        //SWR_INVALID("Discarding valid data!");
+                    }
+                }
+                pHotTile->state = newState;
             }
         }
     }
index 21e3d47cf9dac66f7130226e644fa546a877990b..5f359ed2113e2981b71085e8ace5c4e50bff36a9 100644 (file)
@@ -32,7 +32,6 @@
 #include "common/rdtsc_buckets.h"
 #include "builder_gfx_mem.h"
 
-
 namespace SwrJit
 {
     using namespace llvm;
@@ -45,20 +44,18 @@ namespace SwrJit
         mpfnTrackMemAccess              = nullptr;
         mpParamSimDC                    = nullptr;
         mpWorkerData                    = nullptr;
-
     }
 
     void BuilderGfxMem::NotifyPrivateContextSet()
     {
     }
 
-    void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
+    void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage)
     {
-        SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL),
+        SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL),
                    "Internal memory should not be gfxptr_t.");
     }
 
-
     //////////////////////////////////////////////////////////////////////////
     /// @brief Generate a masked gather operation in LLVM IR.  If not
     /// supported on the underlying platform, emulate it with loads
@@ -72,7 +69,7 @@ namespace SwrJit
                                    Value*         vIndices,
                                    Value*         vMask,
                                    uint8_t        scale,
-                                   JIT_MEM_CLIENT usage)
+                                   MEM_CLIENT     usage)
     {
        // address may be coming in as 64bit int now so get the pointer
         if (pBase->getType() == mInt64Ty)
@@ -97,9 +94,8 @@ namespace SwrJit
                                    Value*         vIndices,
                                    Value*         vMask,
                                    uint8_t        scale,
-                                   JIT_MEM_CLIENT usage)
+                                   MEM_CLIENT     usage)
     {
-
         // address may be coming in as 64bit int now so get the pointer
         if (pBase->getType() == mInt64Ty)
         {
@@ -111,19 +107,17 @@ namespace SwrJit
     }
 
     void BuilderGfxMem::SCATTERPS(
-        Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+        Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
     {
-
         // address may be coming in as 64bit int now so get the pointer
         if (pDst->getType() == mInt64Ty)
         {
             pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0));
         }
 
-        Builder::SCATTERPS(pDst, vSrc, vOffsets, vMask, usage);
+        Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage);
     }
 
-
     Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
     {
         return ADD(base, offset);
@@ -159,7 +153,6 @@ namespace SwrJit
         SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
                    "Access of GFX pointers must have non-null type specified.");
 
-
         // address may be coming in as 64bit int now so get the pointer
         if (Ptr->getType() == mInt64Ty)
         {
@@ -169,7 +162,7 @@ namespace SwrJit
         return Ptr;
     }
 
-    void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead)
+    void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead)
     {
 #if defined(KNOB_ENABLE_AR)
         if (!KNOB_TRACK_MEMORY_WORKING_SET)
@@ -216,7 +209,7 @@ namespace SwrJit
         return;
     }
 
-    LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+    LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
     {
         AssertGFXMemoryParams(Ptr, usage);
         TrackerHelper(Ptr, Ty, usage, true);
@@ -225,7 +218,7 @@ namespace SwrJit
         return Builder::LOAD(Ptr, Name);
     }
 
-    LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+    LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
     {
         AssertGFXMemoryParams(Ptr, usage);
         TrackerHelper(Ptr, Ty, usage, true);
@@ -234,9 +227,8 @@ namespace SwrJit
         return Builder::LOAD(Ptr, Name);
     }
 
-
     LoadInst* BuilderGfxMem::LOAD(
-        Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+        Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
     {
         AssertGFXMemoryParams(Ptr, usage);
         TrackerHelper(Ptr, Ty, usage, true);
@@ -249,7 +241,7 @@ namespace SwrJit
                                   const std::initializer_list<uint32_t>& offset,
                                   const llvm::Twine&                     name,
                                   Type*                                  Ty,
-                                  JIT_MEM_CLIENT                         usage)
+                                  MEM_CLIENT                             usage)
     {
         AssertGFXMemoryParams(BasePtr, usage);
 
@@ -274,14 +266,13 @@ namespace SwrJit
         return LOAD(BasePtr, name, Ty, usage);
     }
 
-
     CallInst* BuilderGfxMem::MASKED_LOAD(Value*         Ptr,
                                          unsigned       Align,
                                          Value*         Mask,
                                          Value*         PassThru,
                                          const Twine&   Name,
                                          Type*          Ty,
-                                         JIT_MEM_CLIENT usage)
+                                         MEM_CLIENT     usage)
     {
         AssertGFXMemoryParams(Ptr, usage);
         TrackerHelper(Ptr, Ty, usage, true);
@@ -291,7 +282,7 @@ namespace SwrJit
     }
 
     StoreInst*
-    BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, JIT_MEM_CLIENT usage)
+    BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage)
     {
         AssertGFXMemoryParams(Ptr, usage);
         TrackerHelper(Ptr, Ty, usage, false);
@@ -304,7 +295,7 @@ namespace SwrJit
                                     Value*                                 BasePtr,
                                     const std::initializer_list<uint32_t>& offset,
                                     Type*                                  Ty,
-                                    JIT_MEM_CLIENT                         usage)
+                                    MEM_CLIENT                             usage)
     {
         AssertGFXMemoryParams(BasePtr, usage);
         TrackerHelper(BasePtr, Ty, usage, false);
@@ -314,7 +305,7 @@ namespace SwrJit
     }
 
     CallInst* BuilderGfxMem::MASKED_STORE(
-        Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, JIT_MEM_CLIENT usage)
+        Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage)
     {
         AssertGFXMemoryParams(Ptr, usage);
 
@@ -327,7 +318,7 @@ namespace SwrJit
     Value* BuilderGfxMem::TranslateGfxAddressForRead(Value*       xpGfxAddress,
                                                      Type*        PtrTy,
                                                      const Twine& Name,
-                                                     JIT_MEM_CLIENT /* usage */)
+                                                     MEM_CLIENT /* usage */)
     {
         if (PtrTy == nullptr)
         {
@@ -339,7 +330,7 @@ namespace SwrJit
     Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value*       xpGfxAddress,
                                                       Type*        PtrTy,
                                                       const Twine& Name,
-                                                      JIT_MEM_CLIENT /* usage */)
+                                                      MEM_CLIENT /* usage */)
     {
         if (PtrTy == nullptr)
         {
index 52bd3ac226cf46eba1e27d51e7e6cdad1e85a1da..b6e8ed1d760e145b88e07cad638bff8ebf9079e7 100644 (file)
@@ -51,22 +51,21 @@ namespace SwrJit
         virtual LoadInst* LOAD(Value*         Ptr,
                                const char*    Name,
                                Type*          Ty    = nullptr,
-                               JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                               MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
         virtual LoadInst* LOAD(Value*         Ptr,
                                const Twine&   Name  = "",
                                Type*          Ty    = nullptr,
-                               JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                               MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
         virtual LoadInst* LOAD(Value*         Ptr,
                                bool           isVolatile,
                                const Twine&   Name  = "",
                                Type*          Ty    = nullptr,
-                               JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                               MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
         virtual LoadInst* LOAD(Value*                                 BasePtr,
                                const std::initializer_list<uint32_t>& offset,
                                const llvm::Twine&                     Name  = "",
                                Type*                                  Ty    = nullptr,
-                               JIT_MEM_CLIENT                         usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+                               MEM_CLIENT                         usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
         virtual CallInst* MASKED_LOAD(Value*         Ptr,
                                       unsigned       Align,
@@ -74,61 +73,57 @@ namespace SwrJit
                                       Value*         PassThru = nullptr,
                                       const Twine&   Name     = "",
                                       Type*          Ty       = nullptr,
-                                      JIT_MEM_CLIENT usage    = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                                      MEM_CLIENT     usage    = MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+        virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
-        virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-        
-        virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+        virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
-        virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+        virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
         virtual Value* GATHERPS(Value*         src,
                                 Value*         pBase,
                                 Value*         indices,
                                 Value*         mask,
                                 uint8_t        scale = 1,
-                                JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                                MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
         virtual Value* GATHERDD(Value*         src,
                                 Value*         pBase,
                                 Value*         indices,
                                 Value*         mask,
                                 uint8_t        scale = 1,
-                                JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                                MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
         virtual void SCATTERPS(Value*         pDst,
                                Value*         vSrc,
                                Value*         vOffsets,
                                Value*         vMask,
-                               JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
+                               MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
         Value* TranslateGfxAddressForRead(Value*         xpGfxAddress,
                                           Type*          PtrTy = nullptr,
                                           const Twine&   Name  = "",
-                                          JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                                          MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
         Value* TranslateGfxAddressForWrite(Value*         xpGfxAddress,
                                            Type*          PtrTy = nullptr,
                                            const Twine&   Name  = "",
-                                           JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
-        
+                                           MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
     protected:
-        void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
+        void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage);
 
         virtual void NotifyPrivateContextSet();
 
         virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
 
         Value* TranslationHelper(Value* Ptr, Type* Ty);
-        void   TrackerHelper(Value* Ptr, Type* Ty, JIT_MEM_CLIENT usage, bool isRead);
-
+        void   TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead);
 
         FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
         Value*        GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
         Value*        GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
         Value*        GetParamSimDC() { return mpParamSimDC; }
 
-
         Value*        mpWorkerData;
 
     private:
index 267c5442d2a3bf7c5d215644f5f7c42c2c83fd0d..b32686c7583d0e4a23819d8777e9b788d2a69bd3 100644 (file)
@@ -34,7 +34,7 @@
 
 namespace SwrJit
 {
-    void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
+    void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage)
     {
         SWR_ASSERT(
             ptr->getType() != mInt64Ty,
@@ -93,26 +93,26 @@ namespace SwrJit
         return IN_BOUNDS_GEP(ptr, indices);
     }
 
-    LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
+    LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
     {
         AssertMemoryUsageParams(Ptr, usage);
         return IRB()->CreateLoad(Ptr, Name);
     }
 
-    LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+    LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
     {
         AssertMemoryUsageParams(Ptr, usage);
         return IRB()->CreateLoad(Ptr, Name);
     }
 
-    LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
+    LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage)
     {
         AssertMemoryUsageParams(Ptr, usage);
         return IRB()->CreateLoad(Ty, Ptr, Name);
     }
 
     LoadInst*
-    Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
+    Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
     {
         AssertMemoryUsageParams(Ptr, usage);
         return IRB()->CreateLoad(Ptr, isVolatile, Name);
@@ -122,7 +122,7 @@ namespace SwrJit
                             const std::initializer_list<uint32_t>& indices,
                             const llvm::Twine&                     name,
                             Type*                                  Ty,
-                            JIT_MEM_CLIENT                         usage)
+                            MEM_CLIENT                             usage)
     {
         std::vector<Value*> valIndices;
         for (auto i : indices)
@@ -141,7 +141,7 @@ namespace SwrJit
     }
 
     StoreInst*
-    Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, JIT_MEM_CLIENT usage)
+    Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage)
     {
         std::vector<Value*> valIndices;
         for (auto i : indices)
@@ -186,7 +186,7 @@ namespace SwrJit
                              Value*         vIndices,
                              Value*         vMask,
                              uint8_t        scale,
-                             JIT_MEM_CLIENT usage)
+                             MEM_CLIENT     usage)
     {
         AssertMemoryUsageParams(pBase, usage);
 
@@ -206,7 +206,7 @@ namespace SwrJit
                              Value*         vIndices,
                              Value*         vMask,
                              uint8_t        scale,
-                             JIT_MEM_CLIENT usage)
+                             MEM_CLIENT     usage)
     {
         AssertMemoryUsageParams(pBase, usage);
 
@@ -243,7 +243,7 @@ namespace SwrJit
                           Value*           mask,
                           Value*           vGatherComponents[],
                           bool             bPackedOutput,
-                          JIT_MEM_CLIENT   usage)
+                          MEM_CLIENT       usage)
     {
         const SWR_FORMAT_INFO& info = GetFormatInfo(format);
         if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
@@ -262,7 +262,7 @@ namespace SwrJit
                             Value*                 vMask,
                             Value*                 vGatherComponents[],
                             bool                   bPackedOutput,
-                            JIT_MEM_CLIENT         usage)
+                            MEM_CLIENT             usage)
     {
         switch (info.bpp / info.numComps)
         {
@@ -336,7 +336,7 @@ namespace SwrJit
                             Value*                 vMask,
                             Value*                 vGatherComponents[],
                             bool                   bPackedOutput,
-                            JIT_MEM_CLIENT         usage)
+                            MEM_CLIENT             usage)
     {
         switch (info.bpp / info.numComps)
         {
@@ -643,7 +643,7 @@ namespace SwrJit
     /// @param vOffsets - vector of byte offsets from pDst
     /// @param vMask - mask of valid lanes
     void Builder::SCATTERPS(
-        Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
+        Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
     {
         AssertMemoryUsageParams(pDst, usage);
 
index ccf42c8dab00a24fd710ff2c900d60843b03b202..49e132e3756537164ff99417135aea710e652fae 100644 (file)
@@ -30,7 +30,7 @@
 #pragma once
 
 public:
-enum class JIT_MEM_CLIENT
+enum class MEM_CLIENT
 {
     MEM_CLIENT_INTERNAL,
     GFX_MEM_CLIENT_FETCH,
@@ -41,7 +41,7 @@ enum class JIT_MEM_CLIENT
 
 protected:
 virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
-void           AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
+void           AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage);
 
 public:
 virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
@@ -57,23 +57,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
 Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
 
 virtual LoadInst*
-                  LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                  LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 virtual LoadInst* LOAD(Value*         Ptr,
                        const Twine&   Name  = "",
                        Type*          Ty    = nullptr,
-                       JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                       MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 virtual LoadInst*
-                  LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                  LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 virtual LoadInst* LOAD(Value*         Ptr,
                        bool           isVolatile,
                        const Twine&   Name  = "",
                        Type*          Ty    = nullptr,
-                       JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                       MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 virtual LoadInst* LOAD(Value*                                 BasePtr,
                        const std::initializer_list<uint32_t>& offset,
                        const llvm::Twine&                     Name  = "",
                        Type*                                  Ty    = nullptr,
-                       JIT_MEM_CLIENT                         usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                       MEM_CLIENT                             usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
 virtual CallInst* MASKED_LOAD(Value*         Ptr,
                               unsigned       Align,
@@ -81,19 +81,19 @@ virtual CallInst* MASKED_LOAD(Value*         Ptr,
                               Value*         PassThru = nullptr,
                               const Twine&   Name     = "",
                               Type*          Ty       = nullptr,
-                              JIT_MEM_CLIENT usage    = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+                              MEM_CLIENT usage    = MEM_CLIENT::MEM_CLIENT_INTERNAL)
 {
     return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
 }
 
-virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
 {
     return IRB()->CreateStore(Val, Ptr, isVolatile);
 }
 
-virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
-virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
 {
     return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask);
 }
@@ -112,14 +112,14 @@ void Gather4(const SWR_FORMAT format,
              Value*           mask,
              Value*           vGatherComponents[],
              bool             bPackedOutput,
-             JIT_MEM_CLIENT   usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+             MEM_CLIENT       usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
 virtual Value* GATHERPS(Value*         src,
                         Value*         pBase,
                         Value*         indices,
                         Value*         mask,
                         uint8_t        scale = 1,
-                        JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                        MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
 void GATHER4PS(const SWR_FORMAT_INFO& info,
                Value*                 pSrcBase,
@@ -127,14 +127,14 @@ void GATHER4PS(const SWR_FORMAT_INFO& info,
                Value*                 mask,
                Value*                 vGatherComponents[],
                bool                   bPackedOutput,
-               JIT_MEM_CLIENT         usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+               MEM_CLIENT             usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
 virtual Value* GATHERDD(Value*         src,
                         Value*         pBase,
                         Value*         indices,
                         Value*         mask,
                         uint8_t        scale = 1,
-                        JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                        MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
 void GATHER4DD(const SWR_FORMAT_INFO& info,
                Value*                 pSrcBase,
@@ -142,7 +142,7 @@ void GATHER4DD(const SWR_FORMAT_INFO& info,
                Value*                 mask,
                Value*                 vGatherComponents[],
                bool                   bPackedOutput,
-               JIT_MEM_CLIENT         usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+               MEM_CLIENT             usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
 Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
 
@@ -152,7 +152,7 @@ virtual void SCATTERPS(Value*         pDst,
                        Value*         vSrc,
                        Value*         vOffsets,
                        Value*         vMask,
-                       JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+                       MEM_CLIENT     usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
 void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
                         Value*                 vGatherInput,
index ed6cac04d018c21b6d6ebce5d82ea192d4f91e18..5b06de352dcce6cdae7701dc1e520db8c930fc7d 100644 (file)
@@ -774,14 +774,15 @@ namespace SwrJit
     {
         SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
         Value* fixed = nullptr;
-#if 0
-        // This doesn't work for negative numbers!!
+
+#if 0   // This doesn't work for negative numbers!!
         {
             fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
                                     C(_MM_FROUND_TO_NEAREST_INT)),
                              mSimdInt32Ty);
         }
-#else
+        else
+#endif
         {
             // Do round to nearest int on fractional bits first
             // Not entirely perfect for negative numbers, but close enough
@@ -804,7 +805,7 @@ namespace SwrJit
 
             fixed = ASHR(vFixed, vExtraBits, name);
         }
-#endif
+
         return fixed;
     }
 
@@ -845,8 +846,7 @@ namespace SwrJit
     {
         SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
         Value* fixed = nullptr;
-#if 1
-        // KNOB_SIM_FAST_MATH?  Below works correctly from a precision
+#if 1   // KNOB_SIM_FAST_MATH?  Below works correctly from a precision
         // standpoint...
         {
             fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
index 8601d0529bcecabf2d438b69c32b72197981f9c0..fe5b48e584bd86f1afe2a3d3d24fc9b399eb7f40 100644 (file)
@@ -205,7 +205,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
             ? vIndices = LOAD(indices,
                               "",
                               PointerType::get(mSimdInt32Ty, 0),
-                              JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
+                              MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
             : vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
         break; // incoming type is already 32bit int
     default:
@@ -382,7 +382,7 @@ void FetchJit::CreateGatherOddFormats(
     if (info.bpp == 32)
     {
         pGather =
-            GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+            GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
     }
     else
     {
@@ -416,7 +416,7 @@ void FetchJit::CreateGatherOddFormats(
             {
                 Value* pDst  = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0));
                 Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
-                STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+                STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
                 break;
             }
 
@@ -424,7 +424,7 @@ void FetchJit::CreateGatherOddFormats(
             {
                 Value* pDst  = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
                 Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
-                STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+                STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
                 break;
             }
             break;
@@ -434,12 +434,12 @@ void FetchJit::CreateGatherOddFormats(
                 // First 16-bits of data
                 Value* pDst  = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
                 Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
-                STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+                STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
 
                 // Last 8-bits of data
                 pDst  = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0));
-                xpSrc = ADD(xpSrc, C(2));
-                STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
+                xpSrc = ADD(xpSrc, C((int64_t)2));
+                STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
                 break;
             }
 
@@ -750,7 +750,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
                 // if we have at least one component out of x or y to fetch
                 if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
                 {
-                    vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
+                    vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
                     // e.g. result of first 8x32bit integer gather for 16bit components
                     // 256i - 0    1    2    3    4    5    6    7
                     //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -763,7 +763,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
                     // offset base to the next components(zw) in the vertex to gather
                     pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
 
-                    vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask);
+                    vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
                     // e.g. result of second 8x32bit integer gather for 16bit components
                     // 256i - 0    1    2    3    4    5    6    7
                     //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@@ -811,7 +811,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
                                          vNewOffsets,
                                          vGatherMask,
                                          1,
-                                         JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+                                         MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
                         }
                         else
                         {
@@ -957,7 +957,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
                                                     vOffsets,
                                                     vGatherMask,
                                                     1,
-                                                    JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+                                                    MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
                     // e.g. result of an 8x32bit integer gather for 8bit components
                     // 256i - 0    1    2    3    4    5    6    7
                     //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
@@ -991,7 +991,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
                                                 vOffsets,
                                                 vGatherMask,
                                                 1,
-                                                JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+                                                MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
                     // e.g. result of first 8x32bit integer gather for 16bit components
                     // 256i - 0    1    2    3    4    5    6    7
                     //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -1009,7 +1009,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
                                                 vOffsets,
                                                 vGatherMask,
                                                 1,
-                                                JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+                                                MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
                     // e.g. result of second 8x32bit integer gather for 16bit components
                     // 256i - 0    1    2    3    4    5    6    7
                     //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@@ -1050,7 +1050,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
                                                       vOffsets,
                                                       vGatherMask,
                                                       1,
-                                                      JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+                                                      MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
 
                             if (conversionType == CONVERT_USCALED)
                             {
@@ -1147,7 +1147,7 @@ Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
 
             // if valid, load the index. if not, load 0 from the stack
             Value* pValid = SELECT(mask, pIndex, pZeroIndex);
-            Value* index  = LOAD(pValid, "valid index", Ty, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+            Value* index  = LOAD(pValid, "valid index", Ty, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
 
             // zero extended index to 32 bits and insert into the correct simd lane
             index    = Z_EXT(index, mInt32Ty);
@@ -1222,7 +1222,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
                        VIMMED1(0),
                        "vIndices",
                        PointerType::get(mSimdInt32Ty, 0),
-                       JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
+                       MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
 }
 
 //////////////////////////////////////////////////////////////////////////
index cc986a78e0aabbaf21e3dc9631326849fad6cd49..dcb051c3b530628e8976e5f68acf6346fcd3f4d7 100644 (file)
@@ -83,6 +83,8 @@ void JITCALL JitDestroyContext(HANDLE hJitContext);
 /// @param output - Output containing information about JIT shader
 ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& input);
 
+ShaderInfo* JITCALL JitGetShader(HANDLE hJitContext, const char* name);
+
 //////////////////////////////////////////////////////////////////////////
 /// @brief JIT destroy shader.
 /// @param hJitContext - Jit Context
index c47acf73228ac7ca85204b45838e236e5ba739da..13e70a7f90af87e5c3264043a1fcb36ef7bd5eab 100644 (file)
@@ -155,7 +155,7 @@ struct StreamOutJit : public BuilderGfxMem
 
             // cast mask to <4xi1>
             Value* mask = ToMask(packedMask);
-            MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
+            MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
         }
 
         // increment SO buffer
@@ -223,7 +223,7 @@ struct StreamOutJit : public BuilderGfxMem
             Value* pBuf              = getSOBuffer(pSoCtx, b);
             Value* pData             = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
             Value* streamOffset      = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
-            pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0)); 
+            pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
             pOutBufferStartVertex[b] = pOutBuffer[b];
 
             outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});