swr: [rasterizer] more arena work

author Tim Rowley <timothy.o.rowley@intel.com>

Fri, 18 Mar 2016 18:11:20 +0000 (12:11 -0600)

committer Tim Rowley <timothy.o.rowley@intel.com>

Fri, 25 Mar 2016 19:45:39 +0000 (14:45 -0500)
author Tim Rowley <timothy.o.rowley@intel.com>
Fri, 18 Mar 2016 18:11:20 +0000 (12:11 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Fri, 25 Mar 2016 19:45:39 +0000 (14:45 -0500)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp

index c3c603d294c80b3cfdd6518547fdc4ce7a80ff28..453d0295b54ff107ed9c872b8fdaf3300cda25fc 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -189,7 +189,7 @@ void QueueWork(SWR_CONTEXT *pContext)
  
          if (IsDraw)
          {
-            std::unordered_set<uint32_t> lockedTiles;
+            static TileSet lockedTiles;
              uint64_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
              WorkOnFifoFE(pContext, 0, curDraw[0], 0);
              WorkOnFifoBE(pContext, 0, curDraw[1], lockedTiles);
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h

index b6b4d829576457756cd326a814a797ebaf79bc36..4cdb728e1ef41ff4362efc59ff861a29fd54a9bc 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -33,6 +33,9 @@
  #pragma once
  
  #include <mutex>
+#include <algorithm>
+#include <atomic>
+#include "core/utils.h"
  
  class DefaultAllocator
  {
@@ -48,7 +51,7 @@ public:
      }
  };
  
-template<typename T = DefaultAllocator>
+template<typename MutexT = std::mutex, typename T = DefaultAllocator>
  class TArena
  {
  public:
@@ -79,7 +82,7 @@ public:
          }
  
          static const size_t ArenaBlockSize = 1024 * 1024;
-        size_t blockSize = std::max(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));
+        size_t blockSize = std::max<size_t>(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));
  
          // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
          blockSize = AlignUp(blockSize + BLOCK_ALIGN, BLOCK_ALIGN);
@@ -111,8 +114,9 @@ public:
      {
          void* pAlloc = nullptr;
  
-        std::unique_lock<std::mutex> l(m_mutex);
+        m_mutex.lock();
          pAlloc = AllocAligned(size, align);
+        m_mutex.unlock();
  
          return pAlloc;
      }
@@ -121,8 +125,9 @@ public:
      {
          void* pAlloc = nullptr;
  
-        std::unique_lock<std::mutex> l(m_mutex);
+        m_mutex.lock();
          pAlloc = Alloc(size);
+        m_mutex.unlock();
  
          return pAlloc;
      }
@@ -175,7 +180,96 @@ private:
      size_t          m_size      = 0;
  
      /// @note Mutex is only used by sync allocation functions.
-    std::mutex      m_mutex;
+    MutexT          m_mutex;
  };
  
  typedef TArena<> Arena;
+
+struct NullMutex  // No-op lockable: both members are empty. Used as the MutexT argument of RefArena's TArena base.
+{
+    void lock() {}    // intentionally does nothing
+    void unlock() {}  // intentionally does nothing
+};
+
+// Reference-counted arena (a TArena instantiated with the no-op NullMutex) for use by ArenaAllocator.
+// NOT THREAD SAFE!! Both counters are plain uint32_t and the NullMutex base performs no locking.
+struct RefArena : TArena<NullMutex>
+{
+    uint32_t AddRef() { return ++m_refCount; }  // returns the incremented reference count
+    uint32_t Release() { if (--m_refCount) { return m_refCount; } delete this; return 0; }  // deletes this object when the count reaches 0; the count starts at 0, so a Release() without a prior AddRef() wraps the counter
+
+    void* allocate(std::size_t n)  // counts the allocation, then forwards n to Alloc()
+    {
+        ++m_numAllocations;
+        return Alloc(n);
+    }
+
+    void deallocate(void* p) { --m_numAllocations; }  // bookkeeping only: p is ignored and no memory is released here
+    void clear() { SWR_ASSERT(0 == m_numAllocations); Reset(); }  // asserts that no allocation is outstanding, then calls Reset()
+
+private:
+    uint32_t m_refCount = 0;        // AddRef() calls not yet matched by Release()
+    uint32_t m_numAllocations = 0;  // allocate() calls minus deallocate() calls
+};
+
+#if 0 // THIS DOESN'T WORK!!!
+// Arena based replacement for std::allocator
+template <typename T>
+struct ArenaAllocator
+{
+    typedef T value_type;
+    ArenaAllocator()
+    {
+        m_pArena = new RefArena();
+        m_pArena->AddRef();
+    }
+    ~ArenaAllocator()
+    {
+        m_pArena->Release(); m_pArena = nullptr;
+    }
+    ArenaAllocator(const ArenaAllocator& copy)
+    {
+        m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef();
+    }
+
+
+    template <class U> ArenaAllocator(const ArenaAllocator<U>& copy)
+    {
+        m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef();
+    }
+    T* allocate(std::size_t n)
+    {
+#if defined(_DEBUG)
+        char buf[32];
+        sprintf_s(buf, "Alloc: %lld\n", n);
+        OutputDebugStringA(buf);
+#endif
+        void* p = m_pArena->allocate(n * sizeof(T));
+        return static_cast<T*>(p);
+    }
+    void deallocate(T* p, std::size_t n)
+    {
+#if defined(_DEBUG)
+        char buf[32];
+        sprintf_s(buf, "Dealloc: %lld\n", n);
+        OutputDebugStringA(buf);
+#endif
+        m_pArena->deallocate(p);
+    }
+    void clear() { m_pArena->clear(); }
+
+    RefArena* m_pArena = nullptr;
+};
+
+template <class T, class U>
+bool operator== (const ArenaAllocator<T>&, const ArenaAllocator<U>&)
+{
+    return true;
+}
+
+template <class T, class U>
+bool operator!= (const ArenaAllocator<T>&, const ArenaAllocator<U>&)
+{
+    return false;
+}
+#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp

index 57408049d03f18c1dfe5f864e34b3178f81026a1..ff25e82f0feb0ad5b461722dc03608a554e88f68 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -24,7 +24,6 @@
  #include <stdio.h>
  #include <thread>
  #include <algorithm>
-#include <unordered_set>
  #include <float.h>
  #include <vector>
  #include <utility>
@@ -345,7 +344,7 @@ void WorkOnFifoBE(
      SWR_CONTEXT *pContext,
      uint32_t workerId,
      uint64_t &curDrawBE,
-    std::unordered_set<uint32_t>& lockedTiles)
+    TileSet& lockedTiles)
  {
      // Find the first incomplete draw that has pending work. If no such draw is found then
      // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
@@ -550,7 +549,7 @@ DWORD workerThreadMain(LPVOID pData)
  
      // Track tiles locked by other threads. If we try to lock a macrotile and find its already
      // locked then we'll add it to this list so that we don't try and lock it again.
-    std::unordered_set<uint32_t> lockedTiles;
+    TileSet lockedTiles;
  
      // each worker has the ability to work on any of the queued draws as long as certain
      // conditions are met. the data associated
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h

index ec0b735a4ec7baab7cf34e767946897bcb989875..6b37e3ac1792556f534f65e65b7ab3a2c7238fc4 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -54,10 +54,12 @@ struct THREAD_POOL
      THREAD_DATA *pThreadData;
  };
  
+typedef std::unordered_set<uint32_t> TileSet;  // type of the lockedTiles/usedTiles argument of WorkOnFifoBE; presumably holds macrotile ids - TODO confirm
+
  void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
  void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
  
  // Expose FE and BE worker functions to the API thread if single threaded
  void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, int numaNode);
-void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, std::unordered_set<uint32_t> &usedTiles);
+void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles);
  void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE);
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.cpp b/src/gallium/drivers/swr/rasterizer/core/utils.cpp

index f36452f2cecb3225c8d7283b6befc6001a871525..a1d665e77cc61d3d26b5c431d8e67165957f9fe8 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/utils.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.cpp
@@ -27,6 +27,11 @@
  ******************************************************************************/
  #if defined(_WIN32)
  
+#if defined(NOMINMAX)
+// GDI Plus requires non-std min / max macros be defined :(
+#undef NOMINMAX
+#endif
+
  #include<Windows.h>
  #include <Gdiplus.h>
  #include <Gdiplusheaders.h>
author	Tim Rowley <timothy.o.rowley@intel.com>
	Fri, 18 Mar 2016 18:11:20 +0000 (12:11 -0600)
committer	Tim Rowley <timothy.o.rowley@intel.com>
	Fri, 25 Mar 2016 19:45:39 +0000 (14:45 -0500)
src/gallium/drivers/swr/rasterizer/core/api.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/arena.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/threads.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/threads.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/utils.cpp		patch \| blob \| history