swr: [rasterizer core] Affinitize thread scratch space to numa node of worker

author Tim Rowley <timothy.o.rowley@intel.com>

Thu, 24 Mar 2016 22:20:02 +0000 (16:20 -0600)

committer Tim Rowley <timothy.o.rowley@intel.com>

Tue, 12 Apr 2016 16:52:04 +0000 (11:52 -0500)
author Tim Rowley <timothy.o.rowley@intel.com>
Thu, 24 Mar 2016 22:20:02 +0000 (16:20 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Tue, 12 Apr 2016 16:52:04 +0000 (11:52 -0500)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp

index f0f7956b5903941caf34237653301bbab73e8f82..442cdd420f4caa617dfbaa1e428362862c7cce5d 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -93,8 +93,16 @@ HANDLE SwrCreateContext(
      ///@note We could lazily allocate this but its rather small amount of memory.
      for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
      {
-        ///@todo Use numa API for allocations using numa information from thread data (if exists).
-        pContext->pScratch[i] = (uint8_t*)_aligned_malloc((32 * 1024), KNOB_SIMD_WIDTH * 4);
+#if defined(_WIN32)
+        uint32_t numaNode = pContext->threadPool.pThreadData ?
+            pContext->threadPool.pThreadData[i].numaId : 0;
+        pContext->pScratch[i] = (uint8_t*)VirtualAllocExNuma(
+            GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE),
+            MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE,
+            numaNode);
+#else
+        pContext->pScratch[i] = (uint8_t*)_aligned_malloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
+#endif
      }
  
      // State setup AFTER context is fully initialized
@@ -138,7 +146,11 @@ void SwrDestroyContext(HANDLE hContext)
      // Free scratch space.
      for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
      {
+#if defined(_WIN32)
+        VirtualFree(pContext->pScratch[i], 0, MEM_RELEASE);
+#else
          _aligned_free(pContext->pScratch[i]);
+#endif
      }
  
      delete(pContext->pHotTileMgr);
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h

index 67d81a44347bba7b29a935c809b0f9ba2a418b88..0241f5b900dd193007e721a5f4518aafb4a0e4c0 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -209,7 +209,7 @@ struct CachingAllocatorT : DefaultAllocator
  };
  typedef CachingAllocatorT<> CachingAllocator;
  
-template<typename T = DefaultAllocator, size_t BlockSizeT = (128 * 1024)>
+template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
  class TArena
  {
  public:
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp

index 7fb83edf1699ed3ab0c262c4796c22907811e728..ad0a5a070325311b6197e2c0ceb07798561704ae 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -83,7 +83,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
      if (pDC->pSpillFill[workerId] == nullptr)
      {
          ///@todo Add state which indicates the spill fill size.
-        pDC->pSpillFill[workerId] = (uint8_t*)pDC->pArena->AllocAlignedSync(4096 * 1024, sizeof(float) * 8);
+        pDC->pSpillFill[workerId] = (uint8_t*)pDC->pArena->AllocAlignedSync(4 * sizeof(MEGABYTE), sizeof(float) * 8);
      }
  
      const API_STATE& state = GetApiState(pDC);
author	Tim Rowley <timothy.o.rowley@intel.com>
	Thu, 24 Mar 2016 22:20:02 +0000 (16:20 -0600)
committer	Tim Rowley <timothy.o.rowley@intel.com>
	Tue, 12 Apr 2016 16:52:04 +0000 (11:52 -0500)
src/gallium/drivers/swr/rasterizer/core/api.cpp		patch | blob | history
src/gallium/drivers/swr/rasterizer/core/arena.h		patch | blob | history
src/gallium/drivers/swr/rasterizer/core/backend.cpp		patch | blob | history