From 794be41f91fbaf0645e422525b464627e2fbeef2 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Mon, 4 Apr 2016 14:33:26 -0600 Subject: [PATCH] swr: [rasterizer core] Fix global arena allocator bug - Plus some minor code refactoring Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/arena.h | 88 ++++++++++--------- .../drivers/swr/rasterizer/core/backend.cpp | 5 +- 2 files changed, 51 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h index 64184e16865..d0c0123b19b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/arena.h +++ b/src/gallium/drivers/swr/rasterizer/core/arena.h @@ -37,47 +37,54 @@ #include #include "core/utils.h" +static const size_t ARENA_BLOCK_ALIGN = 64; + +struct ArenaBlock +{ + size_t blockSize = 0; + ArenaBlock* pNext = nullptr; +}; +static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, + "Increase BLOCK_ALIGN size"); + class DefaultAllocator { public: - void* AllocateAligned(size_t size, size_t align) + ArenaBlock* AllocateAligned(size_t size, size_t align) { - void* p = _aligned_malloc(size, align); + SWR_ASSERT(size >= sizeof(ArenaBlock)); + + ArenaBlock* p = new (_aligned_malloc(size, align)) ArenaBlock(); + p->blockSize = size; return p; } - void Free(void* pMem) + + void Free(ArenaBlock* pMem) { - _aligned_free(pMem); + if (pMem) + { + SWR_ASSERT(pMem->blockSize < size_t(0xdddddddd)); + _aligned_free(pMem); + } } }; -static const size_t ARENA_BLOCK_ALIGN = 64; - -struct ArenaBlock -{ - size_t blockSize = 0; - ArenaBlock* pNext = nullptr; -}; -static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, - "Increase BLOCK_ALIGN size"); - // Caching Allocator for Arena -template +template struct CachingAllocatorT : DefaultAllocator { - void* AllocateAligned(size_t size, size_t align) + ArenaBlock* AllocateAligned(size_t size, size_t align) { SWR_ASSERT(size >= sizeof(ArenaBlock)); SWR_ASSERT(size <= uint32_t(-1)); - size_t blockSize = size - ARENA_BLOCK_ALIGN; - uint32_t bucket = GetBucketId(blockSize); + uint32_t bucket = GetBucketId(size); { // search cached blocks std::lock_guard l(m_mutex); ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket]; - ArenaBlock* pBlock = SearchBlocks(pPrevBlock, blockSize, align); + ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align); if (pBlock) { @@ -89,15 +96,15 @@ struct CachingAllocatorT : DefaultAllocator } else { - pPrevBlock = &m_oldCachedBlocks[GetBucketId(blockSize)]; - pBlock = SearchBlocks(pPrevBlock, blockSize, align); + pPrevBlock = &m_oldCachedBlocks[bucket]; + pBlock = SearchBlocks(pPrevBlock, size, align); if (pBlock) { m_oldCachedSize -= pBlock->blockSize; if (pBlock == m_pOldLastCachedBlocks[bucket]) { - m_pLastCachedBlocks[bucket] = pPrevBlock; + m_pOldLastCachedBlocks[bucket] = pPrevBlock; } } } @@ -126,15 +133,14 @@ struct CachingAllocatorT : DefaultAllocator return this->DefaultAllocator::AllocateAligned(size, align); } - void Free(void* pMem) + void Free(ArenaBlock* pMem) { if (pMem) { - ArenaBlock* pNewBlock = reinterpret_cast(pMem); - SWR_ASSERT(pNewBlock->blockSize >= 0); + SWR_ASSERT(pMem->blockSize >= 0); std::unique_lock l(m_mutex); - InsertCachedBlock(GetBucketId(pNewBlock->blockSize), pNewBlock); + InsertCachedBlock(GetBucketId(pMem->blockSize), pMem); } } @@ -154,7 +160,7 @@ struct CachingAllocatorT : DefaultAllocator { ArenaBlock* pNext = pBlock->pNext; m_oldCachedSize -= pBlock->blockSize; - m_totalAllocated -= (pBlock->blockSize + ARENA_BLOCK_ALIGN); + m_totalAllocated -= pBlock->blockSize; this->DefaultAllocator::Free(pBlock); pBlock = pNext; } @@ -175,6 +181,11 @@ struct CachingAllocatorT : DefaultAllocator } } + if (doFree) + { + SWR_REL_ASSERT(m_oldCachedSize == 0, "Unknown whereabouts of 0x%llx bytes", (uint64_t)m_oldCachedSize); + } + m_oldCachedSize += m_cachedSize; m_cachedSize = 0; } @@ -304,7 +315,7 @@ private: // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ... static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT; static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT; - static const size_t MAX_UNUSED_SIZE = 20 * sizeof(MEGABYTE); + static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE); ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS]; ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS]; @@ -346,7 +357,7 @@ public: if ((offset + size) <= pCurBlock->blockSize) { - void* pMem = PtrAdd(pCurBlock, offset + ARENA_BLOCK_ALIGN); + void* pMem = PtrAdd(pCurBlock, offset); m_offset = offset + size; return pMem; } @@ -355,24 +366,21 @@ public: // a new block } - static const size_t ArenaBlockSize = BlockSizeT - ARENA_BLOCK_ALIGN; - size_t blockSize = std::max(size, ArenaBlockSize); + static const size_t ArenaBlockSize = BlockSizeT; + size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize); // Add in one BLOCK_ALIGN unit to store ArenaBlock in. blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN); - void *pMem = m_allocator.AllocateAligned(blockSize + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned. - SWR_ASSERT(pMem != nullptr); - - ArenaBlock* pNewBlock = new (pMem) ArenaBlock(); + ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned. + SWR_ASSERT(pNewBlock != nullptr); if (pNewBlock != nullptr) { - m_offset = 0; + m_offset = ARENA_BLOCK_ALIGN; pNewBlock->pNext = m_pCurBlock; m_pCurBlock = pNewBlock; - m_pCurBlock->blockSize = blockSize; } return AllocAligned(size, align); @@ -407,7 +415,7 @@ public: void Reset(bool removeAll = false) { - m_offset = 0; + m_offset = ARENA_BLOCK_ALIGN; if (m_pCurBlock) { @@ -431,13 +439,13 @@ public: bool IsEmpty() { - return (m_pCurBlock == nullptr) || (m_offset == 0 && m_pCurBlock->pNext == nullptr); + return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr); } private: ArenaBlock* m_pCurBlock = nullptr; - size_t m_offset = 0; + size_t m_offset = ARENA_BLOCK_ALIGN; /// @note Mutex is only used by sync allocation functions. std::mutex m_mutex; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index d8ffdaa3ab4..a2212ba8aa4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -80,9 +80,10 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup SWR_ASSERT(pTaskData != nullptr); // Ensure spill fill memory has been allocated. - if (pSpillFillBuffer == nullptr) + size_t spillFillSize = pDC->pState->state.totalSpillFillSize; + if (spillFillSize && pSpillFillBuffer == nullptr) { - pSpillFillBuffer = pDC->pArena->AllocAlignedSync(pDC->pState->state.totalSpillFillSize, sizeof(float) * 8); + pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES); } const API_STATE& state = GetApiState(pDC); -- 2.30.2