From 700a5b06e036d7515c6d5f2f9e2d40e5a65eb964 Mon Sep 17 00:00:00 2001
From: Tim Rowley
Date: Thu, 17 Mar 2016 18:10:25 -0600
Subject: [PATCH] swr: [rasterizer core] Arena optimizations - preparing for global allocator.

---
 src/gallium/drivers/swr/Makefile.sources-arch |   1 -
 .../drivers/swr/rasterizer/core/arena.cpp     | 166 ------------------
 .../drivers/swr/rasterizer/core/arena.h       | 146 ++++++++++++--
 .../drivers/swr/rasterizer/core/tilemgr.cpp   |   8 -
 .../drivers/swr/rasterizer/core/tilemgr.h     |  10 +-
 5 files changed, 144 insertions(+), 187 deletions(-)
 delete mode 100644 src/gallium/drivers/swr/rasterizer/core/arena.cpp

diff --git a/src/gallium/drivers/swr/Makefile.sources-arch b/src/gallium/drivers/swr/Makefile.sources-arch
index 7544f8efccc..a04b1203c7c 100644
--- a/src/gallium/drivers/swr/Makefile.sources-arch
+++ b/src/gallium/drivers/swr/Makefile.sources-arch
@@ -59,7 +59,6 @@ COMMON_CXX_SOURCES := \
 CORE_CXX_SOURCES := \
 	rasterizer/core/api.cpp \
 	rasterizer/core/api.h \
-	rasterizer/core/arena.cpp \
 	rasterizer/core/arena.h \
 	rasterizer/core/backend.cpp \
 	rasterizer/core/backend.h \
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.cpp b/src/gallium/drivers/swr/rasterizer/core/arena.cpp
deleted file mode 100644
index 8184c8d3f4c..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/arena.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file arena.cpp
-*
-* @brief Arena memory manager
-*        The arena is convenient and fast for managing allocations for any of
-*        our allocations that are associated with operations and can all be freed
-*        once when their operation has completed. Allocations are cheap since
-*        most of the time its simply an increment of an offset. Also, no need to
-*        free individual allocations. All of the arena memory can be freed at once.
-*
-******************************************************************************/
-
-#include "context.h"
-#include "arena.h"
-
-#include <cmath>
-
-Arena::Arena()
-    : m_pCurBlock(nullptr), m_size(0)
-{
-    m_pMutex = new std::mutex();
-}
-
-Arena::~Arena()
-{
-    Reset();        // Reset just in case to avoid leaking memory.
-
-    if (m_pCurBlock)
-    {
-        _aligned_free(m_pCurBlock->pMem);
-        delete m_pCurBlock;
-    }
-
-    delete m_pMutex;
-}
-
-///@todo Remove this when all users have stopped using this.
-void Arena::Init()
-{
-    m_size = 0;
-    m_pCurBlock = nullptr;
-
-    m_pMutex = new std::mutex();
-}
-
-void* Arena::AllocAligned(size_t size, size_t align)
-{
-    if (m_pCurBlock)
-    {
-        ArenaBlock* pCurBlock = m_pCurBlock;
-        pCurBlock->offset = AlignUp(pCurBlock->offset, align);
-
-        if ((pCurBlock->offset + size) <= pCurBlock->blockSize)
-        {
-            void* pMem = PtrAdd(pCurBlock->pMem, pCurBlock->offset);
-            pCurBlock->offset += size;
-            m_size += size;
-            return pMem;
-        }
-
-        // Not enough memory in this block, fall through to allocate
-        // a new block
-    }
-
-    static const size_t ArenaBlockSize = 1024*1024;
-    size_t blockSize = std::max(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));
-    blockSize = AlignUp(blockSize, KNOB_SIMD_WIDTH*4);
-
-    void *pMem = _aligned_malloc(blockSize, KNOB_SIMD_WIDTH*4);    // Arena blocks are always simd byte aligned.
-    SWR_ASSERT(pMem != nullptr);
-
-    ArenaBlock* pNewBlock = new (std::nothrow) ArenaBlock();
-    SWR_ASSERT(pNewBlock != nullptr);
-
-    if (pNewBlock != nullptr)
-    {
-        pNewBlock->pNext = m_pCurBlock;
-
-        m_pCurBlock = pNewBlock;
-        m_pCurBlock->pMem = pMem;
-        m_pCurBlock->blockSize = blockSize;
-
-    }
-
-    return AllocAligned(size, align);
-}
-
-void* Arena::Alloc(size_t size)
-{
-    return AllocAligned(size, 1);
-}
-
-void* Arena::AllocAlignedSync(size_t size, size_t align)
-{
-    void* pAlloc = nullptr;
-
-    SWR_ASSERT(m_pMutex != nullptr);
-
-    m_pMutex->lock();
-    pAlloc = AllocAligned(size, align);
-    m_pMutex->unlock();
-
-    return pAlloc;
-}
-
-void* Arena::AllocSync(size_t size)
-{
-    void* pAlloc = nullptr;
-
-    SWR_ASSERT(m_pMutex != nullptr);
-
-    m_pMutex->lock();
-    pAlloc = Alloc(size);
-    m_pMutex->unlock();
-
-    return pAlloc;
-}
-
-void Arena::Reset(bool removeAll)
-{
-    if (m_pCurBlock)
-    {
-        m_pCurBlock->offset = 0;
-
-        ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
-        m_pCurBlock->pNext = nullptr;
-        while(pUsedBlocks)
-        {
-            ArenaBlock* pBlock = pUsedBlocks;
-            pUsedBlocks = pBlock->pNext;
-
-            _aligned_free(pBlock->pMem);
-            delete pBlock;
-        }
-
-        if (removeAll)
-        {
-            _aligned_free(m_pCurBlock->pMem);
-            delete m_pCurBlock;
-            m_pCurBlock = nullptr;
-        }
-    }
-
-    m_size = 0;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h
index 76eee11fb08..b6b4d829576 100644
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -34,25 +34,146 @@
 
 #include <mutex>
 
-class Arena
+/// Default backing allocator for TArena; forwards to _aligned_malloc and
+/// _aligned_free.
+class DefaultAllocator
 {
 public:
-    Arena();
-    ~Arena();
+    void* AllocateAligned(size_t size, size_t align)
+    {
+        void* p = _aligned_malloc(size, align);
+        return p;
+    }
+    void Free(void* pMem)
+    {
+        _aligned_free(pMem);
+    }
+};
+
+/// Arena that hands out memory by bumping an offset inside blocks obtained
+/// from an allocator of type T. Blocks are only released by Reset().
+template<typename T = DefaultAllocator>
+class TArena
+{
+public:
+    TArena(T& in_allocator) : m_allocator(in_allocator) {}
+    TArena() : m_allocator(m_defAllocator) {}
+    ~TArena()
+    {
+        Reset(true);
+    }
+
+    /// Returns size bytes from the current block, starting a new block when
+    /// the current one is too small. Returns nullptr if the backing
+    /// allocator cannot provide a new block.
+    void* AllocAligned(size_t size, size_t align)
+    {
+        if (m_pCurBlock)
+        {
+            ArenaBlock* pCurBlock = m_pCurBlock;
+            pCurBlock->offset = AlignUp(pCurBlock->offset, align);
 
-    void Init();
+            if ((pCurBlock->offset + size) <= pCurBlock->blockSize)
+            {
+                void* pMem = PtrAdd(pCurBlock->pMem, pCurBlock->offset);
+                pCurBlock->offset += size;
+                m_size += size;
+                return pMem;
+            }
 
-    void* AllocAligned(size_t size, size_t align);
-    void* Alloc(size_t size);
+            // Not enough memory in this block, fall through to allocate
+            // a new block
+        }
 
-    void* AllocAlignedSync(size_t size, size_t align);
-    void* AllocSync(size_t size);
+        static const size_t ArenaBlockSize = 1024 * 1024;
+        size_t blockSize = std::max(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));
 
-    void Reset(bool removeAll = false);
-    size_t Size() { return m_size; }
+        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
+        blockSize = AlignUp(blockSize + BLOCK_ALIGN, BLOCK_ALIGN);
+
+        void *pMem = m_allocator.AllocateAligned(blockSize, BLOCK_ALIGN);    // Arena blocks are always simd byte aligned.
+        SWR_ASSERT(pMem != nullptr);
+        if (pMem == nullptr)
+        {
+            // Allocation failed: report it instead of recursing forever.
+            return nullptr;
+        }
+
+        ArenaBlock* pNewBlock = new (pMem) ArenaBlock();
+        pNewBlock->pNext = m_pCurBlock;
+
+        m_pCurBlock = pNewBlock;
+        m_pCurBlock->pMem = PtrAdd(pMem, BLOCK_ALIGN);
+        m_pCurBlock->blockSize = blockSize - BLOCK_ALIGN;
+
+        return AllocAligned(size, align);
+    }
+
+    /// Same as AllocAligned(size, 1).
+    void* Alloc(size_t size)
+    {
+        return AllocAligned(size, 1);
+    }
+
+    /// AllocAligned() performed while holding m_mutex.
+    void* AllocAlignedSync(size_t size, size_t align)
+    {
+        void* pAlloc = nullptr;
+
+        std::unique_lock<std::mutex> l(m_mutex);
+        pAlloc = AllocAligned(size, align);
+
+        return pAlloc;
+    }
+
+    /// Alloc() performed while holding m_mutex.
+    void* AllocSync(size_t size)
+    {
+        void* pAlloc = nullptr;
+
+        std::unique_lock<std::mutex> l(m_mutex);
+        pAlloc = Alloc(size);
+
+        return pAlloc;
+    }
+
+    /// Frees every block except the current one and rewinds the current
+    /// block to offset 0; with removeAll the current block is freed too.
+    void Reset(bool removeAll = false)
+    {
+        if (m_pCurBlock)
+        {
+            m_pCurBlock->offset = 0;
+
+            ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
+            m_pCurBlock->pNext = nullptr;
+            while (pUsedBlocks)
+            {
+                ArenaBlock* pBlock = pUsedBlocks;
+                pUsedBlocks = pBlock->pNext;
+
+                m_allocator.Free(pBlock);
+            }
+
+            if (removeAll)
+            {
+                m_allocator.Free(m_pCurBlock);
+                m_pCurBlock = nullptr;
+            }
+        }
+
+        m_size = 0;
+    }
+
+    size_t Size() const { return m_size; }
 
 private:
 
+    static const size_t BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
+
+    DefaultAllocator m_defAllocator;
+    T& m_allocator;
+
     struct ArenaBlock
     {
         void* pMem = nullptr;
@@ -60,10 +181,13 @@ private:
         size_t offset = 0;
         ArenaBlock* pNext = nullptr;
     };
+    static_assert(sizeof(ArenaBlock) <= BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
 
     ArenaBlock* m_pCurBlock = nullptr;
     size_t m_size = 0;
 
     /// @note Mutex is only used by sync allocation functions.
-    std::mutex* m_pMutex;
+    std::mutex m_mutex;
 };
+
+typedef TArena<> Arena;
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
index ac2117bf4a4..f3c24dacb48 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -60,14 +60,6 @@ MacroTileMgr::MacroTileMgr(Arena& arena) : mArena(arena)
 {
 }
 
-void MacroTileMgr::initialize()
-{
-    mWorkItemsProduced = 0;
-    mWorkItemsConsumed = 0;
-
-    mDirtyTiles.clear();
-}
-
 void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
 {
     // Should not enqueue more then what we have backing for in the hot tile manager.
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index 30f80ce4247..f3e1373b00f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -113,7 +113,15 @@ public:
         }
     }
 
-    void initialize();
+    /// Clears the produced/consumed work counters and the dirty tile list.
+    INLINE void initialize()
+    {
+        mWorkItemsProduced = 0;
+        mWorkItemsConsumed = 0;
+
+        mDirtyTiles.clear();
+    }
+
     INLINE std::vector<uint32_t>& getDirtyTiles() { return mDirtyTiles; }
     INLINE MacroTileQueue& getMacroTileQueue(uint32_t id) { return mTiles[id]; }
     void markTileComplete(uint32_t id);
-- 
2.30.2