for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
{
- pContext->dcRing[dc].pArena = new Arena();
+ pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
pContext->dcRing[dc].pTileMgr = new MacroTileMgr(*(pContext->dcRing[dc].pArena));
pContext->dcRing[dc].pDispatch = new DispatchQueue(); /// @todo Could lazily allocate this if Dispatch seen.
- pContext->dsRing[dc].pArena = new Arena();
+ pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
}
if (!KNOB_SINGLE_THREADED)
uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT;
pCurDrawContext->pState = &pContext->dsRing[dsIndex];
- Arena& stateArena = *(pCurDrawContext->pState->pArena);
+ auto& stateArena = *(pCurDrawContext->pState->pArena);
// Copy previous state to current state.
if (pContext->pPrevDrawContext)
}
};
+static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
+
+struct ArenaBlock
+{
+ void* pMem = nullptr;
+ size_t blockSize = 0;
+ ArenaBlock* pNext = nullptr;
+};
+static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase ARENA_BLOCK_ALIGN size");
+
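For reference, here is the header/payload layout this implies; a sketch only, assuming KNOB_SIMD_WIDTH == 8 (AVX) and an LP64 target where pointers and size_t are 8 bytes:

// Illustrative only, not part of the patch. Under the assumptions above:
//   ARENA_BLOCK_ALIGN  = 8 * 4 = 32 bytes
//   sizeof(ArenaBlock) = 8 (pMem) + 8 (blockSize) + 8 (pNext) = 24 bytes <= 32
// so the static_assert holds and the header fits in the first aligned unit.
// Each system allocation made by the arena is laid out as:
//
//   +---------------------+--------------------------------------------+
//   | ArenaBlock header   | usable memory                              |
//   | (ARENA_BLOCK_ALIGN) | pMem points here; blockSize bytes,         |
//   |                     | ARENA_BLOCK_ALIGN-aligned                  |
//   +---------------------+--------------------------------------------+
static_assert(sizeof(ArenaBlock) == 3 * sizeof(void*), "expected three pointer-sized fields with no padding (LP64 assumption)");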
template<typename MutexT = std::mutex, typename T = DefaultAllocator>
class TArena
{
if (m_pCurBlock)
{
ArenaBlock* pCurBlock = m_pCurBlock;
- pCurBlock->offset = AlignUp(pCurBlock->offset, align);
+ m_offset = AlignUp(m_offset, align);
- if ((pCurBlock->offset + size) <= pCurBlock->blockSize)
+ if ((m_offset + size) <= pCurBlock->blockSize)
{
- void* pMem = PtrAdd(pCurBlock->pMem, pCurBlock->offset);
- pCurBlock->offset += size;
+ void* pMem = PtrAdd(pCurBlock->pMem, m_offset);
+ m_offset += size;
m_size += size;
return pMem;
}
size_t blockSize = std::max<size_t>(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));
- // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
+ // Add in one ARENA_BLOCK_ALIGN unit to store the ArenaBlock header in.
- blockSize = AlignUp(blockSize + BLOCK_ALIGN, BLOCK_ALIGN);
+ blockSize = AlignUp(blockSize + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN);
- void *pMem = m_allocator.AllocateAligned(blockSize, BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
+ void *pMem = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
SWR_ASSERT(pMem != nullptr);
ArenaBlock* pNewBlock = new (pMem) ArenaBlock();
if (pNewBlock != nullptr)
{
+ m_offset = 0;
pNewBlock->pNext = m_pCurBlock;
m_pCurBlock = pNewBlock;
- m_pCurBlock->pMem = PtrAdd(pMem, BLOCK_ALIGN);
- m_pCurBlock->blockSize = blockSize - BLOCK_ALIGN;
-
+ m_pCurBlock->pMem = PtrAdd(pMem, ARENA_BLOCK_ALIGN);
+ m_pCurBlock->blockSize = blockSize - ARENA_BLOCK_ALIGN;
}
return AllocAligned(size, align);
void Reset(bool removeAll = false)
{
+ m_offset = 0;
+
if (m_pCurBlock)
{
- m_pCurBlock->offset = 0;
-
ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
m_pCurBlock->pNext = nullptr;
while (pUsedBlocks)
private:
- static const size_t BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
+ ArenaBlock* m_pCurBlock = nullptr;
+ size_t m_offset = 0;
+ size_t m_size = 0;
+
+ /// @note Mutex is only used by sync allocation functions.
+ MutexT m_mutex;
DefaultAllocator m_defAllocator;
T& m_allocator;
-
- struct ArenaBlock
- {
- void* pMem = nullptr;
- size_t blockSize = 0;
- size_t offset = 0;
- ArenaBlock* pNext = nullptr;
- };
- static_assert(sizeof(ArenaBlock) <= BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
-
- ArenaBlock* m_pCurBlock = nullptr;
- size_t m_size = 0;
-
- /// @note Mutex is only used by sync allocation functions.
- MutexT m_mutex;
};
-typedef TArena<> Arena;
+template<typename T>
+using Arena = TArena<std::mutex, T>;
+using StdArena = Arena<DefaultAllocator>;
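The alias layering reads as follows; a usage sketch (not part of the patch), assuming TArena keeps a public AllocAligned/Reset interface and a constructor that binds m_allocator to a caller-supplied allocator reference, as the m_defAllocator/m_allocator members suggest:

// Arena<DefaultAllocator> == TArena<std::mutex, DefaultAllocator> == StdArena      (old default behavior)
// Arena<CachingAllocator> == TArena<std::mutex, CachingAllocator> == CachingArena  (defined below)
DefaultAllocator heapAlloc;
StdArena scratch(heapAlloc);                                // heap-backed, no block caching
void* pTmp = scratch.AllocAligned(256, ARENA_BLOCK_ALIGN);  // bump allocation within the current block
scratch.Reset();                                            // keeps the current block, releases the rest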
struct NullMutex
{
PFN_OUTPUT_MERGER pfnOutputMerger;
};
+// Caching Allocator for Arena
+struct CachingAllocator : DefaultAllocator
+{
+ void* AllocateAligned(size_t size, size_t align)
+ {
+ SWR_ASSERT(size >= sizeof(ArenaBlock));
+
+ {
+ // search cached blocks
+ std::lock_guard<std::mutex> l(m_mutex);
+ ArenaBlock* pPrevBlock = &m_cachedBlocks;
+ ArenaBlock* pBlock = m_cachedBlocks.pNext;
+ ArenaBlock* pPotentialBlock = nullptr;
+ ArenaBlock* pPotentialPrev = nullptr;
+
+ while (pBlock)
+ {
+ if (pBlock->blockSize >= (size - ARENA_BLOCK_ALIGN))
+ {
+ if (pBlock == AlignUp(pBlock, align))
+ {
+ // blockSize excludes the ARENA_BLOCK_ALIGN header, so compare against the adjusted request
+ if (pBlock->blockSize == (size - ARENA_BLOCK_ALIGN))
+ {
+ // Won't find a better match
+ break;
+ }
+
+ // We could use this as it is larger than we wanted, but
+ // continue to search for a better match
+ pPotentialBlock = pBlock;
+ pPotentialPrev = pPrevBlock;
+ }
+ }
+ else
+ {
+ // Blocks are sorted by size (biggest first), so no
+ // remaining block is large enough; stop searching and
+ // use any earlier match, else fall through to a fresh allocation.
+ pBlock = nullptr;
+ break;
+ }
+
+ pPrevBlock = pBlock;
+ pBlock = pBlock->pNext;
+ }
+
+ if (!pBlock)
+ {
+ // No exact match; fall back to the smallest cached block that was still large enough, if any
+ pBlock = pPotentialBlock;
+ pPrevBlock = pPotentialPrev;
+ }
+
+ if (pBlock)
+ {
+ SWR_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
+ pPrevBlock->pNext = pBlock->pNext;
+ pBlock->pNext = nullptr;
+
+ return pBlock;
+ }
+ }
+
+ return this->DefaultAllocator::AllocateAligned(size, align);
+ }
+
+ void Free(void* pMem)
+ {
+ if (pMem)
+ {
+ ArenaBlock* pNewBlock = reinterpret_cast<ArenaBlock*>(pMem);
+ SWR_ASSERT(pNewBlock->blockSize > 0 && pNewBlock->pMem != nullptr);
+
+ std::unique_lock<std::mutex> l(m_mutex);
+ ArenaBlock* pPrevBlock = &m_cachedBlocks;
+ ArenaBlock* pBlock = m_cachedBlocks.pNext;
+
+ while (pBlock)
+ {
+ if (pNewBlock->blockSize >= pBlock->blockSize)
+ {
+ // Insert here
+ break;
+ }
+ pPrevBlock = pBlock;
+ pBlock = pBlock->pNext;
+ }
+
+ // Insert into list
+ SWR_ASSERT(pPrevBlock);
+ pPrevBlock->pNext = pNewBlock;
+ pNewBlock->pNext = pBlock;
+ }
+ }
+
+ ~CachingAllocator()
+ {
+ // Free all cached blocks
+ ArenaBlock* pBlock = m_cachedBlocks.pNext;
+ while (pBlock)
+ {
+ ArenaBlock* pNext = pBlock->pNext;
+ this->DefaultAllocator::Free(pBlock);
+ pBlock = pNext;
+ }
+ }
+
+ ArenaBlock m_cachedBlocks;
+ std::mutex m_mutex;
+
+};
+
+using CachingArena = Arena<CachingAllocator>;
+
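A minimal end-to-end sketch of the caching path (same constructor assumptions as above; the names here are illustrative): the context owns one CachingAllocator, every per-draw arena is built on it, and Reset(true) hands blocks back to that shared cache rather than the heap, so later draws are normally serviced without touching the system allocator.

// Sketch only, not part of the patch.
CachingAllocator blockCache;              // per-context, cf. SWR_CONTEXT::cachingArenaAllocator below
CachingArena     drawArena(blockCache);   // per draw context, cf. DRAW_CONTEXT::pArena

void* pWork = drawArena.AllocAligned(4096, ARENA_BLOCK_ALIGN);   // bump allocation within a block
// ... record draw work ...
drawArena.Reset(true);                    // blocks return to blockCache, cf. pDC->pArena->Reset(true)
void* pNext = drawArena.AllocAligned(4096, ARENA_BLOCK_ALIGN);   // typically reuses a cached block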
// Draw State
struct DRAW_STATE
{
BACKEND_FUNCS backendFuncs;
PFN_PROCESS_PRIMS pfnProcessPrims;
- Arena* pArena; // This should only be used by API thread.
+ CachingArena* pArena; // This should only be used by API thread.
};
// Draw Context
DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
DRAW_STATE* pState;
- Arena* pArena;
+ CachingArena* pArena;
uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
};
// Scratch space for workers.
uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
+
+ CachingAllocator cachingArenaAllocator;
};
void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
static const uint32_t mBlockSizeShift = 6;
static const uint32_t mBlockSize = 1 << mBlockSizeShift;
- void clear(Arena& arena)
+ template <typename ArenaT>
+ void clear(ArenaT& arena)
{
mHead = 0;
mTail = 0;
mNumEntries --;
}
- bool enqueue_try_nosync(Arena& arena, const T* entry)
+ template <typename ArenaT>
+ bool enqueue_try_nosync(ArenaT& arena, const T* entry)
{
memcpy(&mCurBlock[mTail], entry, sizeof(T));
static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state, void** ppGsOut, void** ppCutBuffer,
void **ppStreamCutBuffer)
{
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
SWR_ASSERT(state.gsState.gsEnable);
// allocate arena space to hold GS output verts
work.pfnWork = gRasterizerTable[rastState.scissorEnable][SWR_MULTISAMPLE_1X];
}
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store active attribs
work.pfnWork = RasterizeSimplePoint;
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store attributes
work.pfnWork = RasterizeTriPoint;
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store active attribs
work.pfnWork = RasterizeLine;
- Arena* pArena = pDC->pArena;
+ auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store active attribs
_ReadWriteBarrier();
// Cleanup memory allocations
- pDC->pArena->Reset();
+ pDC->pArena->Reset(true);
pDC->pTileMgr->initialize();
pContext->dcRing.Dequeue(); // Remove from tail
_aligned_free(p);
}
-MacroTileMgr::MacroTileMgr(Arena& arena) : mArena(arena)
+MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
}
//////////////////////////////////////////////////////////////////////////
/// @brief Clear fifo and unlock it.
- void clear(Arena& arena)
+ template <typename ArenaT>
+ void clear(ArenaT& arena)
{
mFifo.clear(arena);
}
return mFifo.peek();
}
- bool enqueue_try_nosync(Arena& arena, const BE_WORK* entry)
+ template <typename ArenaT>
+ bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
{
return mFifo.enqueue_try_nosync(arena, entry);
}
class MacroTileMgr
{
public:
- MacroTileMgr(Arena& arena);
+ MacroTileMgr(CachingArena& arena);
~MacroTileMgr()
{
for (auto &tile : mTiles)
void operator delete (void *p);
private:
- Arena& mArena;
+ CachingArena& mArena;
std::unordered_map<uint32_t, MacroTileQueue> mTiles;
// Any tile that has work queued to it is a dirty tile.