/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @brief Arena memory manager
*        The arena is a convenient, fast manager for allocations that are
*        associated with an operation and can all be freed at once when that
*        operation completes. Allocations are cheap since most of the time
*        they are simply an increment of an offset, and there is no need to
*        free individual allocations: all of the arena memory is freed at once.
*
******************************************************************************/
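// A minimal usage sketch (illustrative, not part of the original header;
// CachingArena, AllocAligned(), and Reset() are declared further down in
// this file):
//
//     CachingArena arena;
//     uint32_t* pData = (uint32_t*)arena.AllocAligned(64 * sizeof(uint32_t), 64);
//     // ... use pData for the duration of the operation ...
//     arena.Reset();   // releases every allocation in one shot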
#pragma once

#include <mutex>      // std::mutex, std::lock_guard, std::unique_lock
#include <algorithm>  // std::min, std::max

#include "core/utils.h"
static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext     = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
              "Increase BLOCK_ALIGN size");
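// Note: each buffer returned by an allocator below doubles as its own header;
// the ArenaBlock lives in the first ARENA_BLOCK_ALIGN bytes and user data
// starts at offset ARENA_BLOCK_ALIGN (see TArena::AllocAligned below).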
class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        // Construct the block header in-place at the start of the allocation.
        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};
// Caching allocator for arena blocks: keeps freed blocks in per-size buckets
// so they can be handed back out without hitting the system allocator.
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                // Fall back to the old-generation cache.
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock = SearchBlocks(pPrevBlock, size, align);
                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                // Unlink the cached block and hand it out.
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0 // Debug tracing (Windows-only APIs)
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n",
                          ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size so any cached
            // block in the bucket can satisfy any request that maps to it.
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }
    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }
    void FreeOldBlocks()
    {
        if (!m_cachedSize) { return; }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                // Release this bucket's old generation to the system.
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }

                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks in this bucket are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable-sized lists.
                    // Insert each block based on its size.
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }
    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }
    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }

            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }
private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }
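    // Worked example with the defaults (StartBucketBitT = 12, NumBucketsT = 8),
    // assuming BitScanReverseSizeT stores the index of the highest set bit:
    // sizes up to 8KB map to bucket 0, (8KB, 16KB] to bucket 1,
    // (16KB, 32KB] to bucket 2, and so on; the clamp makes the last bucket
    // catch everything larger.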
    // Insert a block into its bucket's list, which is kept sorted by size
    // (largest first).
    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        // Insert into the list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }
    // Find a suitably aligned cached block that can satisfy the request;
    // prefers an exact size match, otherwise takes the smallest block found
    // that is still large enough.
    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this block as it is larger than we wanted,
                    // but continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first).
                // So, if we get here, there are no blocks
                // large enough, fall through to allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use next biggest size
            pBlock = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }
    // Buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
    static const uint32_t CACHE_NUM_BUCKETS      = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t   MAX_UNUSED_SIZE        = sizeof(MEGABYTE);

    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];

    std::mutex  m_mutex;

    size_t      m_totalAllocated = 0;

    size_t      m_cachedSize    = 0;
    size_t      m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;
template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }
    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            // Fast path: bump the offset within the current block.
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t offset = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store the ArenaBlock header in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }
    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }
    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }
    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }
    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            // Free (or recycle, with a caching allocator) every block except
            // the current one, which is kept for reuse unless removeAll is set.
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }
    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) ||
               (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }
private:
    ArenaBlock* m_pCurBlock = nullptr;
    size_t      m_offset    = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex  m_mutex;

    DefaultAllocator m_defAllocator;
    T&               m_allocator;
};
using StdArena     = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;