src/gallium/drivers/swr/rasterizer/core/arena.h
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file arena.h
*
* @brief Arena memory manager
*        The arena is convenient and fast for managing allocations that are
*        tied to an operation and can all be freed at once when that operation
*        completes. Allocations are cheap: most of the time they are simply an
*        increment of an offset. There is also no need to free individual
*        allocations; all of the arena memory can be freed at once. (A usage
*        sketch appears at the end of this file.)
*
******************************************************************************/
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
              "Increase ARENA_BLOCK_ALIGN size");

class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching Allocator for Arena
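// Keeps freed arena blocks in size-sorted buckets so later arenas can reuse
// them instead of going back to the system allocator for every new block.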
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

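    // Two-generation trim: blocks freed since the last call sit in m_cachedBlocks;
    // FreeOldBlocks() ages them into m_oldCachedBlocks and, once the old generation
    // grows past MAX_UNUSED_SIZE, returns those old blocks to the base allocator.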
    void FreeOldBlocks()
    {
        if (!m_cachedSize) { return; }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
                    // Insert each block based on size
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }

    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first), so if we get
                // here there are no blocks large enough; fall through to
                // allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use the next biggest size
            pBlock = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
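    // e.g. with the defaults (NumBucketsT = 8, StartBucketBitT = 12): bucket 0
    // holds blocks up to 8KB, bucket 1 up to 16KB, doubling per bucket, and the
    // last bucket collects anything larger than 512KB.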
    static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);

    ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex m_mutex;

    size_t m_totalAllocated = 0;

    size_t m_cachedSize = 0;
    size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t offset = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }

    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }

    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:

    ArenaBlock* m_pCurBlock = nullptr;
    size_t m_offset = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex m_mutex;

    DefaultAllocator m_defAllocator;
    T& m_allocator;
};

using StdArena = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
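
#if 0
// Usage sketch (illustrative only, not part of the SWR interface). The helper
// name below is a made-up example; it simply shows the intended pattern of
// allocating freely during an operation and then releasing everything at once.
inline void ArenaUsageSketch()
{
    CachingAllocator blockCache;     // caches freed blocks in size buckets
    CachingArena arena(blockCache);  // arena that recycles blocks via the cache

    void* pSmall = arena.Alloc(256);             // cheap: just bumps an offset
    void* pWide = arena.AllocAligned(1024, 64);  // aligned within the current block
    (void)pSmall;
    (void)pWide;

    arena.Reset();              // all prior allocations become invalid at once;
                                // spare blocks go back to the caching allocator
    blockCache.FreeOldBlocks(); // periodically trims cached blocks that go unused
}
#endif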