swr: [rasterizer core] Globally cache allocated arena blocks for fast re-allocation.
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / arena.h
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file arena.h
*
* @brief Arena memory manager
*        The arena is a convenient, fast way to manage allocations that are
*        tied to an operation and can all be freed together once that
*        operation has completed. Allocations are cheap since most of the
*        time they are simply an increment of an offset, and there is no
*        need to free individual allocations: all of the arena memory can
*        be freed at once.
*
******************************************************************************/
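
/* Example usage (an illustrative sketch of the pattern described above, not
 * an excerpt of real driver code; numVerts, vertStride and scratchBytes are
 * placeholder names):
 *
 *     StdArena arena;
 *     void* pVerts   = arena.AllocAligned(numVerts * vertStride, ARENA_BLOCK_ALIGN);
 *     void* pScratch = arena.Alloc(scratchBytes);   // bump-pointer allocation, no per-allocation free
 *     // ... perform the operation using pVerts / pScratch ...
 *     arena.Reset();   // all arena memory released at once; the current block is kept for reuse
 */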
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

// Default backing allocator: plain aligned heap allocations.
class DefaultAllocator
{
public:
    void* AllocateAligned(size_t size, size_t align)
    {
        void* p = _aligned_malloc(size, align);
        return p;
    }
    void Free(void* pMem)
    {
        _aligned_free(pMem);
    }
};

static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;

// Block header; lives in the first ARENA_BLOCK_ALIGN bytes of every allocated block.
struct ArenaBlock
{
    void* pMem = nullptr;
    size_t blockSize = 0;
    ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase ARENA_BLOCK_ALIGN size");

template<typename MutexT = std::mutex, typename T = DefaultAllocator>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            m_offset = AlignUp(m_offset, align);

            if ((m_offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock->pMem, m_offset);
                m_offset += size;
                m_size += size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block.
        }

        static const size_t ArenaBlockSize = 1024 * 1024;
        size_t blockSize = std::max<size_t>(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));

        // Add in one ARENA_BLOCK_ALIGN unit to store the ArenaBlock header in.
        blockSize = AlignUp(blockSize + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN);

        void* pMem = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN);    // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pMem != nullptr);

        ArenaBlock* pNewBlock = new (pMem) ArenaBlock();

        if (pNewBlock != nullptr)
        {
            m_offset = 0;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
            m_pCurBlock->pMem = PtrAdd(pMem, ARENA_BLOCK_ALIGN);
            m_pCurBlock->blockSize = blockSize - ARENA_BLOCK_ALIGN;

            // Retry now that the new block is at the head of the list.
            return AllocAligned(size, align);
        }

        // Allocation failed; don't recurse forever on an exhausted heap.
        return nullptr;
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        // lock_guard keeps the sync path exception-safe; MutexT only needs lock()/unlock().
        std::lock_guard<MutexT> lock(m_mutex);
        return AllocAligned(size, align);
    }

    void* AllocSync(size_t size)
    {
        std::lock_guard<MutexT> lock(m_mutex);
        return Alloc(size);
    }

    void Reset(bool removeAll = false)
    {
        m_offset = 0;

        if (m_pCurBlock)
        {
            // Free every block except the current one, which is retained so the
            // next round of allocations doesn't have to hit the allocator again.
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }

        m_size = 0;
    }

    size_t Size() const { return m_size; }

private:

    ArenaBlock* m_pCurBlock = nullptr;
    size_t m_offset = 0;
    size_t m_size = 0;

    /// @note Mutex is only used by sync allocation functions.
    MutexT m_mutex;

    DefaultAllocator m_defAllocator;
    T& m_allocator;
};

template<typename T>
using Arena = TArena<std::mutex, T>;
using StdArena = Arena<DefaultAllocator>;

// No-op mutex for arenas that are only ever used from a single thread.
struct NullMutex
{
    void lock() {}
    void unlock() {}
};

// Ref counted Arena for ArenaAllocator
// NOT THREAD SAFE!!
struct RefArena : TArena<NullMutex>
{
    uint32_t AddRef() { return ++m_refCount; }
    uint32_t Release() { if (--m_refCount) { return m_refCount; } delete this; return 0; }

    void* allocate(std::size_t n)
    {
        ++m_numAllocations;
        return Alloc(n);
    }

    // Individual deallocations are only tracked, not freed; memory is reclaimed
    // all at once by clear().
    void deallocate(void* p) { --m_numAllocations; }
    void clear() { SWR_ASSERT(0 == m_numAllocations); Reset(); }

private:
    uint32_t m_refCount = 0;
    uint32_t m_numAllocations = 0;
};
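
/* Usage sketch (illustrative only, mirroring the intent of the disabled
 * ArenaAllocator below; not an excerpt of real driver code):
 *
 *     RefArena* pArena = new RefArena();
 *     pArena->AddRef();            // hold a reference while the arena is in use
 *     void* p = pArena->allocate(64);
 *     // ... use p ...
 *     pArena->deallocate(p);       // bookkeeping only; memory is not freed here
 *     pArena->clear();             // asserts every allocation was returned, then Reset()
 *     pArena->Release();           // deletes the arena once the ref count reaches zero
 */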

#if 0 // THIS DOESN'T WORK!!!
// Arena based replacement for std::allocator
template <typename T>
struct ArenaAllocator
{
    typedef T value_type;
    ArenaAllocator()
    {
        m_pArena = new RefArena();
        m_pArena->AddRef();
    }
    ~ArenaAllocator()
    {
        m_pArena->Release(); m_pArena = nullptr;
    }
    ArenaAllocator(const ArenaAllocator& copy)
    {
        m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef();
    }

    template <class U> ArenaAllocator(const ArenaAllocator<U>& copy)
    {
        m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef();
    }
    T* allocate(std::size_t n)
    {
#if defined(_DEBUG)
        char buf[32];
        sprintf_s(buf, "Alloc: %llu\n", static_cast<unsigned long long>(n));
        OutputDebugStringA(buf);
#endif
        void* p = m_pArena->allocate(n * sizeof(T));
        return static_cast<T*>(p);
    }
    void deallocate(T* p, std::size_t n)
    {
#if defined(_DEBUG)
        char buf[32];
        sprintf_s(buf, "Dealloc: %llu\n", static_cast<unsigned long long>(n));
        OutputDebugStringA(buf);
#endif
        m_pArena->deallocate(p);
    }
    void clear() { m_pArena->clear(); }

    RefArena* m_pArena = nullptr;
};

template <class T, class U>
bool operator== (const ArenaAllocator<T>&, const ArenaAllocator<U>&)
{
    return true;
}

template <class T, class U>
bool operator!= (const ArenaAllocator<T>&, const ArenaAllocator<U>&)
{
    return false;
}
#endif
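
// Note on the change described in the commit title ("Globally cache allocated
// arena blocks for fast re-allocation"): the idea is that, rather than
// returning blocks to the heap when an arena is reset or destroyed, blocks are
// handed to a process-wide cache and reused by the next arena allocation. The
// version of arena.h shown above still frees blocks directly. The class below
// is only a minimal sketch of such a cache, reusing ArenaBlock,
// ARENA_BLOCK_ALIGN and DefaultAllocator from this header and pluggable as the
// T parameter of TArena; it is not the actual implementation from the patch.
class GlobalBlockCacheSketch
{
public:
    // Reuse a cached block that is large enough for the request; otherwise
    // fall back to the heap. Assumes align == ARENA_BLOCK_ALIGN, which is the
    // only alignment TArena ever requests for whole blocks.
    void* AllocateAligned(size_t size, size_t align)
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            ArenaBlock** ppPrev = &m_pCachedBlocks;
            for (ArenaBlock* pBlock = m_pCachedBlocks; pBlock; pBlock = pBlock->pNext)
            {
                // blockSize excludes the header unit, so add it back before comparing.
                if ((pBlock->blockSize + ARENA_BLOCK_ALIGN) >= size)
                {
                    *ppPrev = pBlock->pNext;   // unlink the block from the cache
                    return pBlock;             // the header sits at the start of the allocation
                }
                ppPrev = &pBlock->pNext;
            }
        }
        return m_heap.AllocateAligned(size, align);
    }

    // Instead of freeing, push the block onto the cache's free list. A real
    // implementation would bound the cache and likely bucket blocks by size.
    void Free(void* pMem)
    {
        ArenaBlock* pBlock = static_cast<ArenaBlock*>(pMem);
        std::lock_guard<std::mutex> lock(m_mutex);
        pBlock->pNext = m_pCachedBlocks;
        m_pCachedBlocks = pBlock;
    }

private:
    std::mutex m_mutex;
    ArenaBlock* m_pCachedBlocks = nullptr;
    DefaultAllocator m_heap;
};

// Example: share one cache across arenas so blocks released by one arena's
// Reset() are re-used by the next (names here are hypothetical).
//
//     static GlobalBlockCacheSketch g_blockCache;
//     TArena<std::mutex, GlobalBlockCacheSketch> arena(g_blockCache);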