swr: [rasterizer core] Globally cache allocated arena blocks for fast re-allocation.
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / arena.h
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file arena.h
*
* @brief Arena memory manager
*        The arena is a convenient, fast way to manage allocations that are
*        tied to an operation and can all be freed together once that
*        operation has completed. Allocations are cheap since most of the
*        time they are simply an increment of an offset, and there is no
*        need to free individual allocations: all of the arena memory can
*        be freed at once.
*
******************************************************************************/
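
/* Example usage (an illustrative sketch of the pattern described above, not
 * an excerpt of real driver code; numVerts, vertStride and scratchBytes are
 * placeholder names):
 *
 *     StdArena arena;
 *     void* pVerts   = arena.AllocAligned(numVerts * vertStride, ARENA_BLOCK_ALIGN);
 *     void* pScratch = arena.Alloc(scratchBytes);   // bump-pointer allocation, no per-allocation free
 *     // ... perform the operation using pVerts / pScratch ...
 *     arena.Reset();   // all arena memory released at once; the current block is kept for reuse
 */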
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

// Default backing allocator: plain aligned heap allocations.
class DefaultAllocator
{
public:
    void* AllocateAligned(size_t size, size_t align)
    {
        void* p = _aligned_malloc(size, align);
        return p;
    }
    void Free(void* pMem)
    {
        _aligned_free(pMem);
    }
};

static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;

// Block header; lives in the first ARENA_BLOCK_ALIGN bytes of every allocated block.
struct ArenaBlock
{
    void* pMem = nullptr;
    size_t blockSize = 0;
    ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase ARENA_BLOCK_ALIGN size");

template<typename MutexT = std::mutex, typename T = DefaultAllocator>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            m_offset = AlignUp(m_offset, align);

            if ((m_offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock->pMem, m_offset);
                m_offset += size;
                m_size += size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block.
        }

        static const size_t ArenaBlockSize = 1024 * 1024;
        size_t blockSize = std::max<size_t>(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));

        // Add in one ARENA_BLOCK_ALIGN unit to store the ArenaBlock header in.
        blockSize = AlignUp(blockSize + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN);

        void* pMem = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN);    // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pMem != nullptr);

        ArenaBlock* pNewBlock = new (pMem) ArenaBlock();

        if (pNewBlock != nullptr)
        {
            m_offset = 0;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
            m_pCurBlock->pMem = PtrAdd(pMem, ARENA_BLOCK_ALIGN);
            m_pCurBlock->blockSize = blockSize - ARENA_BLOCK_ALIGN;

            // Retry now that the new block is at the head of the list.
            return AllocAligned(size, align);
        }

        // Allocation failed; don't recurse forever on an exhausted heap.
        return nullptr;
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        // lock_guard keeps the sync path exception-safe; MutexT only needs lock()/unlock().
        std::lock_guard<MutexT> lock(m_mutex);
        return AllocAligned(size, align);
    }

    void* AllocSync(size_t size)
    {
        std::lock_guard<MutexT> lock(m_mutex);
        return Alloc(size);
    }

    void Reset(bool removeAll = false)
    {
        m_offset = 0;

        if (m_pCurBlock)
        {
            // Free every block except the current one, which is retained so the
            // next round of allocations doesn't have to hit the allocator again.
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }

        m_size = 0;
    }

    size_t Size() const { return m_size; }

private:

    ArenaBlock* m_pCurBlock = nullptr;
    size_t m_offset = 0;
    size_t m_size = 0;

    /// @note Mutex is only used by sync allocation functions.
    MutexT m_mutex;

    DefaultAllocator m_defAllocator;
    T& m_allocator;
};

template<typename T>
using Arena = TArena<std::mutex, T>;
using StdArena = Arena<DefaultAllocator>;

// No-op mutex for arenas that are only ever used from a single thread.
struct NullMutex
{
    void lock() {}
    void unlock() {}
};

// Ref counted Arena for ArenaAllocator
// NOT THREAD SAFE!!
struct RefArena : TArena<NullMutex>
{
    uint32_t AddRef() { return ++m_refCount; }
    uint32_t Release() { if (--m_refCount) { return m_refCount; } delete this; return 0; }

    void* allocate(std::size_t n)
    {
        ++m_numAllocations;
        return Alloc(n);
    }

    // Individual deallocations are only tracked, not freed; memory is reclaimed
    // all at once by clear().
    void deallocate(void* p) { --m_numAllocations; }
    void clear() { SWR_ASSERT(0 == m_numAllocations); Reset(); }

private:
    uint32_t m_refCount = 0;
    uint32_t m_numAllocations = 0;
};
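
/* Usage sketch (illustrative only, mirroring the intent of the disabled
 * ArenaAllocator below; not an excerpt of real driver code):
 *
 *     RefArena* pArena = new RefArena();
 *     pArena->AddRef();            // hold a reference while the arena is in use
 *     void* p = pArena->allocate(64);
 *     // ... use p ...
 *     pArena->deallocate(p);       // bookkeeping only; memory is not freed here
 *     pArena->clear();             // asserts every allocation was returned, then Reset()
 *     pArena->Release();           // deletes the arena once the ref count reaches zero
 */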

#if 0 // THIS DOESN'T WORK!!!
// Arena based replacement for std::allocator
template <typename T>
struct ArenaAllocator
{
    typedef T value_type;
    ArenaAllocator()
    {
        m_pArena = new RefArena();
        m_pArena->AddRef();
    }
    ~ArenaAllocator()
    {
        m_pArena->Release(); m_pArena = nullptr;
    }
    ArenaAllocator(const ArenaAllocator& copy)
    {
        m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef();
    }

    template <class U> ArenaAllocator(const ArenaAllocator<U>& copy)
    {
        m_pArena = const_cast<RefArena*>(copy.m_pArena); m_pArena->AddRef();
    }
    T* allocate(std::size_t n)
    {
#if defined(_DEBUG)
        char buf[32];
        sprintf_s(buf, "Alloc: %llu\n", static_cast<unsigned long long>(n));
        OutputDebugStringA(buf);
#endif
        void* p = m_pArena->allocate(n * sizeof(T));
        return static_cast<T*>(p);
    }
    void deallocate(T* p, std::size_t n)
    {
#if defined(_DEBUG)
        char buf[32];
        sprintf_s(buf, "Dealloc: %llu\n", static_cast<unsigned long long>(n));
        OutputDebugStringA(buf);
#endif
        m_pArena->deallocate(p);
    }
    void clear() { m_pArena->clear(); }

    RefArena* m_pArena = nullptr;
};

template <class T, class U>
bool operator== (const ArenaAllocator<T>&, const ArenaAllocator<U>&)
{
    return true;
}

template <class T, class U>
bool operator!= (const ArenaAllocator<T>&, const ArenaAllocator<U>&)
{
    return false;
}
#endif
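
// Note on the change described in the commit title ("Globally cache allocated
// arena blocks for fast re-allocation"): the idea is that, rather than
// returning blocks to the heap when an arena is reset or destroyed, blocks are
// handed to a process-wide cache and reused by the next arena allocation. The
// version of arena.h shown above still frees blocks directly. The class below
// is only a minimal sketch of such a cache, reusing ArenaBlock,
// ARENA_BLOCK_ALIGN and DefaultAllocator from this header and pluggable as the
// T parameter of TArena; it is not the actual implementation from the patch.
class GlobalBlockCacheSketch
{
public:
    // Reuse a cached block that is large enough for the request; otherwise
    // fall back to the heap. Assumes align == ARENA_BLOCK_ALIGN, which is the
    // only alignment TArena ever requests for whole blocks.
    void* AllocateAligned(size_t size, size_t align)
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            ArenaBlock** ppPrev = &m_pCachedBlocks;
            for (ArenaBlock* pBlock = m_pCachedBlocks; pBlock; pBlock = pBlock->pNext)
            {
                // blockSize excludes the header unit, so add it back before comparing.
                if ((pBlock->blockSize + ARENA_BLOCK_ALIGN) >= size)
                {
                    *ppPrev = pBlock->pNext;   // unlink the block from the cache
                    return pBlock;             // the header sits at the start of the allocation
                }
                ppPrev = &pBlock->pNext;
            }
        }
        return m_heap.AllocateAligned(size, align);
    }

    // Instead of freeing, push the block onto the cache's free list. A real
    // implementation would bound the cache and likely bucket blocks by size.
    void Free(void* pMem)
    {
        ArenaBlock* pBlock = static_cast<ArenaBlock*>(pMem);
        std::lock_guard<std::mutex> lock(m_mutex);
        pBlock->pNext = m_pCachedBlocks;
        m_pCachedBlocks = pBlock;
    }

private:
    std::mutex m_mutex;
    ArenaBlock* m_pCachedBlocks = nullptr;
    DefaultAllocator m_heap;
};

// Example: share one cache across arenas so blocks released by one arena's
// Reset() are re-used by the next (names here are hypothetical).
//
//     static GlobalBlockCacheSketch g_blockCache;
//     TArena<std::mutex, GlobalBlockCacheSketch> arena(g_blockCache);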