src/gallium/drivers/swr/rasterizer/core/arena.h
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file arena.h
*
* @brief Arena memory manager
*        The arena is convenient and fast for managing allocations that are
*        tied to an operation and can all be freed at once when that operation
*        completes. Allocations are cheap: most of the time they are simply an
*        increment of an offset. There is also no need to free individual
*        allocations; all of the arena memory can be freed at once. (A usage
*        sketch appears at the end of this file.)
*
******************************************************************************/
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
              "Increase ARENA_BLOCK_ALIGN size");

class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching Allocator for Arena
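// Keeps freed arena blocks in size-sorted buckets so later arenas can reuse
// them instead of going back to the system allocator for every new block.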
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

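    // Two-generation trim: blocks freed since the last call sit in m_cachedBlocks;
    // FreeOldBlocks() ages them into m_oldCachedBlocks and, once the old generation
    // grows past MAX_UNUSED_SIZE, returns those old blocks to the base allocator.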
    void FreeOldBlocks()
    {
        if (!m_cachedSize) { return; }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
                    // Insert each block based on size
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }

    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first), so if we get
                // here there are no blocks large enough; fall through to
                // allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use the next biggest size
            pBlock = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
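    // e.g. with the defaults (NumBucketsT = 8, StartBucketBitT = 12): bucket 0
    // holds blocks up to 8KB, bucket 1 up to 16KB, doubling per bucket, and the
    // last bucket collects anything larger than 512KB.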
    static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);

    ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex m_mutex;

    size_t m_totalAllocated = 0;

    size_t m_cachedSize = 0;
    size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t offset = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }

    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }

    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:

    ArenaBlock* m_pCurBlock = nullptr;
    size_t m_offset = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex m_mutex;

    DefaultAllocator m_defAllocator;
    T& m_allocator;
};

using StdArena = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
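
#if 0
// Usage sketch (illustrative only, not part of the SWR interface). The helper
// name below is a made-up example; it simply shows the intended pattern of
// allocating freely during an operation and then releasing everything at once.
inline void ArenaUsageSketch()
{
    CachingAllocator blockCache;     // caches freed blocks in size buckets
    CachingArena arena(blockCache);  // arena that recycles blocks via the cache

    void* pSmall = arena.Alloc(256);             // cheap: just bumps an offset
    void* pWide = arena.AllocAligned(1024, 64);  // aligned within the current block
    (void)pSmall;
    (void)pWide;

    arena.Reset();              // all prior allocations become invalid at once;
                                // spare blocks go back to the caching allocator
    blockCache.FreeOldBlocks(); // periodically trims cached blocks that go unused
}
#endif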