- Change tilemgr TILE_ID encoding to use Morton-order (Z-order).
- Change locked tiles set to bitset. Makes clear, set, get much faster.
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
#include "core/tilemgr.h"
#include "core/clip.h"
#include "core/utils.h"
+#include "core/tileset.h"
#include "common/os.h"
BindApiThread(pContext, 0);
}
+ if (pContext->threadInfo.SINGLE_THREADED)
+ {
+ pContext->pSingleThreadLockedTiles = new TileSet();
+ }
+
pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads];
pContext->pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
{
uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
WorkOnFifoFE(pContext, 0, curDraw[0]);
- WorkOnFifoBE(pContext, 0, curDraw[1], pContext->singleThreadLockedTiles, 0, 0);
+ WorkOnFifoBE(pContext, 0, curDraw[1], *pContext->pSingleThreadLockedTiles, 0, 0);
}
else
{
delete[] pContext->ppScratch;
AlignedFree(pContext->pStats);
- delete(pContext->pHotTileMgr);
+ delete pContext->pHotTileMgr;
+ delete pContext->pSingleThreadLockedTiles;
pContext->~SWR_CONTEXT();
AlignedFree(GetContext(hContext));
uint32_t lastFrameChecked;
uint64_t lastDrawChecked;
- TileSet singleThreadLockedTiles;
+ TileSet* pSingleThreadLockedTiles;
// ArchRast thread contexts.
HANDLE* pArContext;
#include "rasterizer.h"
#include "rdtsc_core.h"
#include "tilemgr.h"
+#include "tileset.h"
}
// can only work on this draw if it's not in use by other threads
- if (lockedTiles.find(tileID) != lockedTiles.end())
+ if (lockedTiles.get(tileID))
{
continue;
}
else
{
// This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again.
- lockedTiles.insert(tileID);
+ lockedTiles.set(tileID);
}
}
}
THREAD_DATA *pApiThreadData;
};
-typedef std::unordered_set<uint32_t> TileSet;
+struct TileSet;
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
#include "core/multisample.h"
#include "rdtsc_core.h"
-#define TILE_ID(x,y) ((x << 16 | y))
-
MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
}
return;
}
- uint32_t id = TILE_ID(x, y);
+ uint32_t id = getTileId(x, y);
+
+ if (id >= mTiles.size())
+ {
+ mTiles.resize((16 + id) * 2);
+ }
- MacroTileQueue &tile = mTiles[id];
- tile.mWorkItemsFE++;
- tile.mId = id;
+ MacroTileQueue *pTile = mTiles[id];
+ if (!pTile)
+ {
+ pTile = mTiles[id] = new MacroTileQueue();
+ }
+ pTile->mWorkItemsFE++;
+ pTile->mId = id;
- if (tile.mWorkItemsFE == 1)
+ if (pTile->mWorkItemsFE == 1)
{
- tile.clear(mArena);
- mDirtyTiles.push_back(&tile);
+ pTile->clear(mArena);
+ mDirtyTiles.push_back(pTile);
}
mWorkItemsProduced++;
- tile.enqueue_try_nosync(mArena, pWork);
+ pTile->enqueue_try_nosync(mArena, pWork);
}
void MacroTileMgr::markTileComplete(uint32_t id)
{
- SWR_ASSERT(mTiles.find(id) != mTiles.end());
- MacroTileQueue &tile = mTiles[id];
+ SWR_ASSERT(mTiles.size() > id);
+ MacroTileQueue &tile = *mTiles[id];
uint32_t numTiles = tile.mWorkItemsFE;
InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
#include <set>
#include <unordered_map>
#include "common/formats.h"
+#include "common/intrin.h"
#include "fifo.hpp"
#include "context.h"
#include "format_traits.h"
struct MacroTileQueue
{
MacroTileQueue() { }
- ~MacroTileQueue() { }
+ ~MacroTileQueue() { destroy(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Returns number of work items queued for this tile.
MacroTileMgr(CachingArena& arena);
~MacroTileMgr()
{
- for (auto &tile : mTiles)
+ for (auto *pTile : mTiles)
{
- tile.second.destroy();
+ delete pTile; // slots never enqueued to stay null after resize(); delete on null is a no-op, and ~MacroTileQueue() now calls destroy()
}
}
static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
{
- y = tileID & 0xffff;
- x = (tileID >> 16) & 0xffff;
+ // Morton / Z order of tiles: x and y are interleaved bit-by-bit in tileID
+ x = pext_u32(tileID, 0x55555555); // x comes from the even bit positions (mask 0x55555555)
+ y = pext_u32(tileID, 0xAAAAAAAA); // y comes from the odd bit positions (mask 0xAAAAAAAA); assumes pext_u32 has BMI2 PEXT semantics - TODO confirm in common/intrin.h
+ }
+
+ static INLINE uint32_t getTileId(uint32_t x, uint32_t y)
+ {
+ // Morton / Z order of tiles: interleave the bits of x and y into a single ID
+ return pdep_u32(x, 0x55555555) | pdep_u32(y, 0xAAAAAAAA); // x -> even bit positions, y -> odd bit positions; assumes pdep_u32 has BMI2 PDEP semantics - TODO confirm in common/intrin.h
}
private:
CachingArena& mArena;
- std::unordered_map<uint32_t, MacroTileQueue> mTiles;
+ std::vector<MacroTileQueue*> mTiles;
// Any tile that has work queued to it is a dirty tile.
std::vector<MacroTileQueue*> mDirtyTiles;
--- /dev/null
+/****************************************************************************
+* Copyright (C) 2018 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file tileset.h
+*
+* @brief Custom bitset class for managing locked tiles
+*
+******************************************************************************/
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Growable bitset indexed by tile ID, used to track locked tiles.
+///        Bits live in a heap array of size_t words obtained from
+///        AlignedMalloc and released with AlignedFree.
+/// @note  This header includes nothing itself; the including file must
+///        already provide INLINE, AlignedMalloc/AlignedFree, std::max,
+///        memset and memcpy.
+struct TileSet
+{
+    TileSet() = default;
+
+    // The set owns m_bits through a raw pointer. A member-wise copy would
+    // leave two objects freeing the same buffer, so copying is disallowed.
+    TileSet(const TileSet&) = delete;
+    TileSet& operator=(const TileSet&) = delete;
+
+    ~TileSet()
+    {
+        if (m_bits)
+        {
+            AlignedFree(m_bits);
+        }
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Sets bit idx, growing the storage first if idx is out of range.
+    /// @param idx - bit (tile ID) to set.
+    INLINE void set(size_t idx)
+    {
+        _grow(idx);
+        size_t& word = _get_word(idx);
+        word |= (size_t(1) << (idx & BITS_OFFSET));
+
+        // High-water mark; lets clear() zero only the words that were touched.
+        m_maxSet = std::max(m_maxSet, idx + 1);
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Tests bit idx.
+    /// @param idx - bit (tile ID) to test.
+    /// @return true if the bit is set; false if it is clear or lies beyond
+    ///         the currently allocated range.
+    INLINE bool get(size_t idx) const
+    {
+        if (idx >= m_size)
+        {
+            return false;
+        }
+        const size_t word = m_bits[idx / BITS_PER_WORD];
+        return 0 != (word & (size_t(1) << (idx & BITS_OFFSET)));
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Clears every bit. Only the words covering [0, m_maxSet) are
+    ///        zeroed; the allocation itself is kept for reuse.
+    INLINE void clear()
+    {
+        if (m_maxSet)
+        {
+            size_t num_words = (m_maxSet + BITS_OFFSET) / BITS_PER_WORD;
+            memset(m_bits, 0, sizeof(size_t) * num_words);
+            m_maxSet = 0;
+        }
+    }
+
+private:
+    static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
+    static const size_t BITS_OFFSET   = BITS_PER_WORD - 1; // mask for the bit index within a word
+
+    size_t  m_size   = 0;       // capacity in bits; always a multiple of BITS_PER_WORD
+    size_t  m_maxSet = 0;       // one past the highest bit set since the last clear()
+    size_t* m_bits   = nullptr; // word storage, owned by this object
+
+    /// @brief Returns the word holding bit idx. Caller guarantees idx < m_size.
+    INLINE size_t& _get_word(size_t idx)
+    {
+        return m_bits[idx / BITS_PER_WORD];
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Ensures bit idx is addressable, reallocating if necessary.
+    ///        Existing bits are preserved and newly added words are zeroed.
+    /// @param idx - bit index that must fit in the storage afterwards.
+    void _grow(size_t idx)
+    {
+        if (idx < m_size)
+        {
+            return;
+        }
+
+        // Round idx + 1 up to a whole number of words, and at least double
+        // the capacity so that a run of increasing indices does not trigger
+        // a reallocation and copy for every new word.
+        const size_t required  = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET;
+        const size_t new_size  = std::max(required, m_size * 2);
+        const size_t new_words = new_size / BITS_PER_WORD;
+        const size_t old_words = m_size / BITS_PER_WORD;
+
+        size_t* newBits = (size_t*)AlignedMalloc(sizeof(size_t) * new_words, 64);
+
+        if (m_bits)
+        {
+            memcpy(newBits, m_bits, old_words * sizeof(size_t));
+            AlignedFree(m_bits);
+        }
+
+        // Zero only the tail that was not copied from the old buffer.
+        memset(&newBits[old_words], 0, sizeof(size_t) * (new_words - old_words));
+
+        m_bits = newBits;
+        m_size = new_size;
+    }
+};