#define TILE_ID(x,y) ((x << 16 | y))
-// override new/delete for alignment
-void *MacroTileMgr::operator new(size_t size)
+MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
- return _aligned_malloc(size, 64);
-}
-
-void MacroTileMgr::operator delete(void *p)
-{
- _aligned_free(p);
-}
-
-void* DispatchQueue::operator new(size_t size)
-{
- return _aligned_malloc(size, 64);
-}
-
-void DispatchQueue::operator delete(void *p)
-{
- _aligned_free(p);
-}
-
-MacroTileMgr::MacroTileMgr(Arena& arena) : mArena(arena)
-{
-}
-
-void MacroTileMgr::initialize()
-{
- mWorkItemsProduced = 0;
- mWorkItemsConsumed = 0;
-
- mDirtyTiles.clear();
}
void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
+ if ((x & ~(KNOB_NUM_HOT_TILES_X-1)) | (y & ~(KNOB_NUM_HOT_TILES_Y-1)))
+ {
+ return;
+ }
+
uint32_t id = TILE_ID(x, y);
MacroTileQueue &tile = mTiles[id];
tile.mWorkItemsFE++;
+ tile.mId = id;
if (tile.mWorkItemsFE == 1)
{
tile.clear(mArena);
- mDirtyTiles.push_back(id);
+ mDirtyTiles.push_back(&tile);
}
mWorkItemsProduced++;
if (create)
{
uint32_t size = numSamples * mHotTileSize[attachment];
- hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
+ uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
+ hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
hotTile.state = HOTTILE_INVALID;
hotTile.numSamples = numSamples;
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
SWR_ASSERT((hotTile.state == HOTTILE_INVALID) ||
(hotTile.state == HOTTILE_RESOLVED) ||
(hotTile.state == HOTTILE_CLEAR));
- _aligned_free(hotTile.pBuffer);
+ FreeHotTileMem(hotTile.pBuffer);
uint32_t size = numSamples * mHotTileSize[attachment];
- hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
+ uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
+ hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
hotTile.state = HOTTILE_INVALID;
hotTile.numSamples = numSamples;
}
case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
- default: SWR_ASSERT(false, "Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
+ default: SWR_INVALID("Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
+ }
+
+ if (hotTile.state == HOTTILE_CLEAR)
+ {
+ if (attachment == SWR_ATTACHMENT_STENCIL)
+ ClearStencilHotTile(&hotTile);
+ else if (attachment == SWR_ATTACHMENT_DEPTH)
+ ClearDepthHotTile(&hotTile);
+ else
+ ClearColorHotTile(&hotTile);
+
+ hotTile.state = HOTTILE_DIRTY;
}
if (hotTile.state == HOTTILE_DIRTY)
return &tile.Attachment[attachment];
}
+HOTTILE* HotTileMgr::GetHotTileNoLoad(
+ SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID,
+ SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples)
+{
+ uint32_t x, y;
+ MacroTileMgr::getTileIndices(macroID, x, y);
+
+ SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
+ SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
+
+ HotTileSet &tile = mHotTiles[x][y];
+ HOTTILE& hotTile = tile.Attachment[attachment];
+ if (hotTile.pBuffer == NULL)
+ {
+ if (create)
+ {
+ uint32_t size = numSamples * mHotTileSize[attachment];
+ hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, 64);
+ hotTile.state = HOTTILE_INVALID;
+ hotTile.numSamples = numSamples;
+ hotTile.renderTargetArrayIndex = 0;
+ }
+ else
+ {
+ return NULL;
+ }
+ }
+
+ return &hotTile;
+}
+
+#if USE_8x2_TILE_BACKEND
+void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
+{
+ // Load clear color into SIMD register...
+ float *pClearData = (float *)(pHotTile->clearData);
+ simd16scalar valR = _simd16_broadcast_ss(&pClearData[0]);
+ simd16scalar valG = _simd16_broadcast_ss(&pClearData[1]);
+ simd16scalar valB = _simd16_broadcast_ss(&pClearData[2]);
+ simd16scalar valA = _simd16_broadcast_ss(&pClearData[3]);
+
+ float *pfBuf = (float *)pHotTile->pBuffer;
+ uint32_t numSamples = pHotTile->numSamples;
+
+ for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
+ {
+ for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
+ {
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
+ {
+ _simd16_store_ps(pfBuf, valR);
+ pfBuf += KNOB_SIMD16_WIDTH;
+
+ _simd16_store_ps(pfBuf, valG);
+ pfBuf += KNOB_SIMD16_WIDTH;
+
+ _simd16_store_ps(pfBuf, valB);
+ pfBuf += KNOB_SIMD16_WIDTH;
+
+ _simd16_store_ps(pfBuf, valA);
+ pfBuf += KNOB_SIMD16_WIDTH;
+ }
+ }
+ }
+}
+
+void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
+{
+ // Load clear color into SIMD register...
+ float *pClearData = (float *)(pHotTile->clearData);
+ simd16scalar valZ = _simd16_broadcast_ss(&pClearData[0]);
+
+ float *pfBuf = (float *)pHotTile->pBuffer;
+ uint32_t numSamples = pHotTile->numSamples;
+
+ for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
+ {
+ for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
+ {
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
+ {
+ _simd16_store_ps(pfBuf, valZ);
+ pfBuf += KNOB_SIMD16_WIDTH;
+ }
+ }
+ }
+}
+
+void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
+{
+ // convert from F32 to U8.
+ uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
+ //broadcast 32x into __m256i...
+ simd16scalari valS = _simd16_set1_epi8(clearVal);
+
+ simd16scalari *pBuf = (simd16scalari *)pHotTile->pBuffer;
+ uint32_t numSamples = pHotTile->numSamples;
+
+ for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
+ {
+ for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
+ {
+ // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
+ {
+ _simd16_store_si(pBuf, valS);
+ pBuf += 1;
+ }
+ }
+ }
+}
+
+#else
void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
{
// Load clear color into SIMD register...
}
}
+#endif
//////////////////////////////////////////////////////////////////////////
/// @brief InitializeHotTiles
/// for draw calls, we initialize the active hot tiles and perform deferred
/// to avoid unnecessary setup every triangle
/// @todo support deferred clear
/// @param pCreateInfo - pointer to creation info.
-void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID)
+void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
{
const API_STATE& state = GetApiState(pDC);
- HotTileMgr *pHotTileMgr = pContext->pHotTileMgr;
uint32_t x, y;
MacroTileMgr::getTileIndices(macroID, x, y);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ RDTSC_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearColorHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ RDTSC_END(BELoadTiles, 0);
}
colorHottileEnableMask &= ~(1 << rtSlot);
}
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ RDTSC_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearDepthHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ RDTSC_END(BELoadTiles, 0);
}
}
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ RDTSC_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearStencilHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ RDTSC_END(BELoadTiles, 0);
}
}
-}
\ No newline at end of file
+}