1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for Macro Tile Manager which provides the facilities
26 * for threads to work on an macro tile.
28 ******************************************************************************/
29 #include <unordered_map>
32 #include "core/tilemgr.h"
33 #include "core/multisample.h"
34 #include "rdtsc_core.h"
36 MacroTileMgr::MacroTileMgr(CachingArena
& arena
) : mArena(arena
) {}
38 void MacroTileMgr::enqueue(uint32_t x
, uint32_t y
, BE_WORK
* pWork
)
40 // Should not enqueue more then what we have backing for in the hot tile manager.
41 SWR_ASSERT(x
< KNOB_NUM_HOT_TILES_X
);
42 SWR_ASSERT(y
< KNOB_NUM_HOT_TILES_Y
);
44 if ((x
& ~(KNOB_NUM_HOT_TILES_X
- 1)) | (y
& ~(KNOB_NUM_HOT_TILES_Y
- 1)))
49 uint32_t id
= getTileId(x
, y
);
51 if (id
>= mTiles
.size())
53 mTiles
.resize((16 + id
) * 2);
56 MacroTileQueue
* pTile
= mTiles
[id
];
59 pTile
= mTiles
[id
] = new MacroTileQueue();
61 pTile
->mWorkItemsFE
++;
64 if (pTile
->mWorkItemsFE
== 1)
67 mDirtyTiles
.push_back(pTile
);
71 pTile
->enqueue_try_nosync(mArena
, pWork
);
74 void MacroTileMgr::markTileComplete(uint32_t id
)
76 SWR_ASSERT(mTiles
.size() > id
);
77 MacroTileQueue
& tile
= *mTiles
[id
];
78 uint32_t numTiles
= tile
.mWorkItemsFE
;
79 InterlockedExchangeAdd(&mWorkItemsConsumed
, numTiles
);
82 tile
.mWorkItemsBE
+= numTiles
;
83 SWR_ASSERT(tile
.mWorkItemsFE
== tile
.mWorkItemsBE
);
85 // clear out tile, but defer fifo clear until the next DC first queues to it.
86 // this prevents worker threads from constantly locking a completed macro tile
87 tile
.mWorkItemsFE
= 0;
88 tile
.mWorkItemsBE
= 0;
91 HOTTILE
* HotTileMgr::GetHotTile(SWR_CONTEXT
* pContext
,
93 HANDLE hWorkerPrivateData
,
95 SWR_RENDERTARGET_ATTACHMENT attachment
,
98 uint32_t renderTargetArrayIndex
)
101 MacroTileMgr::getTileIndices(macroID
, x
, y
);
103 SWR_ASSERT(x
< KNOB_NUM_HOT_TILES_X
);
104 SWR_ASSERT(y
< KNOB_NUM_HOT_TILES_Y
);
106 HotTileSet
& tile
= mHotTiles
[x
][y
];
107 HOTTILE
& hotTile
= tile
.Attachment
[attachment
];
108 if (hotTile
.pBuffer
== NULL
)
112 uint32_t size
= numSamples
* mHotTileSize
[attachment
];
113 uint32_t numaNode
= ((x
^ y
) & pContext
->threadPool
.numaMask
);
115 (uint8_t*)AllocHotTileMem(size
, 64, numaNode
+ pContext
->threadInfo
.BASE_NUMA_NODE
);
116 hotTile
.state
= HOTTILE_INVALID
;
117 hotTile
.numSamples
= numSamples
;
118 hotTile
.renderTargetArrayIndex
= renderTargetArrayIndex
;
127 // free the old tile and create a new one with enough space to hold all samples
128 if (numSamples
> hotTile
.numSamples
)
130 // tile should be either uninitialized or resolved if we're deleting and switching to a
132 SWR_ASSERT((hotTile
.state
== HOTTILE_INVALID
) || (hotTile
.state
== HOTTILE_RESOLVED
) ||
133 (hotTile
.state
== HOTTILE_CLEAR
));
134 FreeHotTileMem(hotTile
.pBuffer
);
136 uint32_t size
= numSamples
* mHotTileSize
[attachment
];
137 uint32_t numaNode
= ((x
^ y
) & pContext
->threadPool
.numaMask
);
139 (uint8_t*)AllocHotTileMem(size
, 64, numaNode
+ pContext
->threadInfo
.BASE_NUMA_NODE
);
140 hotTile
.state
= HOTTILE_INVALID
;
141 hotTile
.numSamples
= numSamples
;
144 // if requested render target array index isn't currently loaded, need to store out the
145 // current hottile and load the requested array slice
146 if (renderTargetArrayIndex
!= hotTile
.renderTargetArrayIndex
)
151 case SWR_ATTACHMENT_COLOR0
:
152 case SWR_ATTACHMENT_COLOR1
:
153 case SWR_ATTACHMENT_COLOR2
:
154 case SWR_ATTACHMENT_COLOR3
:
155 case SWR_ATTACHMENT_COLOR4
:
156 case SWR_ATTACHMENT_COLOR5
:
157 case SWR_ATTACHMENT_COLOR6
:
158 case SWR_ATTACHMENT_COLOR7
:
159 format
= KNOB_COLOR_HOT_TILE_FORMAT
;
161 case SWR_ATTACHMENT_DEPTH
:
162 format
= KNOB_DEPTH_HOT_TILE_FORMAT
;
164 case SWR_ATTACHMENT_STENCIL
:
165 format
= KNOB_STENCIL_HOT_TILE_FORMAT
;
168 SWR_INVALID("Unknown attachment: %d", attachment
);
169 format
= KNOB_COLOR_HOT_TILE_FORMAT
;
173 if (hotTile
.state
== HOTTILE_CLEAR
)
175 if (attachment
== SWR_ATTACHMENT_STENCIL
)
176 ClearStencilHotTile(&hotTile
);
177 else if (attachment
== SWR_ATTACHMENT_DEPTH
)
178 ClearDepthHotTile(&hotTile
);
180 ClearColorHotTile(&hotTile
);
182 hotTile
.state
= HOTTILE_DIRTY
;
185 if (hotTile
.state
== HOTTILE_DIRTY
)
187 pContext
->pfnStoreTile(GetPrivateState(pDC
),
191 x
* KNOB_MACROTILE_X_DIM
,
192 y
* KNOB_MACROTILE_Y_DIM
,
193 hotTile
.renderTargetArrayIndex
,
197 pContext
->pfnLoadTile(GetPrivateState(pDC
),
201 x
* KNOB_MACROTILE_X_DIM
,
202 y
* KNOB_MACROTILE_Y_DIM
,
203 renderTargetArrayIndex
,
206 hotTile
.renderTargetArrayIndex
= renderTargetArrayIndex
;
207 hotTile
.state
= HOTTILE_DIRTY
;
210 return &tile
.Attachment
[attachment
];
213 HOTTILE
* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT
* pContext
,
216 SWR_RENDERTARGET_ATTACHMENT attachment
,
221 MacroTileMgr::getTileIndices(macroID
, x
, y
);
223 SWR_ASSERT(x
< KNOB_NUM_HOT_TILES_X
);
224 SWR_ASSERT(y
< KNOB_NUM_HOT_TILES_Y
);
226 HotTileSet
& tile
= mHotTiles
[x
][y
];
227 HOTTILE
& hotTile
= tile
.Attachment
[attachment
];
228 if (hotTile
.pBuffer
== NULL
)
232 uint32_t size
= numSamples
* mHotTileSize
[attachment
];
233 hotTile
.pBuffer
= (uint8_t*)AlignedMalloc(size
, 64);
234 hotTile
.state
= HOTTILE_INVALID
;
235 hotTile
.numSamples
= numSamples
;
236 hotTile
.renderTargetArrayIndex
= 0;
247 void HotTileMgr::ClearColorHotTile(
248 const HOTTILE
* pHotTile
) // clear a macro tile from float4 clear data.
250 // Load clear color into SIMD register...
251 float* pClearData
= (float*)(pHotTile
->clearData
);
252 simd16scalar valR
= _simd16_broadcast_ss(&pClearData
[0]);
253 simd16scalar valG
= _simd16_broadcast_ss(&pClearData
[1]);
254 simd16scalar valB
= _simd16_broadcast_ss(&pClearData
[2]);
255 simd16scalar valA
= _simd16_broadcast_ss(&pClearData
[3]);
257 float* pfBuf
= (float*)pHotTile
->pBuffer
;
258 uint32_t numSamples
= pHotTile
->numSamples
;
260 for (uint32_t row
= 0; row
< KNOB_MACROTILE_Y_DIM
; row
+= KNOB_TILE_Y_DIM
)
262 for (uint32_t col
= 0; col
< KNOB_MACROTILE_X_DIM
; col
+= KNOB_TILE_X_DIM
)
264 for (uint32_t si
= 0; si
< (KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* numSamples
);
265 si
+= SIMD16_TILE_X_DIM
* SIMD16_TILE_Y_DIM
)
267 _simd16_store_ps(pfBuf
, valR
);
268 pfBuf
+= KNOB_SIMD16_WIDTH
;
270 _simd16_store_ps(pfBuf
, valG
);
271 pfBuf
+= KNOB_SIMD16_WIDTH
;
273 _simd16_store_ps(pfBuf
, valB
);
274 pfBuf
+= KNOB_SIMD16_WIDTH
;
276 _simd16_store_ps(pfBuf
, valA
);
277 pfBuf
+= KNOB_SIMD16_WIDTH
;
283 void HotTileMgr::ClearDepthHotTile(
284 const HOTTILE
* pHotTile
) // clear a macro tile from float4 clear data.
286 // Load clear color into SIMD register...
287 float* pClearData
= (float*)(pHotTile
->clearData
);
288 simd16scalar valZ
= _simd16_broadcast_ss(&pClearData
[0]);
290 float* pfBuf
= (float*)pHotTile
->pBuffer
;
291 uint32_t numSamples
= pHotTile
->numSamples
;
293 for (uint32_t row
= 0; row
< KNOB_MACROTILE_Y_DIM
; row
+= KNOB_TILE_Y_DIM
)
295 for (uint32_t col
= 0; col
< KNOB_MACROTILE_X_DIM
; col
+= KNOB_TILE_X_DIM
)
297 for (uint32_t si
= 0; si
< (KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* numSamples
);
298 si
+= SIMD16_TILE_X_DIM
* SIMD16_TILE_Y_DIM
)
300 _simd16_store_ps(pfBuf
, valZ
);
301 pfBuf
+= KNOB_SIMD16_WIDTH
;
307 void HotTileMgr::ClearStencilHotTile(const HOTTILE
* pHotTile
)
309 // convert from F32 to U8.
310 uint8_t clearVal
= (uint8_t)(pHotTile
->clearData
[0]);
311 // broadcast 32x into __m256i...
312 simd16scalari valS
= _simd16_set1_epi8(clearVal
);
314 simd16scalari
* pBuf
= (simd16scalari
*)pHotTile
->pBuffer
;
315 uint32_t numSamples
= pHotTile
->numSamples
;
317 for (uint32_t row
= 0; row
< KNOB_MACROTILE_Y_DIM
; row
+= KNOB_TILE_Y_DIM
)
319 for (uint32_t col
= 0; col
< KNOB_MACROTILE_X_DIM
; col
+= KNOB_TILE_X_DIM
)
321 // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
322 for (uint32_t si
= 0; si
< (KNOB_TILE_X_DIM
* KNOB_TILE_Y_DIM
* numSamples
);
323 si
+= SIMD16_TILE_X_DIM
* SIMD16_TILE_Y_DIM
* 4)
325 _simd16_store_si(pBuf
, valS
);
332 //////////////////////////////////////////////////////////////////////////
333 /// @brief InitializeHotTiles
334 /// for draw calls, we initialize the active hot tiles and perform deferred
335 /// load on them if tile is in invalid state. we do this in the outer thread
336 /// loop instead of inside the draw routine itself mainly for performance,
337 /// to avoid unnecessary setup every triangle
338 /// @todo support deferred clear
339 /// @param pCreateInfo - pointer to creation info.
340 void HotTileMgr::InitializeHotTiles(SWR_CONTEXT
* pContext
,
345 const API_STATE
& state
= GetApiState(pDC
);
346 HANDLE hWorkerPrivateData
= pDC
->pContext
->threadPool
.pThreadData
[workerId
].pWorkerPrivateData
;
349 MacroTileMgr::getTileIndices(macroID
, x
, y
);
350 x
*= KNOB_MACROTILE_X_DIM
;
351 y
*= KNOB_MACROTILE_Y_DIM
;
353 uint32_t numSamples
= GetNumSamples(state
.rastState
.sampleCount
);
355 // check RT if enabled
356 unsigned long rtSlot
= 0;
357 uint32_t colorHottileEnableMask
= state
.colorHottileEnable
;
358 while (_BitScanForward(&rtSlot
, colorHottileEnableMask
))
365 (SWR_RENDERTARGET_ATTACHMENT
)(SWR_ATTACHMENT_COLOR0
+ rtSlot
),
369 if (pHotTile
->state
== HOTTILE_INVALID
)
371 RDTSC_BEGIN(pContext
->pBucketMgr
, BELoadTiles
, pDC
->drawId
);
372 // invalid hottile before draw requires a load from surface before we can draw to it
373 pContext
->pfnLoadTile(GetPrivateState(pDC
),
375 KNOB_COLOR_HOT_TILE_FORMAT
,
376 (SWR_RENDERTARGET_ATTACHMENT
)(SWR_ATTACHMENT_COLOR0
+ rtSlot
),
379 pHotTile
->renderTargetArrayIndex
,
381 pHotTile
->state
= HOTTILE_DIRTY
;
382 RDTSC_END(pContext
->pBucketMgr
, BELoadTiles
, 0);
384 else if (pHotTile
->state
== HOTTILE_CLEAR
)
386 RDTSC_BEGIN(pContext
->pBucketMgr
, BELoadTiles
, pDC
->drawId
);
388 ClearColorHotTile(pHotTile
);
389 pHotTile
->state
= HOTTILE_DIRTY
;
390 RDTSC_END(pContext
->pBucketMgr
, BELoadTiles
, 0);
392 colorHottileEnableMask
&= ~(1 << rtSlot
);
395 // check depth if enabled
396 if (state
.depthHottileEnable
)
398 HOTTILE
* pHotTile
= GetHotTile(
399 pContext
, pDC
, hWorkerPrivateData
, macroID
, SWR_ATTACHMENT_DEPTH
, true, numSamples
);
400 if (pHotTile
->state
== HOTTILE_INVALID
)
402 RDTSC_BEGIN(pContext
->pBucketMgr
, BELoadTiles
, pDC
->drawId
);
403 // invalid hottile before draw requires a load from surface before we can draw to it
404 pContext
->pfnLoadTile(GetPrivateState(pDC
),
406 KNOB_DEPTH_HOT_TILE_FORMAT
,
407 SWR_ATTACHMENT_DEPTH
,
410 pHotTile
->renderTargetArrayIndex
,
412 pHotTile
->state
= HOTTILE_DIRTY
;
413 RDTSC_END(pContext
->pBucketMgr
, BELoadTiles
, 0);
415 else if (pHotTile
->state
== HOTTILE_CLEAR
)
417 RDTSC_BEGIN(pContext
->pBucketMgr
, BELoadTiles
, pDC
->drawId
);
419 ClearDepthHotTile(pHotTile
);
420 pHotTile
->state
= HOTTILE_DIRTY
;
421 RDTSC_END(pContext
->pBucketMgr
, BELoadTiles
, 0);
425 // check stencil if enabled
426 if (state
.stencilHottileEnable
)
428 HOTTILE
* pHotTile
= GetHotTile(
429 pContext
, pDC
, hWorkerPrivateData
, macroID
, SWR_ATTACHMENT_STENCIL
, true, numSamples
);
430 if (pHotTile
->state
== HOTTILE_INVALID
)
432 RDTSC_BEGIN(pContext
->pBucketMgr
, BELoadTiles
, pDC
->drawId
);
433 // invalid hottile before draw requires a load from surface before we can draw to it
434 pContext
->pfnLoadTile(GetPrivateState(pDC
),
436 KNOB_STENCIL_HOT_TILE_FORMAT
,
437 SWR_ATTACHMENT_STENCIL
,
440 pHotTile
->renderTargetArrayIndex
,
442 pHotTile
->state
= HOTTILE_DIRTY
;
443 RDTSC_END(pContext
->pBucketMgr
, BELoadTiles
, 0);
445 else if (pHotTile
->state
== HOTTILE_CLEAR
)
447 RDTSC_BEGIN(pContext
->pBucketMgr
, BELoadTiles
, pDC
->drawId
);
449 ClearStencilHotTile(pHotTile
);
450 pHotTile
->state
= HOTTILE_DIRTY
;
451 RDTSC_END(pContext
->pBucketMgr
, BELoadTiles
, 0);