87d9f42c032664e315b2845eaf75170ad0ca552a
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / tilemgr.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file tilemgr.cpp
24 *
25 * @brief Implementation for Macro Tile Manager which provides the facilities
26 * for threads to work on an macro tile.
27 *
28 ******************************************************************************/
29 #include <unordered_map>
30
31 #include "fifo.hpp"
32 #include "core/tilemgr.h"
33 #include "core/multisample.h"
34 #include "rdtsc_core.h"
35
36 #define TILE_ID(x,y) ((x << 16 | y))
37
38 MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
39 {
40 }
41
42 void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
43 {
44 // Should not enqueue more then what we have backing for in the hot tile manager.
45 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
46 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
47
48 if ((x & ~(KNOB_NUM_HOT_TILES_X-1)) | (y & ~(KNOB_NUM_HOT_TILES_Y-1)))
49 {
50 return;
51 }
52
53 uint32_t id = TILE_ID(x, y);
54
55 MacroTileQueue &tile = mTiles[id];
56 tile.mWorkItemsFE++;
57
58 if (tile.mWorkItemsFE == 1)
59 {
60 tile.clear(mArena);
61 mDirtyTiles.push_back(id);
62 }
63
64 mWorkItemsProduced++;
65 tile.enqueue_try_nosync(mArena, pWork);
66 }
67
68 void MacroTileMgr::markTileComplete(uint32_t id)
69 {
70 SWR_ASSERT(mTiles.find(id) != mTiles.end());
71 MacroTileQueue &tile = mTiles[id];
72 uint32_t numTiles = tile.mWorkItemsFE;
73 InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
74
75 _ReadWriteBarrier();
76 tile.mWorkItemsBE += numTiles;
77 SWR_ASSERT(tile.mWorkItemsFE == tile.mWorkItemsBE);
78
79 // clear out tile, but defer fifo clear until the next DC first queues to it.
80 // this prevents worker threads from constantly locking a completed macro tile
81 tile.mWorkItemsFE = 0;
82 tile.mWorkItemsBE = 0;
83 }
84
85 HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
86 uint32_t renderTargetArrayIndex)
87 {
88 uint32_t x, y;
89 MacroTileMgr::getTileIndices(macroID, x, y);
90
91 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
92 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
93
94 HotTileSet &tile = mHotTiles[x][y];
95 HOTTILE& hotTile = tile.Attachment[attachment];
96 if (hotTile.pBuffer == NULL)
97 {
98 if (create)
99 {
100 uint32_t size = numSamples * mHotTileSize[attachment];
101 uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
102 hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, KNOB_SIMD_WIDTH * 4, numaNode);
103 hotTile.state = HOTTILE_INVALID;
104 hotTile.numSamples = numSamples;
105 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
106 }
107 else
108 {
109 return NULL;
110 }
111 }
112 else
113 {
114 // free the old tile and create a new one with enough space to hold all samples
115 if (numSamples > hotTile.numSamples)
116 {
117 // tile should be either uninitialized or resolved if we're deleting and switching to a
118 // new sample count
119 SWR_ASSERT((hotTile.state == HOTTILE_INVALID) ||
120 (hotTile.state == HOTTILE_RESOLVED) ||
121 (hotTile.state == HOTTILE_CLEAR));
122 FreeHotTileMem(hotTile.pBuffer);
123
124 uint32_t size = numSamples * mHotTileSize[attachment];
125 uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
126 hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, KNOB_SIMD_WIDTH * 4, numaNode);
127 hotTile.state = HOTTILE_INVALID;
128 hotTile.numSamples = numSamples;
129 }
130
131 // if requested render target array index isn't currently loaded, need to store out the current hottile
132 // and load the requested array slice
133 if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
134 {
135 SWR_FORMAT format;
136 switch (attachment)
137 {
138 case SWR_ATTACHMENT_COLOR0:
139 case SWR_ATTACHMENT_COLOR1:
140 case SWR_ATTACHMENT_COLOR2:
141 case SWR_ATTACHMENT_COLOR3:
142 case SWR_ATTACHMENT_COLOR4:
143 case SWR_ATTACHMENT_COLOR5:
144 case SWR_ATTACHMENT_COLOR6:
145 case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
146 case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
147 case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
148 default: SWR_ASSERT(false, "Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
149 }
150
151 if (hotTile.state == HOTTILE_DIRTY)
152 {
153 pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
154 x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
155 }
156
157 pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
158 x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
159
160 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
161 hotTile.state = HOTTILE_DIRTY;
162 }
163 }
164 return &tile.Attachment[attachment];
165 }
166
167 HOTTILE* HotTileMgr::GetHotTileNoLoad(
168 SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID,
169 SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples)
170 {
171 uint32_t x, y;
172 MacroTileMgr::getTileIndices(macroID, x, y);
173
174 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
175 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
176
177 HotTileSet &tile = mHotTiles[x][y];
178 HOTTILE& hotTile = tile.Attachment[attachment];
179 if (hotTile.pBuffer == NULL)
180 {
181 if (create)
182 {
183 uint32_t size = numSamples * mHotTileSize[attachment];
184 hotTile.pBuffer = (uint8_t*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
185 hotTile.state = HOTTILE_INVALID;
186 hotTile.numSamples = numSamples;
187 hotTile.renderTargetArrayIndex = 0;
188 }
189 else
190 {
191 return NULL;
192 }
193 }
194
195 return &hotTile;
196 }
197
198 void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
199 {
200 // Load clear color into SIMD register...
201 float *pClearData = (float*)(pHotTile->clearData);
202 simdscalar valR = _simd_broadcast_ss(&pClearData[0]);
203 simdscalar valG = _simd_broadcast_ss(&pClearData[1]);
204 simdscalar valB = _simd_broadcast_ss(&pClearData[2]);
205 simdscalar valA = _simd_broadcast_ss(&pClearData[3]);
206
207 float *pfBuf = (float*)pHotTile->pBuffer;
208 uint32_t numSamples = pHotTile->numSamples;
209
210 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
211 {
212 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
213 {
214 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) //SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
215 {
216 _simd_store_ps(pfBuf, valR);
217 pfBuf += KNOB_SIMD_WIDTH;
218 _simd_store_ps(pfBuf, valG);
219 pfBuf += KNOB_SIMD_WIDTH;
220 _simd_store_ps(pfBuf, valB);
221 pfBuf += KNOB_SIMD_WIDTH;
222 _simd_store_ps(pfBuf, valA);
223 pfBuf += KNOB_SIMD_WIDTH;
224 }
225 }
226 }
227 }
228
229 void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
230 {
231 // Load clear color into SIMD register...
232 float *pClearData = (float*)(pHotTile->clearData);
233 simdscalar valZ = _simd_broadcast_ss(&pClearData[0]);
234
235 float *pfBuf = (float*)pHotTile->pBuffer;
236 uint32_t numSamples = pHotTile->numSamples;
237
238 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
239 {
240 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
241 {
242 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
243 {
244 _simd_store_ps(pfBuf, valZ);
245 pfBuf += KNOB_SIMD_WIDTH;
246 }
247 }
248 }
249 }
250
251 void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
252 {
253 // convert from F32 to U8.
254 uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
255 //broadcast 32x into __m256i...
256 simdscalari valS = _simd_set1_epi8(clearVal);
257
258 simdscalari* pBuf = (simdscalari*)pHotTile->pBuffer;
259 uint32_t numSamples = pHotTile->numSamples;
260
261 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
262 {
263 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
264 {
265 // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
266 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
267 {
268 _simd_store_si(pBuf, valS);
269 pBuf += 1;
270 }
271 }
272 }
273 }
274
275 //////////////////////////////////////////////////////////////////////////
276 /// @brief InitializeHotTiles
277 /// for draw calls, we initialize the active hot tiles and perform deferred
278 /// load on them if tile is in invalid state. we do this in the outer thread
279 /// loop instead of inside the draw routine itself mainly for performance,
280 /// to avoid unnecessary setup every triangle
281 /// @todo support deferred clear
282 /// @param pCreateInfo - pointer to creation info.
283 void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID)
284 {
285 const API_STATE& state = GetApiState(pDC);
286
287 uint32_t x, y;
288 MacroTileMgr::getTileIndices(macroID, x, y);
289 x *= KNOB_MACROTILE_X_DIM;
290 y *= KNOB_MACROTILE_Y_DIM;
291
292 uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
293
294 // check RT if enabled
295 unsigned long rtSlot = 0;
296 uint32_t colorHottileEnableMask = state.colorHottileEnable;
297 while (_BitScanForward(&rtSlot, colorHottileEnableMask))
298 {
299 HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
300
301 if (pHotTile->state == HOTTILE_INVALID)
302 {
303 RDTSC_START(BELoadTiles);
304 // invalid hottile before draw requires a load from surface before we can draw to it
305 pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
306 pHotTile->state = HOTTILE_DIRTY;
307 RDTSC_STOP(BELoadTiles, 0, 0);
308 }
309 else if (pHotTile->state == HOTTILE_CLEAR)
310 {
311 RDTSC_START(BELoadTiles);
312 // Clear the tile.
313 ClearColorHotTile(pHotTile);
314 pHotTile->state = HOTTILE_DIRTY;
315 RDTSC_STOP(BELoadTiles, 0, 0);
316 }
317 colorHottileEnableMask &= ~(1 << rtSlot);
318 }
319
320 // check depth if enabled
321 if (state.depthHottileEnable)
322 {
323 HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
324 if (pHotTile->state == HOTTILE_INVALID)
325 {
326 RDTSC_START(BELoadTiles);
327 // invalid hottile before draw requires a load from surface before we can draw to it
328 pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
329 pHotTile->state = HOTTILE_DIRTY;
330 RDTSC_STOP(BELoadTiles, 0, 0);
331 }
332 else if (pHotTile->state == HOTTILE_CLEAR)
333 {
334 RDTSC_START(BELoadTiles);
335 // Clear the tile.
336 ClearDepthHotTile(pHotTile);
337 pHotTile->state = HOTTILE_DIRTY;
338 RDTSC_STOP(BELoadTiles, 0, 0);
339 }
340 }
341
342 // check stencil if enabled
343 if (state.stencilHottileEnable)
344 {
345 HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
346 if (pHotTile->state == HOTTILE_INVALID)
347 {
348 RDTSC_START(BELoadTiles);
349 // invalid hottile before draw requires a load from surface before we can draw to it
350 pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
351 pHotTile->state = HOTTILE_DIRTY;
352 RDTSC_STOP(BELoadTiles, 0, 0);
353 }
354 else if (pHotTile->state == HOTTILE_CLEAR)
355 {
356 RDTSC_START(BELoadTiles);
357 // Clear the tile.
358 ClearStencilHotTile(pHotTile);
359 pHotTile->state = HOTTILE_DIRTY;
360 RDTSC_STOP(BELoadTiles, 0, 0);
361 }
362 }
363 }