swr: [rasterizer core] Move InitializeHotTiles and corresponding clear code out of...
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / tilemgr.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file tilemgr.cpp
24 *
25 * @brief Implementation for Macro Tile Manager which provides the facilities
26 * for threads to work on a macro tile.
27 *
28 ******************************************************************************/
29 #include <unordered_map>
30
31 #include "fifo.hpp"
32 #include "core/tilemgr.h"
33 #include "core/multisample.h"
34 #include "rdtsc_core.h"
35
// Builds a macro tile id from its (x, y) indices: x shifted left by 16 bits,
// OR'd with y. Both arguments are parenthesized so that compound expressions
// (e.g. `a | b` or `c ? d : e`) expand with the intended precedence.
#define TILE_ID(x,y) (((x) << 16) | (y))
37
38 // override new/delete for alignment
39 void *MacroTileMgr::operator new(size_t size)
40 {
41 return _aligned_malloc(size, 64);
42 }
43
44 void MacroTileMgr::operator delete(void *p)
45 {
46 _aligned_free(p);
47 }
48
49 void* DispatchQueue::operator new(size_t size)
50 {
51 return _aligned_malloc(size, 64);
52 }
53
54 void DispatchQueue::operator delete(void *p)
55 {
56 _aligned_free(p);
57 }
58
59 MacroTileMgr::MacroTileMgr(Arena& arena) : mArena(arena)
60 {
61 }
62
63 void MacroTileMgr::initialize()
64 {
65 mWorkItemsProduced = 0;
66 mWorkItemsConsumed = 0;
67
68 mDirtyTiles.clear();
69 }
70
71 void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
72 {
73 // Should not enqueue more then what we have backing for in the hot tile manager.
74 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
75 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
76
77 uint32_t id = TILE_ID(x, y);
78
79 MacroTileQueue &tile = mTiles[id];
80 tile.mWorkItemsFE++;
81
82 if (tile.mWorkItemsFE == 1)
83 {
84 tile.clear(mArena);
85 mDirtyTiles.push_back(id);
86 }
87
88 mWorkItemsProduced++;
89 tile.enqueue_try_nosync(mArena, pWork);
90 }
91
92 void MacroTileMgr::markTileComplete(uint32_t id)
93 {
94 SWR_ASSERT(mTiles.find(id) != mTiles.end());
95 MacroTileQueue &tile = mTiles[id];
96 uint32_t numTiles = tile.mWorkItemsFE;
97 InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
98
99 _ReadWriteBarrier();
100 tile.mWorkItemsBE += numTiles;
101 SWR_ASSERT(tile.mWorkItemsFE == tile.mWorkItemsBE);
102
103 // clear out tile, but defer fifo clear until the next DC first queues to it.
104 // this prevents worker threads from constantly locking a completed macro tile
105 tile.mWorkItemsFE = 0;
106 tile.mWorkItemsBE = 0;
107 }
108
109 HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
110 uint32_t renderTargetArrayIndex)
111 {
112 uint32_t x, y;
113 MacroTileMgr::getTileIndices(macroID, x, y);
114
115 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
116 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
117
118 HotTileSet &tile = mHotTiles[x][y];
119 HOTTILE& hotTile = tile.Attachment[attachment];
120 if (hotTile.pBuffer == NULL)
121 {
122 if (create)
123 {
124 uint32_t size = numSamples * mHotTileSize[attachment];
125 hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
126 hotTile.state = HOTTILE_INVALID;
127 hotTile.numSamples = numSamples;
128 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
129 }
130 else
131 {
132 return NULL;
133 }
134 }
135 else
136 {
137 // free the old tile and create a new one with enough space to hold all samples
138 if (numSamples > hotTile.numSamples)
139 {
140 // tile should be either uninitialized or resolved if we're deleting and switching to a
141 // new sample count
142 SWR_ASSERT((hotTile.state == HOTTILE_INVALID) ||
143 (hotTile.state == HOTTILE_RESOLVED) ||
144 (hotTile.state == HOTTILE_CLEAR));
145 _aligned_free(hotTile.pBuffer);
146
147 uint32_t size = numSamples * mHotTileSize[attachment];
148 hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
149 hotTile.state = HOTTILE_INVALID;
150 hotTile.numSamples = numSamples;
151 }
152
153 // if requested render target array index isn't currently loaded, need to store out the current hottile
154 // and load the requested array slice
155 if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
156 {
157 SWR_FORMAT format;
158 switch (attachment)
159 {
160 case SWR_ATTACHMENT_COLOR0:
161 case SWR_ATTACHMENT_COLOR1:
162 case SWR_ATTACHMENT_COLOR2:
163 case SWR_ATTACHMENT_COLOR3:
164 case SWR_ATTACHMENT_COLOR4:
165 case SWR_ATTACHMENT_COLOR5:
166 case SWR_ATTACHMENT_COLOR6:
167 case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
168 case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
169 case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
170 default: SWR_ASSERT(false, "Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
171 }
172
173 if (hotTile.state == HOTTILE_DIRTY)
174 {
175 pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
176 x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
177 }
178
179 pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
180 x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
181
182 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
183 hotTile.state = HOTTILE_DIRTY;
184 }
185 }
186 return &tile.Attachment[attachment];
187 }
188
189 void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
190 {
191 // Load clear color into SIMD register...
192 float *pClearData = (float*)(pHotTile->clearData);
193 simdscalar valR = _simd_broadcast_ss(&pClearData[0]);
194 simdscalar valG = _simd_broadcast_ss(&pClearData[1]);
195 simdscalar valB = _simd_broadcast_ss(&pClearData[2]);
196 simdscalar valA = _simd_broadcast_ss(&pClearData[3]);
197
198 float *pfBuf = (float*)pHotTile->pBuffer;
199 uint32_t numSamples = pHotTile->numSamples;
200
201 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
202 {
203 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
204 {
205 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) //SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
206 {
207 _simd_store_ps(pfBuf, valR);
208 pfBuf += KNOB_SIMD_WIDTH;
209 _simd_store_ps(pfBuf, valG);
210 pfBuf += KNOB_SIMD_WIDTH;
211 _simd_store_ps(pfBuf, valB);
212 pfBuf += KNOB_SIMD_WIDTH;
213 _simd_store_ps(pfBuf, valA);
214 pfBuf += KNOB_SIMD_WIDTH;
215 }
216 }
217 }
218 }
219
220 void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
221 {
222 // Load clear color into SIMD register...
223 float *pClearData = (float*)(pHotTile->clearData);
224 simdscalar valZ = _simd_broadcast_ss(&pClearData[0]);
225
226 float *pfBuf = (float*)pHotTile->pBuffer;
227 uint32_t numSamples = pHotTile->numSamples;
228
229 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
230 {
231 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
232 {
233 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
234 {
235 _simd_store_ps(pfBuf, valZ);
236 pfBuf += KNOB_SIMD_WIDTH;
237 }
238 }
239 }
240 }
241
242 void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
243 {
244 // convert from F32 to U8.
245 uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
246 //broadcast 32x into __m256i...
247 simdscalari valS = _simd_set1_epi8(clearVal);
248
249 simdscalari* pBuf = (simdscalari*)pHotTile->pBuffer;
250 uint32_t numSamples = pHotTile->numSamples;
251
252 for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
253 {
254 for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
255 {
256 // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
257 for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
258 {
259 _simd_store_si(pBuf, valS);
260 pBuf += 1;
261 }
262 }
263 }
264 }
265
266 //////////////////////////////////////////////////////////////////////////
267 /// @brief InitializeHotTiles
268 /// for draw calls, we initialize the active hot tiles and perform deferred
269 /// load on them if tile is in invalid state. we do this in the outer thread
270 /// loop instead of inside the draw routine itself mainly for performance,
271 /// to avoid unnecessary setup every triangle
272 /// @todo support deferred clear
273 /// @param pCreateInfo - pointer to creation info.
274 void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID)
275 {
276 const API_STATE& state = GetApiState(pDC);
277 HotTileMgr *pHotTileMgr = pContext->pHotTileMgr;
278
279 uint32_t x, y;
280 MacroTileMgr::getTileIndices(macroID, x, y);
281 x *= KNOB_MACROTILE_X_DIM;
282 y *= KNOB_MACROTILE_Y_DIM;
283
284 uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
285
286 // check RT if enabled
287 unsigned long rtSlot = 0;
288 uint32_t colorHottileEnableMask = state.colorHottileEnable;
289 while (_BitScanForward(&rtSlot, colorHottileEnableMask))
290 {
291 HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
292
293 if (pHotTile->state == HOTTILE_INVALID)
294 {
295 RDTSC_START(BELoadTiles);
296 // invalid hottile before draw requires a load from surface before we can draw to it
297 pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
298 pHotTile->state = HOTTILE_DIRTY;
299 RDTSC_STOP(BELoadTiles, 0, 0);
300 }
301 else if (pHotTile->state == HOTTILE_CLEAR)
302 {
303 RDTSC_START(BELoadTiles);
304 // Clear the tile.
305 ClearColorHotTile(pHotTile);
306 pHotTile->state = HOTTILE_DIRTY;
307 RDTSC_STOP(BELoadTiles, 0, 0);
308 }
309 colorHottileEnableMask &= ~(1 << rtSlot);
310 }
311
312 // check depth if enabled
313 if (state.depthHottileEnable)
314 {
315 HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
316 if (pHotTile->state == HOTTILE_INVALID)
317 {
318 RDTSC_START(BELoadTiles);
319 // invalid hottile before draw requires a load from surface before we can draw to it
320 pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
321 pHotTile->state = HOTTILE_DIRTY;
322 RDTSC_STOP(BELoadTiles, 0, 0);
323 }
324 else if (pHotTile->state == HOTTILE_CLEAR)
325 {
326 RDTSC_START(BELoadTiles);
327 // Clear the tile.
328 ClearDepthHotTile(pHotTile);
329 pHotTile->state = HOTTILE_DIRTY;
330 RDTSC_STOP(BELoadTiles, 0, 0);
331 }
332 }
333
334 // check stencil if enabled
335 if (state.stencilHottileEnable)
336 {
337 HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
338 if (pHotTile->state == HOTTILE_INVALID)
339 {
340 RDTSC_START(BELoadTiles);
341 // invalid hottile before draw requires a load from surface before we can draw to it
342 pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
343 pHotTile->state = HOTTILE_DIRTY;
344 RDTSC_STOP(BELoadTiles, 0, 0);
345 }
346 else if (pHotTile->state == HOTTILE_CLEAR)
347 {
348 RDTSC_START(BELoadTiles);
349 // Clear the tile.
350 ClearStencilHotTile(pHotTile);
351 pHotTile->state = HOTTILE_DIRTY;
352 RDTSC_STOP(BELoadTiles, 0, 0);
353 }
354 }
355 }