swr: [rasterizer] switch assert uses to SWR_ASSERT
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / tilemgr.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file tilemgr.h
24 *
25 * @brief Definitions for Macro Tile Manager which provides the facilities
26 * for threads to work on an macro tile.
27 *
28 ******************************************************************************/
29 #pragma once
30
31 #include <set>
32 #include <unordered_map>
33 #include "common/formats.h"
34 #include "fifo.hpp"
35 #include "context.h"
36 #include "format_traits.h"
37
38 //////////////////////////////////////////////////////////////////////////
39 /// MacroTile - work queue for a tile.
40 //////////////////////////////////////////////////////////////////////////
41 struct MacroTileQueue
42 {
43 MacroTileQueue() { }
44 ~MacroTileQueue() { }
45
46 //////////////////////////////////////////////////////////////////////////
47 /// @brief Returns number of work items queued for this tile.
48 uint32_t getNumQueued()
49 {
50 return mFifo.getNumQueued();
51 }
52
53 //////////////////////////////////////////////////////////////////////////
54 /// @brief Attempt to lock the work fifo. If already locked then return false.
55 bool tryLock()
56 {
57 return mFifo.tryLock();
58 }
59
60 //////////////////////////////////////////////////////////////////////////
61 /// @brief Clear fifo and unlock it.
62 void clear(Arena& arena)
63 {
64 mFifo.clear(arena);
65 }
66
67 //////////////////////////////////////////////////////////////////////////
68 /// @brief Peek at work sitting at the front of the fifo.
69 BE_WORK* peek()
70 {
71 return mFifo.peek();
72 }
73
74 bool enqueue_try_nosync(Arena& arena, const BE_WORK* entry)
75 {
76 return mFifo.enqueue_try_nosync(arena, entry);
77 }
78
79 //////////////////////////////////////////////////////////////////////////
80 /// @brief Move to next work item
81 void dequeue()
82 {
83 mFifo.dequeue_noinc();
84 }
85
86 //////////////////////////////////////////////////////////////////////////
87 /// @brief Destroy fifo
88 void destroy()
89 {
90 mFifo.destroy();
91 }
92
93 ///@todo This will all be private.
94 uint32_t mWorkItemsFE = 0;
95 uint32_t mWorkItemsBE = 0;
96
97 private:
98 QUEUE<BE_WORK> mFifo;
99 };
100
101 //////////////////////////////////////////////////////////////////////////
102 /// MacroTileMgr - Manages macrotiles for a draw.
103 //////////////////////////////////////////////////////////////////////////
104 class MacroTileMgr
105 {
106 public:
107 MacroTileMgr(Arena& arena);
108 ~MacroTileMgr()
109 {
110 for (auto &tile : mTiles)
111 {
112 tile.second.destroy();
113 }
114 }
115
116 void initialize();
117 INLINE std::vector<uint32_t>& getDirtyTiles() { return mDirtyTiles; }
118 INLINE MacroTileQueue& getMacroTileQueue(uint32_t id) { return mTiles[id]; }
119 void markTileComplete(uint32_t id);
120
121 INLINE bool isWorkComplete()
122 {
123 return mWorkItemsProduced == mWorkItemsConsumed;
124 }
125
126 void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);
127
128 static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
129 {
130 y = tileID & 0xffff;
131 x = (tileID >> 16) & 0xffff;
132 }
133
134 void *operator new(size_t size);
135 void operator delete (void *p);
136
137 private:
138 Arena& mArena;
139 SWR_FORMAT mFormat;
140 std::unordered_map<uint32_t, MacroTileQueue> mTiles;
141
142 // Any tile that has work queued to it is a dirty tile.
143 std::vector<uint32_t> mDirtyTiles;
144
145 OSALIGNLINE(LONG) mWorkItemsProduced;
146 OSALIGNLINE(volatile LONG) mWorkItemsConsumed;
147 };
148
149 //////////////////////////////////////////////////////////////////////////
150 /// DispatchQueue - work queue for dispatch
151 //////////////////////////////////////////////////////////////////////////
152 class DispatchQueue
153 {
154 public:
155 DispatchQueue() {}
156
157 //////////////////////////////////////////////////////////////////////////
158 /// @brief Setup the producer consumer counts.
159 void initialize(uint32_t totalTasks, void* pTaskData)
160 {
161 // The available and outstanding counts start with total tasks.
162 // At the start there are N tasks available and outstanding.
163 // When both the available and outstanding counts have reached 0 then all work has completed.
164 // When a worker starts on a threadgroup then it decrements the available count.
165 // When a worker completes a threadgroup then it decrements the outstanding count.
166
167 mTasksAvailable = totalTasks;
168 mTasksOutstanding = totalTasks;
169
170 mpTaskData = pTaskData;
171 }
172
173 //////////////////////////////////////////////////////////////////////////
174 /// @brief Returns number of tasks available for this dispatch.
175 uint32_t getNumQueued()
176 {
177 return (mTasksAvailable > 0) ? mTasksAvailable : 0;
178 }
179
180 //////////////////////////////////////////////////////////////////////////
181 /// @brief Atomically decrement the work available count. If the result
182 // is greater than 0 then we can on the associated thread group.
183 // Otherwise, there is no more work to do.
184 bool getWork(uint32_t& groupId)
185 {
186 LONG result = InterlockedDecrement(&mTasksAvailable);
187
188 if (result >= 0)
189 {
190 groupId = result;
191 return true;
192 }
193
194 return false;
195 }
196
197 //////////////////////////////////////////////////////////////////////////
198 /// @brief Atomically decrement the outstanding count. A worker is notifying
199 /// us that he just finished some work. Also, return true if we're
200 /// the last worker to complete this dispatch.
201 bool finishedWork()
202 {
203 LONG result = InterlockedDecrement(&mTasksOutstanding);
204 SWR_ASSERT(result >= 0, "Should never oversubscribe work");
205
206 return (result == 0) ? true : false;
207 }
208
209 //////////////////////////////////////////////////////////////////////////
210 /// @brief Work is complete once both the available/outstanding counts have reached 0.
211 bool isWorkComplete()
212 {
213 return ((mTasksAvailable <= 0) &&
214 (mTasksOutstanding <= 0));
215 }
216
217 //////////////////////////////////////////////////////////////////////////
218 /// @brief Return pointer to task data.
219 const void* GetTasksData()
220 {
221 return mpTaskData;
222 }
223
224 void *operator new(size_t size);
225 void operator delete (void *p);
226
227 void* mpTaskData; // The API thread will set this up and the callback task function will interpet this.
228
229 OSALIGNLINE(volatile LONG) mTasksAvailable{ 0 };
230 OSALIGNLINE(volatile LONG) mTasksOutstanding{ 0 };
231 };
232
233
/// Lifecycle state of a single hot tile buffer.
enum HOTTILE_STATE
{
    HOTTILE_INVALID  = 0,   // tile is uninitialized; surface contents should be loaded into it before rendering
    HOTTILE_CLEAR    = 1,   // tile should be cleared
    HOTTILE_DIRTY    = 2,   // tile has been rendered to
    HOTTILE_RESOLVED = 3,   // tile has been stored to memory
};
241
242 struct HOTTILE
243 {
244 BYTE *pBuffer;
245 HOTTILE_STATE state;
246 DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
247 uint32_t numSamples;
248 uint32_t renderTargetArrayIndex; // current render target array index loaded
249 };
250
251 union HotTileSet
252 {
253 struct
254 {
255 HOTTILE Color[SWR_NUM_RENDERTARGETS];
256 HOTTILE Depth;
257 HOTTILE Stencil;
258 };
259 HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
260 };
261
262 class HotTileMgr
263 {
264 public:
265 HotTileMgr()
266 {
267 memset(&mHotTiles[0][0], 0, sizeof(mHotTiles));
268
269 // cache hottile size
270 for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
271 {
272 mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
273 }
274 mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
275 mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
276 }
277
278 ~HotTileMgr()
279 {
280 for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
281 {
282 for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
283 {
284 for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
285 {
286 if (mHotTiles[x][y].Attachment[a].pBuffer != NULL)
287 {
288 _aligned_free(mHotTiles[x][y].Attachment[a].pBuffer);
289 mHotTiles[x][y].Attachment[a].pBuffer = NULL;
290 }
291 }
292 }
293 }
294 }
295
296 HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
297 uint32_t renderTargetArrayIndex = 0)
298 {
299 uint32_t x, y;
300 MacroTileMgr::getTileIndices(macroID, x, y);
301
302 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
303 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
304
305 HotTileSet &tile = mHotTiles[x][y];
306 HOTTILE& hotTile = tile.Attachment[attachment];
307 if (hotTile.pBuffer == NULL)
308 {
309 if (create)
310 {
311 uint32_t size = numSamples * mHotTileSize[attachment];
312 hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
313 hotTile.state = HOTTILE_INVALID;
314 hotTile.numSamples = numSamples;
315 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
316 }
317 else
318 {
319 return NULL;
320 }
321 }
322 else
323 {
324 // free the old tile and create a new one with enough space to hold all samples
325 if (numSamples > hotTile.numSamples)
326 {
327 // tile should be either uninitialized or resolved if we're deleting and switching to a
328 // new sample count
329 SWR_ASSERT((hotTile.state == HOTTILE_INVALID) ||
330 (hotTile.state == HOTTILE_RESOLVED) ||
331 (hotTile.state == HOTTILE_CLEAR));
332 _aligned_free(hotTile.pBuffer);
333
334 uint32_t size = numSamples * mHotTileSize[attachment];
335 hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
336 hotTile.state = HOTTILE_INVALID;
337 hotTile.numSamples = numSamples;
338 }
339
340 // if requested render target array index isn't currently loaded, need to store out the current hottile
341 // and load the requested array slice
342 if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
343 {
344 SWR_FORMAT format;
345 switch (attachment)
346 {
347 case SWR_ATTACHMENT_COLOR0:
348 case SWR_ATTACHMENT_COLOR1:
349 case SWR_ATTACHMENT_COLOR2:
350 case SWR_ATTACHMENT_COLOR3:
351 case SWR_ATTACHMENT_COLOR4:
352 case SWR_ATTACHMENT_COLOR5:
353 case SWR_ATTACHMENT_COLOR6:
354 case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
355 case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
356 case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
357 default: SWR_ASSERT(false, "Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
358 }
359
360 if (hotTile.state == HOTTILE_DIRTY)
361 {
362 pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
363 x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
364 }
365
366 pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
367 x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
368
369 hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
370 hotTile.state = HOTTILE_DIRTY;
371 }
372 }
373 return &tile.Attachment[attachment];
374 }
375
376 HotTileSet &GetHotTile(uint32_t macroID)
377 {
378 uint32_t x, y;
379 MacroTileMgr::getTileIndices(macroID, x, y);
380 SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
381 SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
382
383 return mHotTiles[x][y];
384 }
385
386 private:
387 HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
388 uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
389 };
390