8ef74a7f4b9880a0956a1257b4d9a89e36e92645
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / tilemgr.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file tilemgr.h
24 *
25 * @brief Definitions for Macro Tile Manager which provides the facilities
26 * for threads to work on a macro tile.
27 *
28 ******************************************************************************/
29 #pragma once
30
31 #include <set>
32 #include <unordered_map>
33 #include "common/formats.h"
34 #include "fifo.hpp"
35 #include "context.h"
36 #include "format_traits.h"
37
38 //////////////////////////////////////////////////////////////////////////
39 /// MacroTile - work queue for a tile.
40 //////////////////////////////////////////////////////////////////////////
41 struct MacroTileQueue
42 {
43 MacroTileQueue() { }
44 ~MacroTileQueue() { }
45
46 //////////////////////////////////////////////////////////////////////////
47 /// @brief Returns number of work items queued for this tile.
48 uint32_t getNumQueued()
49 {
50 return mFifo.getNumQueued();
51 }
52
53 //////////////////////////////////////////////////////////////////////////
54 /// @brief Attempt to lock the work fifo. If already locked then return false.
55 bool tryLock()
56 {
57 return mFifo.tryLock();
58 }
59
60 //////////////////////////////////////////////////////////////////////////
61 /// @brief Clear fifo and unlock it.
62 template <typename ArenaT>
63 void clear(ArenaT& arena)
64 {
65 mFifo.clear(arena);
66 }
67
68 //////////////////////////////////////////////////////////////////////////
69 /// @brief Peek at work sitting at the front of the fifo.
70 BE_WORK* peek()
71 {
72 return mFifo.peek();
73 }
74
75 template <typename ArenaT>
76 bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
77 {
78 return mFifo.enqueue_try_nosync(arena, entry);
79 }
80
81 //////////////////////////////////////////////////////////////////////////
82 /// @brief Move to next work item
83 void dequeue()
84 {
85 mFifo.dequeue_noinc();
86 }
87
88 //////////////////////////////////////////////////////////////////////////
89 /// @brief Destroy fifo
90 void destroy()
91 {
92 mFifo.destroy();
93 }
94
95 ///@todo This will all be private.
96 uint32_t mWorkItemsFE = 0;
97 uint32_t mWorkItemsBE = 0;
98 uint32_t mId = 0;
99
100 private:
101 QUEUE<BE_WORK> mFifo;
102 };
103
104 //////////////////////////////////////////////////////////////////////////
105 /// MacroTileMgr - Manages macrotiles for a draw.
106 //////////////////////////////////////////////////////////////////////////
107 class MacroTileMgr
108 {
109 public:
110 MacroTileMgr(CachingArena& arena);
111 ~MacroTileMgr()
112 {
113 for (auto &tile : mTiles)
114 {
115 tile.second.destroy();
116 }
117 }
118
119 INLINE void initialize()
120 {
121 mWorkItemsProduced = 0;
122 mWorkItemsConsumed = 0;
123
124 mDirtyTiles.clear();
125 }
126
127 INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
128 void markTileComplete(uint32_t id);
129
130 INLINE bool isWorkComplete()
131 {
132 return mWorkItemsProduced == mWorkItemsConsumed;
133 }
134
135 void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);
136
137 static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
138 {
139 y = tileID & 0xffff;
140 x = (tileID >> 16) & 0xffff;
141 }
142
143 private:
144 CachingArena& mArena;
145 std::unordered_map<uint32_t, MacroTileQueue> mTiles;
146
147 // Any tile that has work queued to it is a dirty tile.
148 std::vector<MacroTileQueue*> mDirtyTiles;
149
150 OSALIGNLINE(long) mWorkItemsProduced { 0 };
151 OSALIGNLINE(volatile long) mWorkItemsConsumed { 0 };
152 };
153
154 typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
155
156 //////////////////////////////////////////////////////////////////////////
157 /// DispatchQueue - work queue for dispatch
158 //////////////////////////////////////////////////////////////////////////
159 class DispatchQueue
160 {
161 public:
162 DispatchQueue() {}
163
164 //////////////////////////////////////////////////////////////////////////
165 /// @brief Setup the producer consumer counts.
166 void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch)
167 {
168 // The available and outstanding counts start with total tasks.
169 // At the start there are N tasks available and outstanding.
170 // When both the available and outstanding counts have reached 0 then all work has completed.
171 // When a worker starts on a threadgroup then it decrements the available count.
172 // When a worker completes a threadgroup then it decrements the outstanding count.
173
174 mTasksAvailable = totalTasks;
175 mTasksOutstanding = totalTasks;
176
177 mpTaskData = pTaskData;
178 mPfnDispatch = pfnDispatch;
179 }
180
181 //////////////////////////////////////////////////////////////////////////
182 /// @brief Returns number of tasks available for this dispatch.
183 uint32_t getNumQueued()
184 {
185 return (mTasksAvailable > 0) ? mTasksAvailable : 0;
186 }
187
188 //////////////////////////////////////////////////////////////////////////
189 /// @brief Atomically decrement the work available count. If the result
190 // is greater than 0 then we can on the associated thread group.
191 // Otherwise, there is no more work to do.
192 bool getWork(uint32_t& groupId)
193 {
194 long result = InterlockedDecrement(&mTasksAvailable);
195
196 if (result >= 0)
197 {
198 groupId = result;
199 return true;
200 }
201
202 return false;
203 }
204
205 //////////////////////////////////////////////////////////////////////////
206 /// @brief Atomically decrement the outstanding count. A worker is notifying
207 /// us that he just finished some work. Also, return true if we're
208 /// the last worker to complete this dispatch.
209 bool finishedWork()
210 {
211 long result = InterlockedDecrement(&mTasksOutstanding);
212 SWR_ASSERT(result >= 0, "Should never oversubscribe work");
213
214 return (result == 0) ? true : false;
215 }
216
217 //////////////////////////////////////////////////////////////////////////
218 /// @brief Work is complete once both the available/outstanding counts have reached 0.
219 bool isWorkComplete()
220 {
221 return ((mTasksAvailable <= 0) &&
222 (mTasksOutstanding <= 0));
223 }
224
225 //////////////////////////////////////////////////////////////////////////
226 /// @brief Return pointer to task data.
227 const void* GetTasksData()
228 {
229 return mpTaskData;
230 }
231
232 //////////////////////////////////////////////////////////////////////////
233 /// @brief Dispatches a unit of work
234 void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
235 {
236 SWR_ASSERT(mPfnDispatch != nullptr);
237 mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
238 }
239
240 void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this.
241 PFN_DISPATCH mPfnDispatch{ nullptr }; // Function to call per dispatch
242
243 OSALIGNLINE(volatile long) mTasksAvailable{ 0 };
244 OSALIGNLINE(volatile long) mTasksOutstanding{ 0 };
245 };
246
247
/// Lifecycle state of a hot tile.
enum HOTTILE_STATE
{
    // Tile is in an uninitialized state and should be loaded with surface
    // contents before rendering.
    HOTTILE_INVALID = 0,

    // Tile should be cleared.
    HOTTILE_CLEAR = 1,

    // Tile has been rendered to.
    HOTTILE_DIRTY = 2,

    // Tile has been stored to memory.
    HOTTILE_RESOLVED = 3,
};
255
256 struct HOTTILE
257 {
258 uint8_t *pBuffer;
259 HOTTILE_STATE state;
260 DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
261 uint32_t numSamples;
262 uint32_t renderTargetArrayIndex; // current render target array index loaded
263 };
264
265 union HotTileSet
266 {
267 struct
268 {
269 HOTTILE Color[SWR_NUM_RENDERTARGETS];
270 HOTTILE Depth;
271 HOTTILE Stencil;
272 };
273 HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
274 };
275
276 class HotTileMgr
277 {
278 public:
279 HotTileMgr()
280 {
281 memset(mHotTiles, 0, sizeof(mHotTiles));
282
283 // cache hottile size
284 for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
285 {
286 mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
287 }
288 mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
289 mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
290 }
291
292 ~HotTileMgr()
293 {
294 for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
295 {
296 for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
297 {
298 for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
299 {
300 FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer);
301 }
302 }
303 }
304 }
305
306 void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
307
308 HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
309 uint32_t renderTargetArrayIndex = 0);
310
311 HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
312
313 static void ClearColorHotTile(const HOTTILE* pHotTile);
314 static void ClearDepthHotTile(const HOTTILE* pHotTile);
315 static void ClearStencilHotTile(const HOTTILE* pHotTile);
316
317 private:
318 HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
319 uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
320
321 void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
322 {
323 void* p = nullptr;
324 #if defined(_WIN32)
325 HANDLE hProcess = GetCurrentProcess();
326 p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
327 #else
328 p = AlignedMalloc(size, align);
329 #endif
330
331 return p;
332 }
333
334 void FreeHotTileMem(void* pBuffer)
335 {
336 if (pBuffer)
337 {
338 #if defined(_WIN32)
339 VirtualFree(pBuffer, 0, MEM_RELEASE);
340 #else
341 AlignedFree(pBuffer);
342 #endif
343 }
344 }
345 };
346