1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for Macro Tile Manager which provides the facilities
26 * for threads to work on a macro tile.
28 ******************************************************************************/
32 #include <unordered_map>
33 #include "common/formats.h"
36 #include "format_traits.h"
38 //////////////////////////////////////////////////////////////////////////
39 /// MacroTile - work queue for a tile.
40 //////////////////////////////////////////////////////////////////////////
46 //////////////////////////////////////////////////////////////////////////
47 /// @brief Returns number of work items queued for this tile.
48 uint32_t getNumQueued()
50 return mFifo
.getNumQueued();
53 //////////////////////////////////////////////////////////////////////////
54 /// @brief Attempt to lock the work fifo. If already locked then return false.
57 return mFifo
.tryLock();
60 //////////////////////////////////////////////////////////////////////////
61 /// @brief Clear fifo and unlock it.
62 template <typename ArenaT
>
63 void clear(ArenaT
& arena
)
68 //////////////////////////////////////////////////////////////////////////
69 /// @brief Peek at work sitting at the front of the fifo.
75 template <typename ArenaT
>
76 bool enqueue_try_nosync(ArenaT
& arena
, const BE_WORK
* entry
)
78 return mFifo
.enqueue_try_nosync(arena
, entry
);
81 //////////////////////////////////////////////////////////////////////////
82 /// @brief Move to next work item
85 mFifo
.dequeue_noinc();
88 //////////////////////////////////////////////////////////////////////////
89 /// @brief Destroy fifo
95 ///@todo This will all be private.
96 uint32_t mWorkItemsFE
= 0;
97 uint32_t mWorkItemsBE
= 0;
101 QUEUE
<BE_WORK
> mFifo
;
104 //////////////////////////////////////////////////////////////////////////
105 /// MacroTileMgr - Manages macrotiles for a draw.
106 //////////////////////////////////////////////////////////////////////////
110 MacroTileMgr(CachingArena
& arena
);
113 for (auto &tile
: mTiles
)
115 tile
.second
.destroy();
/// @brief Resets the produced and consumed work-item counters to zero.
/// NOTE(review): the embedded line numbering skips after the second
/// assignment, so further statements may be missing from this excerpt.
119 INLINE
void initialize()
121 mWorkItemsProduced
= 0;
122 mWorkItemsConsumed
= 0;
127 INLINE
std::vector
<MacroTileQueue
*>& getDirtyTiles() { return mDirtyTiles
; }
128 void markTileComplete(uint32_t id
);
130 INLINE
bool isWorkComplete()
132 return mWorkItemsProduced
== mWorkItemsConsumed
;
135 void enqueue(uint32_t x
, uint32_t y
, BE_WORK
*pWork
);
/// @brief Decodes a packed tile ID into tile indices.
/// @param tileID Packed tile identifier.
/// @param x      Receives bits 16..31 of tileID.
/// @param y      Output index.
/// NOTE(review): no assignment to y is visible in this excerpt; confirm
/// against the full source.
137 static INLINE
void getTileIndices(uint32_t tileID
, uint32_t &x
, uint32_t &y
)
// The upper 16 bits of the tile ID carry the x index.
140 x
= (tileID
>> 16) & 0xffff;
144 CachingArena
& mArena
;
145 std::unordered_map
<uint32_t, MacroTileQueue
> mTiles
;
147 // Any tile that has work queued to it is a dirty tile.
148 std::vector
<MacroTileQueue
*> mDirtyTiles
;
150 OSALIGNLINE(long) mWorkItemsProduced
{ 0 };
151 OSALIGNLINE(volatile long) mWorkItemsConsumed
{ 0 };
154 typedef void(*PFN_DISPATCH
)(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t threadGroupId
, void*& pSpillFillBuffer
, void*& pScratchSpace
);
156 //////////////////////////////////////////////////////////////////////////
157 /// DispatchQueue - work queue for dispatch
158 //////////////////////////////////////////////////////////////////////////
164 //////////////////////////////////////////////////////////////////////////
165 /// @brief Setup the producer consumer counts.
166 void initialize(uint32_t totalTasks
, void* pTaskData
, PFN_DISPATCH pfnDispatch
)
168 // The available and outstanding counts start with total tasks.
169 // At the start there are N tasks available and outstanding.
170 // When both the available and outstanding counts have reached 0 then all work has completed.
171 // When a worker starts on a threadgroup then it decrements the available count.
172 // When a worker completes a threadgroup then it decrements the outstanding count.
174 mTasksAvailable
= totalTasks
;
175 mTasksOutstanding
= totalTasks
;
177 mpTaskData
= pTaskData
;
178 mPfnDispatch
= pfnDispatch
;
181 //////////////////////////////////////////////////////////////////////////
182 /// @brief Returns number of tasks available for this dispatch.
183 uint32_t getNumQueued()
185 return (mTasksAvailable
> 0) ? mTasksAvailable
: 0;
188 //////////////////////////////////////////////////////////////////////////
189 /// @brief Atomically decrement the work available count. If the result
190 // is greater than 0 then we can work on the associated thread group.
191 // Otherwise, there is no more work to do.
192 bool getWork(uint32_t& groupId
)
194 long result
= InterlockedDecrement(&mTasksAvailable
);
205 //////////////////////////////////////////////////////////////////////////
206 /// @brief Atomically decrement the outstanding count. A worker is notifying
207 /// us that he just finished some work. Also, return true if we're
208 /// the last worker to complete this dispatch.
211 long result
= InterlockedDecrement(&mTasksOutstanding
);
212 SWR_ASSERT(result
>= 0, "Should never oversubscribe work");
214 return (result
== 0) ? true : false;
217 //////////////////////////////////////////////////////////////////////////
218 /// @brief Work is complete once both the available/outstanding counts have reached 0.
219 bool isWorkComplete()
221 return ((mTasksAvailable
<= 0) &&
222 (mTasksOutstanding
<= 0));
225 //////////////////////////////////////////////////////////////////////////
226 /// @brief Return pointer to task data.
227 const void* GetTasksData()
232 //////////////////////////////////////////////////////////////////////////
233 /// @brief Dispatches a unit of work
234 void dispatch(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t threadGroupId
, void*& pSpillFillBuffer
, void*& pScratchSpace
)
236 SWR_ASSERT(mPfnDispatch
!= nullptr);
237 mPfnDispatch(pDC
, workerId
, threadGroupId
, pSpillFillBuffer
, pScratchSpace
);
240 void* mpTaskData
{ nullptr }; // The API thread will set this up and the callback task function will interpret this.
241 PFN_DISPATCH mPfnDispatch
{ nullptr }; // Function to call per dispatch
// Tasks not yet claimed by a worker; decremented atomically in getWork().
243 OSALIGNLINE(volatile long) mTasksAvailable
{ 0 };
// Tasks not yet reported as finished; decremented atomically when a worker completes one.
244 OSALIGNLINE(volatile long) mTasksOutstanding
{ 0 };
// States a hot tile can be in.
250 HOTTILE_INVALID
, // tile is in uninitialized state and should be loaded with surface contents before rendering
251 HOTTILE_CLEAR
, // tile should be cleared
252 HOTTILE_DIRTY
, // tile has been rendered to
253 HOTTILE_RESOLVED
, // tile has been stored to memory
260 DWORD clearData
[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
262 uint32_t renderTargetArrayIndex
; // current render target array index loaded
269 HOTTILE Color
[SWR_NUM_RENDERTARGETS
];
273 HOTTILE Attachment
[SWR_NUM_ATTACHMENTS
];
281 memset(mHotTiles
, 0, sizeof(mHotTiles
));
283 // cache hottile size
284 for (uint32_t i
= SWR_ATTACHMENT_COLOR0
; i
<= SWR_ATTACHMENT_COLOR7
; ++i
)
286 mHotTileSize
[i
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_COLOR_HOT_TILE_FORMAT
>::bpp
/ 8;
288 mHotTileSize
[SWR_ATTACHMENT_DEPTH
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_DEPTH_HOT_TILE_FORMAT
>::bpp
/ 8;
289 mHotTileSize
[SWR_ATTACHMENT_STENCIL
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_STENCIL_HOT_TILE_FORMAT
>::bpp
/ 8;
294 for (int x
= 0; x
< KNOB_NUM_HOT_TILES_X
; ++x
)
296 for (int y
= 0; y
< KNOB_NUM_HOT_TILES_Y
; ++y
)
298 for (int a
= 0; a
< SWR_NUM_ATTACHMENTS
; ++a
)
300 FreeHotTileMem(mHotTiles
[x
][y
].Attachment
[a
].pBuffer
);
306 void InitializeHotTiles(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroID
);
308 HOTTILE
*GetHotTile(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t macroID
, SWR_RENDERTARGET_ATTACHMENT attachment
, bool create
, uint32_t numSamples
= 1,
309 uint32_t renderTargetArrayIndex
= 0);
311 HOTTILE
*GetHotTileNoLoad(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t macroID
, SWR_RENDERTARGET_ATTACHMENT attachment
, bool create
, uint32_t numSamples
= 1);
313 static void ClearColorHotTile(const HOTTILE
* pHotTile
);
314 static void ClearDepthHotTile(const HOTTILE
* pHotTile
);
315 static void ClearStencilHotTile(const HOTTILE
* pHotTile
);
318 HotTileSet mHotTiles
[KNOB_NUM_HOT_TILES_X
][KNOB_NUM_HOT_TILES_Y
];
319 uint32_t mHotTileSize
[SWR_NUM_ATTACHMENTS
];
321 void* AllocHotTileMem(size_t size
, uint32_t align
, uint32_t numaNode
)
325 HANDLE hProcess
= GetCurrentProcess();
326 p
= VirtualAllocExNuma(hProcess
, nullptr, size
, MEM_COMMIT
| MEM_RESERVE
, PAGE_READWRITE
, numaNode
);
328 p
= AlignedMalloc(size
, align
);
334 void FreeHotTileMem(void* pBuffer
)
339 VirtualFree(pBuffer
, 0, MEM_RELEASE
);
341 AlignedFree(pBuffer
);