1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for Macro Tile Manager which provides the facilities
26 * for threads to work on a macro tile.
28 ******************************************************************************/
32 #include <unordered_map>
33 #include "common/formats.h"
36 #include "format_traits.h"
38 //////////////////////////////////////////////////////////////////////////
39 /// MacroTile - work queue for a tile.
40 //////////////////////////////////////////////////////////////////////////
46 //////////////////////////////////////////////////////////////////////////
47 /// @brief Returns number of work items queued for this tile.
48 uint32_t getNumQueued()
50 return mFifo
.getNumQueued();
53 //////////////////////////////////////////////////////////////////////////
54 /// @brief Attempt to lock the work fifo. If already locked then return false.
57 return mFifo
.tryLock();
60 //////////////////////////////////////////////////////////////////////////
61 /// @brief Clear fifo and unlock it.
62 template <typename ArenaT
>
63 void clear(ArenaT
& arena
)
68 //////////////////////////////////////////////////////////////////////////
69 /// @brief Peek at work sitting at the front of the fifo.
75 template <typename ArenaT
>
76 bool enqueue_try_nosync(ArenaT
& arena
, const BE_WORK
* entry
)
78 return mFifo
.enqueue_try_nosync(arena
, entry
);
81 //////////////////////////////////////////////////////////////////////////
82 /// @brief Move to next work item
85 mFifo
.dequeue_noinc();
88 //////////////////////////////////////////////////////////////////////////
89 /// @brief Destroy fifo
95 ///@todo This will all be private.
96 uint32_t mWorkItemsFE
= 0;
97 uint32_t mWorkItemsBE
= 0;
100 QUEUE
<BE_WORK
> mFifo
;
103 //////////////////////////////////////////////////////////////////////////
104 /// MacroTileMgr - Manages macrotiles for a draw.
105 //////////////////////////////////////////////////////////////////////////
109 MacroTileMgr(CachingArena
& arena
);
112 for (auto &tile
: mTiles
)
114 tile
.second
.destroy();
118 INLINE
void initialize()
120 mWorkItemsProduced
= 0;
121 mWorkItemsConsumed
= 0;
126 INLINE
std::vector
<uint32_t>& getDirtyTiles() { return mDirtyTiles
; }
127 INLINE MacroTileQueue
& getMacroTileQueue(uint32_t id
) { return mTiles
[id
]; }
128 void markTileComplete(uint32_t id
);
130 INLINE
bool isWorkComplete()
132 return mWorkItemsProduced
== mWorkItemsConsumed
;
135 void enqueue(uint32_t x
, uint32_t y
, BE_WORK
*pWork
);
137 static INLINE
void getTileIndices(uint32_t tileID
, uint32_t &x
, uint32_t &y
)
140 x
= (tileID
>> 16) & 0xffff;
144 CachingArena
& mArena
;
145 std::unordered_map
<uint32_t, MacroTileQueue
> mTiles
;
147 // Any tile that has work queued to it is a dirty tile.
148 std::vector
<uint32_t> mDirtyTiles
;
150 OSALIGNLINE(LONG
) mWorkItemsProduced
{ 0 };
151 OSALIGNLINE(volatile LONG
) mWorkItemsConsumed
{ 0 };
154 //////////////////////////////////////////////////////////////////////////
155 /// DispatchQueue - work queue for dispatch
156 //////////////////////////////////////////////////////////////////////////
162 //////////////////////////////////////////////////////////////////////////
163 /// @brief Setup the producer consumer counts.
164 void initialize(uint32_t totalTasks
, void* pTaskData
)
166 // The available and outstanding counts start with total tasks.
167 // At the start there are N tasks available and outstanding.
168 // When both the available and outstanding counts have reached 0 then all work has completed.
169 // When a worker starts on a threadgroup then it decrements the available count.
170 // When a worker completes a threadgroup then it decrements the outstanding count.
172 mTasksAvailable
= totalTasks
;
173 mTasksOutstanding
= totalTasks
;
175 mpTaskData
= pTaskData
;
178 //////////////////////////////////////////////////////////////////////////
179 /// @brief Returns number of tasks available for this dispatch.
180 uint32_t getNumQueued()
182 return (mTasksAvailable
> 0) ? mTasksAvailable
: 0;
185 //////////////////////////////////////////////////////////////////////////
186 /// @brief Atomically decrement the work available count. If the result
187 // is greater than 0 then we can work on the associated thread group.
188 // Otherwise, there is no more work to do.
189 bool getWork(uint32_t& groupId
)
191 LONG result
= InterlockedDecrement(&mTasksAvailable
);
202 //////////////////////////////////////////////////////////////////////////
203 /// @brief Atomically decrement the outstanding count. A worker is notifying
204 /// us that he just finished some work. Also, return true if we're
205 /// the last worker to complete this dispatch.
208 LONG result
= InterlockedDecrement(&mTasksOutstanding
);
209 SWR_ASSERT(result
>= 0, "Should never oversubscribe work");
211 return (result
== 0) ? true : false;
214 //////////////////////////////////////////////////////////////////////////
215 /// @brief Work is complete once both the available/outstanding counts have reached 0.
216 bool isWorkComplete()
218 return ((mTasksAvailable
<= 0) &&
219 (mTasksOutstanding
<= 0));
222 //////////////////////////////////////////////////////////////////////////
223 /// @brief Return pointer to task data.
224 const void* GetTasksData()
229 void* mpTaskData
{ nullptr }; // The API thread will set this up and the callback task function will interpret this.
// Count of tasks not yet started; initialize() sets it to the total task count and getWork() decrements it.
231 OSALIGNLINE(volatile LONG
) mTasksAvailable
{ 0 };
// Count of tasks not yet finished; initialize() sets it to the total task count and it is decremented as workers report completion.
232 OSALIGNLINE(volatile LONG
) mTasksOutstanding
{ 0 };
// Hot tile states (enumerator list; the enclosing declaration is not visible in this view).
238 HOTTILE_INVALID
, // tile is in uninitialized state and should be loaded with surface contents before rendering
239 HOTTILE_CLEAR
, // tile should be cleared
240 HOTTILE_DIRTY
, // tile has been rendered to
241 HOTTILE_RESOLVED
, // tile has been stored to memory
248 DWORD clearData
[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
250 uint32_t renderTargetArrayIndex
; // current render target array index loaded
257 HOTTILE Color
[SWR_NUM_RENDERTARGETS
];
261 HOTTILE Attachment
[SWR_NUM_ATTACHMENTS
];
269 memset(mHotTiles
, 0, sizeof(mHotTiles
));
271 // cache hottile size
272 for (uint32_t i
= SWR_ATTACHMENT_COLOR0
; i
<= SWR_ATTACHMENT_COLOR7
; ++i
)
274 mHotTileSize
[i
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_COLOR_HOT_TILE_FORMAT
>::bpp
/ 8;
276 mHotTileSize
[SWR_ATTACHMENT_DEPTH
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_DEPTH_HOT_TILE_FORMAT
>::bpp
/ 8;
277 mHotTileSize
[SWR_ATTACHMENT_STENCIL
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_STENCIL_HOT_TILE_FORMAT
>::bpp
/ 8;
282 for (int x
= 0; x
< KNOB_NUM_HOT_TILES_X
; ++x
)
284 for (int y
= 0; y
< KNOB_NUM_HOT_TILES_Y
; ++y
)
286 for (int a
= 0; a
< SWR_NUM_ATTACHMENTS
; ++a
)
288 FreeHotTileMem(mHotTiles
[x
][y
].Attachment
[a
].pBuffer
);
294 void InitializeHotTiles(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t macroID
);
296 HOTTILE
*GetHotTile(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t macroID
, SWR_RENDERTARGET_ATTACHMENT attachment
, bool create
, uint32_t numSamples
= 1,
297 uint32_t renderTargetArrayIndex
= 0);
299 HOTTILE
*GetHotTileNoLoad(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t macroID
, SWR_RENDERTARGET_ATTACHMENT attachment
, bool create
, uint32_t numSamples
= 1);
301 static void ClearColorHotTile(const HOTTILE
* pHotTile
);
302 static void ClearDepthHotTile(const HOTTILE
* pHotTile
);
303 static void ClearStencilHotTile(const HOTTILE
* pHotTile
);
306 HotTileSet mHotTiles
[KNOB_NUM_HOT_TILES_X
][KNOB_NUM_HOT_TILES_Y
];
307 uint32_t mHotTileSize
[SWR_NUM_ATTACHMENTS
];
309 void* AllocHotTileMem(size_t size
, uint32_t align
, uint32_t numaNode
)
313 HANDLE hProcess
= GetCurrentProcess();
314 p
= VirtualAllocExNuma(hProcess
, nullptr, size
, MEM_COMMIT
| MEM_RESERVE
, PAGE_READWRITE
, numaNode
);
316 p
= AlignedMalloc(size
, align
);
322 void FreeHotTileMem(void* pBuffer
)
327 VirtualFree(pBuffer
, 0, MEM_RELEASE
);
329 AlignedFree(pBuffer
);