1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for Macro Tile Manager which provides the facilities
26 * for threads to work on a macro tile.
28 ******************************************************************************/
32 #include <unordered_map>
33 #include "common/formats.h"
36 #include "format_traits.h"
38 //////////////////////////////////////////////////////////////////////////
39 /// MacroTile - work queue for a tile.
40 //////////////////////////////////////////////////////////////////////////
46 //////////////////////////////////////////////////////////////////////////
47 /// @brief Returns number of work items queued for this tile.
48 uint32_t getNumQueued()
50 return mFifo
.getNumQueued();
53 //////////////////////////////////////////////////////////////////////////
54 /// @brief Attempt to lock the work fifo. If already locked then return false.
57 return mFifo
.tryLock();
60 //////////////////////////////////////////////////////////////////////////
61 /// @brief Clear fifo and unlock it.
62 void clear(Arena
& arena
)
67 //////////////////////////////////////////////////////////////////////////
68 /// @brief Peek at work sitting at the front of the fifo.
74 bool enqueue_try_nosync(Arena
& arena
, const BE_WORK
* entry
)
76 return mFifo
.enqueue_try_nosync(arena
, entry
);
79 //////////////////////////////////////////////////////////////////////////
80 /// @brief Move to next work item
83 mFifo
.dequeue_noinc();
86 //////////////////////////////////////////////////////////////////////////
87 /// @brief Destroy fifo
93 ///@todo This will all be private.
94 uint32_t mWorkItemsFE
= 0;
95 uint32_t mWorkItemsBE
= 0;
101 //////////////////////////////////////////////////////////////////////////
102 /// MacroTileMgr - Manages macrotiles for a draw.
103 //////////////////////////////////////////////////////////////////////////
107 MacroTileMgr(Arena
& arena
);
110 for (auto &tile
: mTiles
)
112 tile
.second
.destroy();
117 INLINE
std::vector
<uint32_t>& getDirtyTiles() { return mDirtyTiles
; }
118 INLINE MacroTileQueue
& getMacroTileQueue(uint32_t id
) { return mTiles
[id
]; }
119 void markTileComplete(uint32_t id
);
121 INLINE
bool isWorkComplete()
123 return mWorkItemsProduced
== mWorkItemsConsumed
;
126 void enqueue(uint32_t x
, uint32_t y
, BE_WORK
*pWork
);
128 static INLINE
void getTileIndices(uint32_t tileID
, uint32_t &x
, uint32_t &y
)
131 x
= (tileID
>> 16) & 0xffff;
134 void *operator new(size_t size
);
135 void operator delete (void *p
);
140 std::unordered_map
<uint32_t, MacroTileQueue
> mTiles
;
142 // Any tile that has work queued to it is a dirty tile.
143 std::vector
<uint32_t> mDirtyTiles
;
145 OSALIGNLINE(LONG
) mWorkItemsProduced
;
146 OSALIGNLINE(volatile LONG
) mWorkItemsConsumed
;
149 //////////////////////////////////////////////////////////////////////////
150 /// DispatchQueue - work queue for dispatch
151 //////////////////////////////////////////////////////////////////////////
157 //////////////////////////////////////////////////////////////////////////
158 /// @brief Setup the producer consumer counts.
159 void initialize(uint32_t totalTasks
, void* pTaskData
)
161 // The available and outstanding counts start with total tasks.
162 // At the start there are N tasks available and outstanding.
163 // When both the available and outstanding counts have reached 0 then all work has completed.
164 // When a worker starts on a threadgroup then it decrements the available count.
165 // When a worker completes a threadgroup then it decrements the outstanding count.
167 mTasksAvailable
= totalTasks
;
168 mTasksOutstanding
= totalTasks
;
170 mpTaskData
= pTaskData
;
173 //////////////////////////////////////////////////////////////////////////
174 /// @brief Returns number of tasks available for this dispatch.
175 uint32_t getNumQueued()
177 return (mTasksAvailable
> 0) ? mTasksAvailable
: 0;
180 //////////////////////////////////////////////////////////////////////////
181 /// @brief Atomically decrement the work available count. If the result
182 /// is greater than 0 then we can work on the associated thread group.
183 /// Otherwise, there is no more work to do.
184 bool getWork(uint32_t& groupId
)
186 LONG result
= InterlockedDecrement(&mTasksAvailable
);
197 //////////////////////////////////////////////////////////////////////////
198 /// @brief Atomically decrement the outstanding count. A worker is notifying
199 /// us that he just finished some work. Also, return true if we're
200 /// the last worker to complete this dispatch.
203 LONG result
= InterlockedDecrement(&mTasksOutstanding
);
204 SWR_ASSERT(result
>= 0, "Should never oversubscribe work");
206 return (result
== 0) ? true : false;
209 //////////////////////////////////////////////////////////////////////////
210 /// @brief Work is complete once both the available/outstanding counts have reached 0.
211 bool isWorkComplete()
213 return ((mTasksAvailable
<= 0) &&
214 (mTasksOutstanding
<= 0));
217 //////////////////////////////////////////////////////////////////////////
218 /// @brief Return pointer to task data.
219 const void* GetTasksData()
224 void *operator new(size_t size
);
225 void operator delete (void *p
);
227 void* mpTaskData
; // The API thread will set this up and the callback task function will interpret this.
229 OSALIGNLINE(volatile LONG
) mTasksAvailable
{ 0 };
230 OSALIGNLINE(volatile LONG
) mTasksOutstanding
{ 0 };
236 HOTTILE_INVALID
, // tile is in uninitialized state and should be loaded with surface contents before rendering
237 HOTTILE_CLEAR
, // tile should be cleared
238 HOTTILE_DIRTY
, // tile has been rendered to
239 HOTTILE_RESOLVED
, // tile has been stored to memory
246 DWORD clearData
[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
248 uint32_t renderTargetArrayIndex
; // current render target array index loaded
255 HOTTILE Color
[SWR_NUM_RENDERTARGETS
];
259 HOTTILE Attachment
[SWR_NUM_ATTACHMENTS
];
267 memset(&mHotTiles
[0][0], 0, sizeof(mHotTiles
));
269 // cache hottile size
270 for (uint32_t i
= SWR_ATTACHMENT_COLOR0
; i
<= SWR_ATTACHMENT_COLOR7
; ++i
)
272 mHotTileSize
[i
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_COLOR_HOT_TILE_FORMAT
>::bpp
/ 8;
274 mHotTileSize
[SWR_ATTACHMENT_DEPTH
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_DEPTH_HOT_TILE_FORMAT
>::bpp
/ 8;
275 mHotTileSize
[SWR_ATTACHMENT_STENCIL
] = KNOB_MACROTILE_X_DIM
* KNOB_MACROTILE_Y_DIM
* FormatTraits
<KNOB_STENCIL_HOT_TILE_FORMAT
>::bpp
/ 8;
280 for (int x
= 0; x
< KNOB_NUM_HOT_TILES_X
; ++x
)
282 for (int y
= 0; y
< KNOB_NUM_HOT_TILES_Y
; ++y
)
284 for (int a
= 0; a
< SWR_NUM_ATTACHMENTS
; ++a
)
286 if (mHotTiles
[x
][y
].Attachment
[a
].pBuffer
!= NULL
)
288 _aligned_free(mHotTiles
[x
][y
].Attachment
[a
].pBuffer
);
289 mHotTiles
[x
][y
].Attachment
[a
].pBuffer
= NULL
;
296 HOTTILE
*GetHotTile(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t macroID
, SWR_RENDERTARGET_ATTACHMENT attachment
, bool create
, uint32_t numSamples
= 1,
297 uint32_t renderTargetArrayIndex
= 0)
300 MacroTileMgr::getTileIndices(macroID
, x
, y
);
302 SWR_ASSERT(x
< KNOB_NUM_HOT_TILES_X
);
303 SWR_ASSERT(y
< KNOB_NUM_HOT_TILES_Y
);
305 HotTileSet
&tile
= mHotTiles
[x
][y
];
306 HOTTILE
& hotTile
= tile
.Attachment
[attachment
];
307 if (hotTile
.pBuffer
== NULL
)
311 uint32_t size
= numSamples
* mHotTileSize
[attachment
];
312 hotTile
.pBuffer
= (BYTE
*)_aligned_malloc(size
, KNOB_SIMD_WIDTH
* 4);
313 hotTile
.state
= HOTTILE_INVALID
;
314 hotTile
.numSamples
= numSamples
;
315 hotTile
.renderTargetArrayIndex
= renderTargetArrayIndex
;
324 // free the old tile and create a new one with enough space to hold all samples
325 if (numSamples
> hotTile
.numSamples
)
327 // tile should be either uninitialized or resolved if we're deleting and switching to a
329 SWR_ASSERT((hotTile
.state
== HOTTILE_INVALID
) ||
330 (hotTile
.state
== HOTTILE_RESOLVED
) ||
331 (hotTile
.state
== HOTTILE_CLEAR
));
332 _aligned_free(hotTile
.pBuffer
);
334 uint32_t size
= numSamples
* mHotTileSize
[attachment
];
335 hotTile
.pBuffer
= (BYTE
*)_aligned_malloc(size
, KNOB_SIMD_WIDTH
* 4);
336 hotTile
.state
= HOTTILE_INVALID
;
337 hotTile
.numSamples
= numSamples
;
340 // if requested render target array index isn't currently loaded, need to store out the current hottile
341 // and load the requested array slice
342 if (renderTargetArrayIndex
!= hotTile
.renderTargetArrayIndex
)
347 case SWR_ATTACHMENT_COLOR0
:
348 case SWR_ATTACHMENT_COLOR1
:
349 case SWR_ATTACHMENT_COLOR2
:
350 case SWR_ATTACHMENT_COLOR3
:
351 case SWR_ATTACHMENT_COLOR4
:
352 case SWR_ATTACHMENT_COLOR5
:
353 case SWR_ATTACHMENT_COLOR6
:
354 case SWR_ATTACHMENT_COLOR7
: format
= KNOB_COLOR_HOT_TILE_FORMAT
; break;
355 case SWR_ATTACHMENT_DEPTH
: format
= KNOB_DEPTH_HOT_TILE_FORMAT
; break;
356 case SWR_ATTACHMENT_STENCIL
: format
= KNOB_STENCIL_HOT_TILE_FORMAT
; break;
357 default: SWR_ASSERT(false, "Unknown attachment: %d", attachment
); format
= KNOB_COLOR_HOT_TILE_FORMAT
; break;
360 if (hotTile
.state
== HOTTILE_DIRTY
)
362 pContext
->pfnStoreTile(GetPrivateState(pDC
), format
, attachment
,
363 x
* KNOB_MACROTILE_X_DIM
, y
* KNOB_MACROTILE_Y_DIM
, hotTile
.renderTargetArrayIndex
, hotTile
.pBuffer
);
366 pContext
->pfnLoadTile(GetPrivateState(pDC
), format
, attachment
,
367 x
* KNOB_MACROTILE_X_DIM
, y
* KNOB_MACROTILE_Y_DIM
, renderTargetArrayIndex
, hotTile
.pBuffer
);
369 hotTile
.renderTargetArrayIndex
= renderTargetArrayIndex
;
370 hotTile
.state
= HOTTILE_DIRTY
;
373 return &tile
.Attachment
[attachment
];
376 HotTileSet
&GetHotTile(uint32_t macroID
)
379 MacroTileMgr::getTileIndices(macroID
, x
, y
);
380 SWR_ASSERT(x
< KNOB_NUM_HOT_TILES_X
);
381 SWR_ASSERT(y
< KNOB_NUM_HOT_TILES_Y
);
383 return mHotTiles
[x
][y
];
387 HotTileSet mHotTiles
[KNOB_NUM_HOT_TILES_X
][KNOB_NUM_HOT_TILES_Y
];
388 uint32_t mHotTileSize
[SWR_NUM_ATTACHMENTS
];