1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief API implementation
27 ******************************************************************************/
34 #include "core/backend.h"
35 #include "core/context.h"
36 #include "core/frontend.h"
37 #include "core/rasterizer.h"
38 #include "core/rdtsc_core.h"
39 #include "core/threads.h"
40 #include "core/tilemgr.h"
41 #include "core/clip.h"
43 #include "common/simdintrin.h"
44 #include "common/os.h"
46 void SetupDefaultState(SWR_CONTEXT
*pContext
);
48 //////////////////////////////////////////////////////////////////////////
49 /// @brief Create SWR Context.
50 /// @param pCreateInfo - pointer to creation info.
51 HANDLE
SwrCreateContext(
52 SWR_CREATECONTEXT_INFO
* pCreateInfo
)
57 void* pContextMem
= _aligned_malloc(sizeof(SWR_CONTEXT
), KNOB_SIMD_WIDTH
* 4);
58 memset(pContextMem
, 0, sizeof(SWR_CONTEXT
));
59 SWR_CONTEXT
*pContext
= new (pContextMem
) SWR_CONTEXT();
61 pContext
->driverType
= pCreateInfo
->driver
;
62 pContext
->privateStateSize
= pCreateInfo
->privateStateSize
;
64 pContext
->dcRing
.Init(KNOB_MAX_DRAWS_IN_FLIGHT
);
65 pContext
->dsRing
.Init(KNOB_MAX_DRAWS_IN_FLIGHT
);
67 for (uint32_t dc
= 0; dc
< KNOB_MAX_DRAWS_IN_FLIGHT
; ++dc
)
69 pContext
->dcRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
70 pContext
->dcRing
[dc
].pTileMgr
= new MacroTileMgr(*(pContext
->dcRing
[dc
].pArena
));
71 pContext
->dcRing
[dc
].pDispatch
= new DispatchQueue(); /// @todo Could lazily allocate this if Dispatch seen.
73 pContext
->dsRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
76 if (!KNOB_SINGLE_THREADED
)
78 memset(&pContext
->WaitLock
, 0, sizeof(pContext
->WaitLock
));
79 memset(&pContext
->FifosNotEmpty
, 0, sizeof(pContext
->FifosNotEmpty
));
80 new (&pContext
->WaitLock
) std::mutex();
81 new (&pContext
->FifosNotEmpty
) std::condition_variable();
83 CreateThreadPool(pContext
, &pContext
->threadPool
);
86 // Calling createThreadPool() above can set SINGLE_THREADED
87 if (KNOB_SINGLE_THREADED
)
89 pContext
->NumWorkerThreads
= 1;
92 // Allocate scratch space for workers.
93 ///@note We could lazily allocate this but its rather small amount of memory.
94 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
96 ///@todo Use numa API for allocations using numa information from thread data (if exists).
97 pContext
->pScratch
[i
] = (uint8_t*)_aligned_malloc((32 * 1024), KNOB_SIMD_WIDTH
* 4);
100 // State setup AFTER context is fully initialized
101 SetupDefaultState(pContext
);
103 // initialize hot tile manager
104 pContext
->pHotTileMgr
= new HotTileMgr();
106 // initialize function pointer tables
107 InitClearTilesTable();
109 // initialize store tiles function
110 pContext
->pfnLoadTile
= pCreateInfo
->pfnLoadTile
;
111 pContext
->pfnStoreTile
= pCreateInfo
->pfnStoreTile
;
112 pContext
->pfnClearTile
= pCreateInfo
->pfnClearTile
;
114 // pass pointer to bucket manager back to caller
115 #ifdef KNOB_ENABLE_RDTSC
116 pCreateInfo
->pBucketMgr
= &gBucketMgr
;
119 pCreateInfo
->contextSaveSize
= sizeof(API_STATE
);
121 return (HANDLE
)pContext
;
124 void SwrDestroyContext(HANDLE hContext
)
126 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
127 DestroyThreadPool(pContext
, &pContext
->threadPool
);
130 for (uint32_t i
= 0; i
< KNOB_MAX_DRAWS_IN_FLIGHT
; ++i
)
132 delete pContext
->dcRing
[i
].pArena
;
133 delete pContext
->dsRing
[i
].pArena
;
134 delete(pContext
->dcRing
[i
].pTileMgr
);
135 delete(pContext
->dcRing
[i
].pDispatch
);
138 // Free scratch space.
139 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
141 _aligned_free(pContext
->pScratch
[i
]);
144 delete(pContext
->pHotTileMgr
);
146 pContext
->~SWR_CONTEXT();
147 _aligned_free((SWR_CONTEXT
*)hContext
);
150 void CopyState(DRAW_STATE
& dst
, const DRAW_STATE
& src
)
152 memcpy(&dst
.state
, &src
.state
, sizeof(API_STATE
));
155 void WakeAllThreads(SWR_CONTEXT
*pContext
)
157 pContext
->FifosNotEmpty
.notify_all();
// Queue the context's current draw context (pCurDrawContext) as work.
// Visible steps: set threadsDone, enqueue on dcRing while holding WaitLock,
// then (under KNOB_SINGLE_THREADED) run the work on the calling thread with
// worker id 0, or wake the worker threads. Finally pCurDrawContext becomes
// pPrevDrawContext and is reset to nullptr.
// NOTE(review): the template parameter IsDraw is not referenced on any line
// shown here. The two threadsDone assignments and the FE/BE versus compute
// calls presumably sit in if/else branches on IsDraw - confirm against the
// complete file.
160 template<bool IsDraw
>
161 void QueueWork(SWR_CONTEXT
*pContext
)
165 // Each worker thread looks at a DC for both FE and BE work at different times and so we
166 // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers
167 // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
168 // then moved on if all work is done.)
169 pContext
->pCurDrawContext
->threadsDone
=
170 pContext
->NumWorkerThreads
? pContext
->NumWorkerThreads
* 2 : 2;
// Second form of the counter: one count per worker, minimum 1.
// NOTE(review): presumably the non-draw (dispatch) branch - confirm.
174 pContext
->pCurDrawContext
->threadsDone
=
175 pContext
->NumWorkerThreads
? pContext
->NumWorkerThreads
: 1;
// The ring enqueue is performed while holding WaitLock.
180 std::unique_lock
<std::mutex
> lock(pContext
->WaitLock
);
181 pContext
->dcRing
.Enqueue();
184 if (KNOB_SINGLE_THREADED
)
186 // flush denormals to 0
187 uint32_t mxcsr
= _mm_getcsr();
188 _mm_setcsr(mxcsr
| _MM_FLUSH_ZERO_ON
| _MM_DENORMALS_ZERO_ON
);
// NOTE(review): no line restoring the saved mxcsr value is visible in this
// view - confirm whether the original restores it after the work completes.
// Front-end then back-end work is run directly for the current draw id.
192 static TileSet lockedTiles
;
193 uint64_t curDraw
[2] = { pContext
->pCurDrawContext
->drawId
, pContext
->pCurDrawContext
->drawId
};
194 WorkOnFifoFE(pContext
, 0, curDraw
[0], 0);
195 WorkOnFifoBE(pContext
, 0, curDraw
[1], lockedTiles
);
// Compute work is run directly for the current draw id.
199 uint64_t curDispatch
= pContext
->pCurDrawContext
->drawId
;
200 WorkOnCompute(pContext
, 0, curDispatch
);
203 // Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
204 if (!pContext
->dcRing
.IsEmpty())
206 pContext
->dcRing
.Dequeue();
// NOTE(review): the wake-up below presumably belongs to the multi-threaded
// (else) path; the else line itself is not visible here.
214 RDTSC_START(APIDrawWakeAllThreads
);
215 WakeAllThreads(pContext
);
216 RDTSC_STOP(APIDrawWakeAllThreads
, 1, 0);
219 // Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
220 pContext
->pPrevDrawContext
= pContext
->pCurDrawContext
;
221 pContext
->pCurDrawContext
= nullptr;
224 INLINE
void QueueDraw(SWR_CONTEXT
* pContext
)
226 QueueWork
<true>(pContext
);
229 INLINE
void QueueDispatch(SWR_CONTEXT
* pContext
)
231 QueueWork
<false>(pContext
);
// Return the context's current draw context, obtaining and initializing a new
// one from dcRing when pCurDrawContext is null.
// pContext    - context that owns the draw-context (dcRing) and draw-state
//               (dsRing) rings.
// isSplitDraw - when true and a previous draw context exists, the new draw
//               context reuses the previous draw's state pointer instead of
//               receiving a copy in a fresh dsRing entry.
// Returns pContext->pCurDrawContext.
234 DRAW_CONTEXT
* GetDrawContext(SWR_CONTEXT
*pContext
, bool isSplitDraw
= false)
236 RDTSC_START(APIGetDrawContext
);
237 // If current draw context is null then need to obtain a new draw context to use from ring.
238 if (pContext
->pCurDrawContext
== nullptr)
240 // Need to wait for a free entry.
// NOTE(review): the body of this wait loop is not visible in this view.
241 while (pContext
->dcRing
.IsFull())
// The ring head modulo the ring size selects the slot to use.
246 uint32_t dcIndex
= pContext
->dcRing
.GetHead() % KNOB_MAX_DRAWS_IN_FLIGHT
;
248 DRAW_CONTEXT
* pCurDrawContext
= &pContext
->dcRing
[dcIndex
];
249 pContext
->pCurDrawContext
= pCurDrawContext
;
251 // Assign next available entry in DS ring to this DC.
252 uint32_t dsIndex
= pContext
->curStateId
% KNOB_MAX_DRAWS_IN_FLIGHT
;
253 pCurDrawContext
->pState
= &pContext
->dsRing
[dsIndex
];
// stateArena is bound to the arena of the dsRing entry selected above; it
// keeps referring to that arena even if pState is re-pointed further down.
255 auto& stateArena
= *(pCurDrawContext
->pState
->pArena
);
257 // Copy previous state to current state.
258 if (pContext
->pPrevDrawContext
)
260 DRAW_CONTEXT
* pPrevDrawContext
= pContext
->pPrevDrawContext
;
262 // If we're splitting our draw then we can just use the same state from the previous
263 // draw. In this case, we won't increment the DS ring index so the next non-split
264 // draw can receive the state.
265 if (isSplitDraw
== false)
267 CopyState(*pCurDrawContext
->pState
, *pPrevDrawContext
->pState
);
269 stateArena
.Reset(true); // Reset memory.
270 pCurDrawContext
->pState
->pPrivateState
= nullptr;
272 pContext
->curStateId
++; // Progress state ring index forward.
276 // If its a split draw then just copy the state pointer over
277 // since its the same draw.
278 pCurDrawContext
->pState
= pPrevDrawContext
->pState
;
// NOTE(review): presumably the branch taken when there is no previous draw
// context (the else line is not visible here) - confirm.
283 stateArena
.Reset(); // Reset memory.
284 pContext
->curStateId
++; // Progress state ring index forward.
// Reset the per-draw bookkeeping fields of the draw context.
287 pCurDrawContext
->dependency
= 0;
288 pCurDrawContext
->pArena
->Reset();
289 pCurDrawContext
->pContext
= pContext
;
290 pCurDrawContext
->isCompute
= false; // Dispatch has to set this to true.
292 pCurDrawContext
->doneFE
= false;
293 pCurDrawContext
->FeLock
= 0;
294 pCurDrawContext
->threadsDone
= 0;
296 pCurDrawContext
->pTileMgr
->initialize();
298 // Assign unique drawId for this DC
299 pCurDrawContext
->drawId
= pContext
->dcRing
.GetHead();
301 pCurDrawContext
->cleanupState
= true;
// NOTE(review): this assert presumably sits in the path where a current draw
// context already exists, as its message suggests - confirm.
305 SWR_ASSERT(isSplitDraw
== false, "Split draw should only be used when obtaining a new DC");
308 RDTSC_STOP(APIGetDrawContext
, 0, 0);
309 return pContext
->pCurDrawContext
;
312 API_STATE
* GetDrawState(SWR_CONTEXT
*pContext
)
314 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
315 SWR_ASSERT(pDC
->pState
!= nullptr);
317 return &pDC
->pState
->state
;
// Copy the current API state into a caller-provided buffer.
// pOutputStateBlock - destination buffer; asserted non-null.
// NOTE(review): the parameter lines declaring hContext and memSize are not
// visible in this view; both names are used below.
// The assert requires memSize to be at least sizeof(API_STATE) (the type
// GetDrawState returns a pointer to); exactly that many bytes are copied.
320 void SWR_API
SwrSaveState(
322 void* pOutputStateBlock
,
325 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
326 auto pSrc
= GetDrawState(pContext
);
327 SWR_ASSERT(pOutputStateBlock
&& memSize
>= sizeof(*pSrc
));
329 memcpy(pOutputStateBlock
, pSrc
, sizeof(*pSrc
));
// Overwrite the current API state with the contents of a caller-provided buffer.
// pStateBlock - source buffer; asserted non-null.
// NOTE(review): the parameter lines declaring hContext and memSize are not
// visible in this view; both names are used below.
// The assert requires memSize to be at least sizeof(API_STATE); exactly that
// many bytes are copied into the state returned by GetDrawState.
332 void SWR_API
SwrRestoreState(
334 const void* pStateBlock
,
337 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
338 auto pDst
= GetDrawState(pContext
);
339 SWR_ASSERT(pStateBlock
&& memSize
>= sizeof(*pDst
));
341 memcpy(pDst
, pStateBlock
, sizeof(*pDst
));
344 void SetupDefaultState(SWR_CONTEXT
*pContext
)
346 API_STATE
* pState
= GetDrawState(pContext
);
348 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
349 pState
->rastState
.frontWinding
= SWR_FRONTWINDING_CCW
;
352 static INLINE SWR_CONTEXT
* GetContext(HANDLE hContext
)
354 return (SWR_CONTEXT
*)hContext
;
// Set up a SYNC front-end work item on the current draw context.
// hContext  - context handle.
// pfnFunc   - callback stored in the work descriptor; asserted non-null.
// userData, userData2, userData3 - values stored alongside the callback.
// NOTE(review): no call that queues the draw context is visible between the
// dependency assignment and RDTSC_STOP in this view - confirm against the
// complete file.
357 void SwrSync(HANDLE hContext
, PFN_CALLBACK_FUNC pfnFunc
, uint64_t userData
, uint64_t userData2
, uint64_t userData3
)
359 RDTSC_START(APISync
);
361 SWR_ASSERT(pfnFunc
!= nullptr);
363 SWR_CONTEXT
*pContext
= GetContext(hContext
);
364 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// Describe the work: type SYNC, handled by ProcessSync.
366 pDC
->FeWork
.type
= SYNC
;
367 pDC
->FeWork
.pfnWork
= ProcessSync
;
368 pDC
->FeWork
.desc
.sync
.pfnCallbackFunc
= pfnFunc
;
369 pDC
->FeWork
.desc
.sync
.userData
= userData
;
370 pDC
->FeWork
.desc
.sync
.userData2
= userData2
;
371 pDC
->FeWork
.desc
.sync
.userData3
= userData3
;
373 // cannot execute until all previous draws have completed
374 pDC
->dependency
= pDC
->drawId
- 1;
379 RDTSC_STOP(APISync
, 1, 0);
// Wait until the context's draw-context ring reports empty.
// hContext - context handle.
382 void SwrWaitForIdle(HANDLE hContext
)
384 SWR_CONTEXT
*pContext
= GetContext(hContext
);
386 RDTSC_START(APIWaitForIdle
);
// NOTE(review): the body of this loop is not visible in this view; the
// RDTSC_STOP below presumably follows the loop rather than forming its body.
388 while (!pContext
->dcRing
.IsEmpty())
393 RDTSC_STOP(APIWaitForIdle
, 1, 0);
// Copy an array of vertex buffer descriptions into the current API state.
// pVertexBuffers - array of descriptions; each is stored at the slot named by
//                  its own 'index' member.
// NOTE(review): the parameter lines declaring hContext and numBuffers are not
// visible in this view; both names are used below.
396 void SwrSetVertexBuffers(
399 const SWR_VERTEX_BUFFER_STATE
* pVertexBuffers
)
401 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
403 for (uint32_t i
= 0; i
< numBuffers
; ++i
)
405 const SWR_VERTEX_BUFFER_STATE
*pVB
= &pVertexBuffers
[i
];
// NOTE(review): pVB->index is used as an array subscript without a range
// check in the visible code.
406 pState
->vertexBuffers
[pVB
->index
] = *pVB
;
410 void SwrSetIndexBuffer(
412 const SWR_INDEX_BUFFER_STATE
* pIndexBuffer
)
414 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
416 pState
->indexBuffer
= *pIndexBuffer
;
419 void SwrSetFetchFunc(
421 PFN_FETCH_FUNC pfnFetchFunc
)
423 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
425 pState
->pfnFetchFunc
= pfnFetchFunc
;
430 PFN_SO_FUNC pfnSoFunc
,
431 uint32_t streamIndex
)
433 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
435 SWR_ASSERT(streamIndex
< MAX_SO_STREAMS
);
437 pState
->pfnSoFunc
[streamIndex
] = pfnSoFunc
;
442 SWR_STREAMOUT_STATE
* pSoState
)
444 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
446 pState
->soState
= *pSoState
;
// Copy one stream-out buffer description into the current API state.
// pSoBuffer - description to copy; dereferenced unconditionally.
// NOTE(review): the parameter lines declaring hContext and slot are not
// visible in this view; both names are used below.
449 void SwrSetSoBuffers(
451 SWR_STREAMOUT_BUFFER
* pSoBuffer
,
454 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
// The slot index is only validated by the assert below.
456 SWR_ASSERT((slot
< 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot
);
458 pState
->soBuffer
[slot
] = *pSoBuffer
;
461 void SwrSetVertexFunc(
463 PFN_VERTEX_FUNC pfnVertexFunc
)
465 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
467 pState
->pfnVertexFunc
= pfnVertexFunc
;
470 void SwrSetFrontendState(
472 SWR_FRONTEND_STATE
*pFEState
)
474 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
475 pState
->frontendState
= *pFEState
;
480 SWR_GS_STATE
*pGSState
)
482 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
483 pState
->gsState
= *pGSState
;
488 PFN_GS_FUNC pfnGsFunc
)
490 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
491 pState
->pfnGsFunc
= pfnGsFunc
;
496 PFN_CS_FUNC pfnCsFunc
,
497 uint32_t totalThreadsInGroup
)
499 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
500 pState
->pfnCsFunc
= pfnCsFunc
;
501 pState
->totalThreadsInGroup
= totalThreadsInGroup
;
506 SWR_TS_STATE
*pState
)
508 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
509 pApiState
->tsState
= *pState
;
516 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
517 pApiState
->pfnHsFunc
= pfnFunc
;
524 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
525 pApiState
->pfnDsFunc
= pfnFunc
;
528 void SwrSetDepthStencilState(
530 SWR_DEPTH_STENCIL_STATE
*pDSState
)
532 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
534 pState
->depthStencilState
= *pDSState
;
537 void SwrSetBackendState(
539 SWR_BACKEND_STATE
*pBEState
)
541 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
543 pState
->backendState
= *pBEState
;
546 void SwrSetPixelShaderState(
548 SWR_PS_STATE
*pPSState
)
550 API_STATE
*pState
= GetDrawState(GetContext(hContext
));
551 pState
->psState
= *pPSState
;
554 void SwrSetBlendState(
556 SWR_BLEND_STATE
*pBlendState
)
558 API_STATE
*pState
= GetDrawState(GetContext(hContext
));
559 memcpy(&pState
->blendState
, pBlendState
, sizeof(SWR_BLEND_STATE
));
562 void SwrSetBlendFunc(
564 uint32_t renderTarget
,
565 PFN_BLEND_JIT_FUNC pfnBlendFunc
)
567 SWR_ASSERT(renderTarget
< SWR_NUM_RENDERTARGETS
);
568 API_STATE
*pState
= GetDrawState(GetContext(hContext
));
569 pState
->pfnBlendFunc
[renderTarget
] = pfnBlendFunc
;
577 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
579 static const uint8_t IDENTITY_MAP
[] =
581 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
582 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
584 static_assert(sizeof(IDENTITY_MAP
) == sizeof(pState
->linkageMap
),
585 "Update for new value of MAX_ATTRIBUTES");
587 pState
->linkageMask
= mask
;
588 pState
->linkageCount
= _mm_popcnt_u32(mask
);
594 memcpy(pState
->linkageMap
, pMap
, pState
->linkageCount
);
597 // update guardband multipliers for the viewport
598 void updateGuardband(API_STATE
*pState
)
600 // guardband center is viewport center
601 pState
->gbState
.left
= KNOB_GUARDBAND_WIDTH
/ pState
->vp
[0].width
;
602 pState
->gbState
.right
= KNOB_GUARDBAND_WIDTH
/ pState
->vp
[0].width
;
603 pState
->gbState
.top
= KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[0].height
;
604 pState
->gbState
.bottom
= KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[0].height
;
607 void SwrSetRastState(
609 const SWR_RASTSTATE
*pRastState
)
611 SWR_CONTEXT
*pContext
= GetContext(hContext
);
612 API_STATE
* pState
= GetDrawState(pContext
);
614 memcpy(&pState
->rastState
, pRastState
, sizeof(SWR_RASTSTATE
));
617 void SwrSetViewports(
619 uint32_t numViewports
,
620 const SWR_VIEWPORT
* pViewports
,
621 const SWR_VIEWPORT_MATRIX
* pMatrices
)
623 SWR_ASSERT(numViewports
<= KNOB_NUM_VIEWPORTS_SCISSORS
,
624 "Invalid number of viewports.");
626 SWR_CONTEXT
*pContext
= GetContext(hContext
);
627 API_STATE
* pState
= GetDrawState(pContext
);
629 memcpy(&pState
->vp
[0], pViewports
, sizeof(SWR_VIEWPORT
) * numViewports
);
631 if (pMatrices
!= nullptr)
633 memcpy(&pState
->vpMatrix
[0], pMatrices
, sizeof(SWR_VIEWPORT_MATRIX
) * numViewports
);
637 // Compute default viewport transform.
638 for (uint32_t i
= 0; i
< numViewports
; ++i
)
640 if (pContext
->driverType
== DX
)
642 pState
->vpMatrix
[i
].m00
= pState
->vp
[i
].width
/ 2.0f
;
643 pState
->vpMatrix
[i
].m11
= -pState
->vp
[i
].height
/ 2.0f
;
644 pState
->vpMatrix
[i
].m22
= pState
->vp
[i
].maxZ
- pState
->vp
[i
].minZ
;
645 pState
->vpMatrix
[i
].m30
= pState
->vp
[i
].x
+ pState
->vpMatrix
[i
].m00
;
646 pState
->vpMatrix
[i
].m31
= pState
->vp
[i
].y
- pState
->vpMatrix
[i
].m11
;
647 pState
->vpMatrix
[i
].m32
= pState
->vp
[i
].minZ
;
651 // Standard, with the exception that Y is inverted.
652 pState
->vpMatrix
[i
].m00
= (pState
->vp
[i
].width
- pState
->vp
[i
].x
) / 2.0f
;
653 pState
->vpMatrix
[i
].m11
= (pState
->vp
[i
].y
- pState
->vp
[i
].height
) / 2.0f
;
654 pState
->vpMatrix
[i
].m22
= (pState
->vp
[i
].maxZ
- pState
->vp
[i
].minZ
) / 2.0f
;
655 pState
->vpMatrix
[i
].m30
= pState
->vp
[i
].x
+ pState
->vpMatrix
[i
].m00
;
656 pState
->vpMatrix
[i
].m31
= pState
->vp
[i
].height
+ pState
->vpMatrix
[i
].m11
;
657 pState
->vpMatrix
[i
].m32
= pState
->vp
[i
].minZ
+ pState
->vpMatrix
[i
].m22
;
659 // Now that the matrix is calculated, clip the view coords to screen size.
660 // OpenGL allows for -ve x,y in the viewport.
661 pState
->vp
[i
].x
= std::max(pState
->vp
[i
].x
, 0.0f
);
662 pState
->vp
[i
].y
= std::max(pState
->vp
[i
].y
, 0.0f
);
667 updateGuardband(pState
);
670 void SwrSetScissorRects(
672 uint32_t numScissors
,
673 const BBOX
* pScissors
)
675 SWR_ASSERT(numScissors
<= KNOB_NUM_VIEWPORTS_SCISSORS
,
676 "Invalid number of scissor rects.");
678 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
679 memcpy(&pState
->scissorRects
[0], pScissors
, numScissors
* sizeof(BBOX
));
682 void SetupMacroTileScissors(DRAW_CONTEXT
*pDC
)
684 API_STATE
*pState
= &pDC
->pState
->state
;
685 uint32_t left
, right
, top
, bottom
;
687 // Set up scissor dimensions based on scissor or viewport
688 if (pState
->rastState
.scissorEnable
)
690 // scissor rect right/bottom edge are exclusive, core expects scissor dimensions to be inclusive, so subtract one pixel from right/bottom edges
691 left
= pState
->scissorRects
[0].left
;
692 right
= pState
->scissorRects
[0].right
;
693 top
= pState
->scissorRects
[0].top
;
694 bottom
= pState
->scissorRects
[0].bottom
;
698 left
= (int32_t)pState
->vp
[0].x
;
699 right
= (int32_t)pState
->vp
[0].x
+ (int32_t)pState
->vp
[0].width
;
700 top
= (int32_t)pState
->vp
[0].y
;
701 bottom
= (int32_t)pState
->vp
[0].y
+ (int32_t)pState
->vp
[0].height
;
704 right
= std::min
<uint32_t>(right
, KNOB_MAX_SCISSOR_X
);
705 bottom
= std::min
<uint32_t>(bottom
, KNOB_MAX_SCISSOR_Y
);
707 if (left
> KNOB_MAX_SCISSOR_X
|| top
> KNOB_MAX_SCISSOR_Y
)
709 pState
->scissorInFixedPoint
.left
= 0;
710 pState
->scissorInFixedPoint
.right
= 0;
711 pState
->scissorInFixedPoint
.top
= 0;
712 pState
->scissorInFixedPoint
.bottom
= 0;
716 pState
->scissorInFixedPoint
.left
= left
* FIXED_POINT_SCALE
;
717 pState
->scissorInFixedPoint
.right
= right
* FIXED_POINT_SCALE
- 1;
718 pState
->scissorInFixedPoint
.top
= top
* FIXED_POINT_SCALE
;
719 pState
->scissorInFixedPoint
.bottom
= bottom
* FIXED_POINT_SCALE
- 1;
722 // templated backend function tables
723 extern PFN_BACKEND_FUNC gBackendNullPs
[SWR_MULTISAMPLE_TYPE_MAX
];
724 extern PFN_BACKEND_FUNC gBackendSingleSample
[2][2];
725 extern PFN_BACKEND_FUNC gBackendPixelRateTable
[SWR_MULTISAMPLE_TYPE_MAX
][SWR_MSAA_SAMPLE_PATTERN_MAX
][SWR_INPUT_COVERAGE_MAX
][2][2];
726 extern PFN_BACKEND_FUNC gBackendSampleRateTable
[SWR_MULTISAMPLE_TYPE_MAX
][SWR_INPUT_COVERAGE_MAX
][2];
727 extern PFN_OUTPUT_MERGER gBackendOutputMergerTable
[SWR_NUM_RENDERTARGETS
+ 1][SWR_MULTISAMPLE_TYPE_MAX
];
728 extern PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable
[2];
729 extern PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable
[2];
730 extern PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable
[SWR_MULTISAMPLE_TYPE_MAX
][2][2][2];
// Derive the per-draw pipeline function pointers and enables from the draw's
// API state:
//  - backend, output-merger and barycentric functions, looked up in the
//    global tables declared above;
//  - the primitive processing function (clipper or binner) by topology;
//  - the front-end attribute mask;
//  - the depth / stencil / color hot-tile enables.
// pDC - draw context whose pState is read and updated.
// NOTE(review): several case labels, break statements and else lines of this
// function are not visible in this view.
731 void SetupPipeline(DRAW_CONTEXT
*pDC
)
733 DRAW_STATE
* pState
= pDC
->pState
;
734 const SWR_RASTSTATE
&rastState
= pState
->state
.rastState
;
735 const SWR_PS_STATE
&psState
= pState
->state
.psState
;
736 BACKEND_FUNCS
& backendFuncs
= pState
->backendFuncs
;
// forcedSampleCount is used below as a 0/1 table index.
737 const uint32_t forcedSampleCount
= (rastState
.bForcedSampleCount
) ? 1 : 0;
// No pixel shader bound: select the null-PS backend by sample count.
740 if (psState
.pfnPixelShader
== nullptr)
742 backendFuncs
.pfnBackend
= gBackendNullPs
[pState
->state
.rastState
.sampleCount
];
743 // always need to generate I & J per sample for Z interpolation
744 backendFuncs
.pfnCalcSampleBarycentrics
= gSampleBarycentricTable
[1];
// NOTE(review): the declarations below presumably begin the pixel-shader
// (else) path - confirm.
748 const bool bMultisampleEnable
= ((rastState
.sampleCount
> SWR_MULTISAMPLE_1X
) || rastState
.bForcedSampleCount
) ? 1 : 0;
749 const uint32_t centroid
= ((psState
.barycentricsMask
& SWR_BARYCENTRIC_CENTROID_MASK
) > 0) ? 1 : 0;
751 // currently only support 'normal' input coverage
752 SWR_ASSERT(psState
.inputCoverage
== SWR_INPUT_COVERAGE_NORMAL
||
753 psState
.inputCoverage
== SWR_INPUT_COVERAGE_NONE
);
755 SWR_BARYCENTRICS_MASK barycentricsMask
= (SWR_BARYCENTRICS_MASK
)psState
.barycentricsMask
;
757 // select backend function
758 switch(psState
.shadingRate
)
760 case SWR_SHADING_RATE_PIXEL
:
761 if(bMultisampleEnable
)
763 // always need to generate I & J per sample for Z interpolation
764 barycentricsMask
= (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
765 backendFuncs
.pfnBackend
= gBackendPixelRateTable
[rastState
.sampleCount
][rastState
.samplePattern
][psState
.inputCoverage
][centroid
][forcedSampleCount
];
766 backendFuncs
.pfnOutputMerger
= gBackendOutputMergerTable
[psState
.numRenderTargets
][pState
->state
.blendState
.sampleCount
];
770 // always need to generate I & J per pixel for Z interpolation
771 barycentricsMask
= (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_PIXEL_MASK
);
772 backendFuncs
.pfnBackend
= gBackendSingleSample
[psState
.inputCoverage
][centroid
];
773 backendFuncs
.pfnOutputMerger
= gBackendOutputMergerTable
[psState
.numRenderTargets
][SWR_MULTISAMPLE_1X
];
776 case SWR_SHADING_RATE_SAMPLE
:
777 SWR_ASSERT(rastState
.samplePattern
== SWR_MSAA_STANDARD_PATTERN
);
778 // always need to generate I & J per sample for Z interpolation
779 barycentricsMask
= (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
780 backendFuncs
.pfnBackend
= gBackendSampleRateTable
[rastState
.sampleCount
][psState
.inputCoverage
][centroid
];
781 backendFuncs
.pfnOutputMerger
= gBackendOutputMergerTable
[psState
.numRenderTargets
][pState
->state
.blendState
.sampleCount
];
784 SWR_ASSERT(0 && "Invalid shading rate");
788 // setup pointer to function that generates necessary barycentrics required by the PS
789 bool bBarycentrics
= (barycentricsMask
& SWR_BARYCENTRIC_PER_PIXEL_MASK
) > 0 ? 1 : 0;
790 backendFuncs
.pfnCalcPixelBarycentrics
= gPixelBarycentricTable
[bBarycentrics
];
792 bBarycentrics
= (barycentricsMask
& SWR_BARYCENTRIC_PER_SAMPLE_MASK
) > 0 ? 1 : 0;
793 backendFuncs
.pfnCalcSampleBarycentrics
= gSampleBarycentricTable
[bBarycentrics
];
795 bBarycentrics
= (barycentricsMask
& SWR_BARYCENTRIC_CENTROID_MASK
) > 0 ? 1 : 0;
796 backendFuncs
.pfnCalcCentroidBarycentrics
= gCentroidBarycentricTable
[rastState
.sampleCount
][bBarycentrics
][rastState
.samplePattern
][forcedSampleCount
];
// Choose the clipper (stored in pfnProcessPrims) and the matching binner by
// topology. NOTE(review): some case labels of this switch are not visible.
799 PFN_PROCESS_PRIMS pfnBinner
;
800 switch (pState
->state
.topology
)
803 pState
->pfnProcessPrims
= ClipPoints
;
804 pfnBinner
= BinPoints
;
809 case TOP_LINE_LIST_ADJ
:
810 case TOP_LISTSTRIP_ADJ
:
811 pState
->pfnProcessPrims
= ClipLines
;
812 pfnBinner
= BinLines
;
815 pState
->pfnProcessPrims
= ClipTriangles
;
816 pfnBinner
= BinTriangles
;
820 // disable clipper if viewport transform is disabled
821 if (pState
->state
.frontendState
.vpTransformDisable
)
823 pState
->pfnProcessPrims
= pfnBinner
;
// With no pixel shader, no depth/stencil test or write and no linked
// attributes, primitive processing is dropped entirely.
826 if ((pState
->state
.psState
.pfnPixelShader
== nullptr) &&
827 (pState
->state
.depthStencilState
.depthTestEnable
== FALSE
) &&
828 (pState
->state
.depthStencilState
.depthWriteEnable
== FALSE
) &&
829 (pState
->state
.depthStencilState
.stencilTestEnable
== FALSE
) &&
830 (pState
->state
.depthStencilState
.stencilWriteEnable
== FALSE
) &&
831 (pState
->state
.linkageCount
== 0))
833 pState
->pfnProcessPrims
= nullptr;
834 pState
->state
.linkageMask
= 0;
// The same applies when stream-out state disables the rasterizer.
837 if (pState
->state
.soState
.rasterizerDisable
== true)
839 pState
->pfnProcessPrims
= nullptr;
840 pState
->state
.linkageMask
= 0;
843 // set up the frontend attrib mask
844 pState
->state
.feAttribMask
= pState
->state
.linkageMask
;
845 if (pState
->state
.soState
.soEnable
)
// Attributes named by any of the four stream masks are added to the mask.
847 for (uint32_t i
= 0; i
< 4; ++i
)
849 pState
->state
.feAttribMask
|= pState
->state
.soState
.streamMasks
[i
];
853 // complicated logic to test for cases where we don't need backing hottile memory for a draw
854 // have to check for the special case where depth/stencil test is enabled but depthwrite is disabled.
855 pState
->state
.depthHottileEnable
= ((!(pState
->state
.depthStencilState
.depthTestEnable
&&
856 !pState
->state
.depthStencilState
.depthWriteEnable
&&
857 pState
->state
.depthStencilState
.depthTestFunc
== ZFUNC_ALWAYS
)) &&
858 (pState
->state
.depthStencilState
.depthTestEnable
||
859 pState
->state
.depthStencilState
.depthWriteEnable
)) ? true : false;
861 pState
->state
.stencilHottileEnable
= (((!(pState
->state
.depthStencilState
.stencilTestEnable
&&
862 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
863 pState
->state
.depthStencilState
.stencilTestFunc
== ZFUNC_ALWAYS
)) ||
864 // for stencil we have to check the double sided state as well
865 (!(pState
->state
.depthStencilState
.doubleSidedStencilTestEnable
&&
866 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
867 pState
->state
.depthStencilState
.backfaceStencilTestFunc
== ZFUNC_ALWAYS
))) &&
868 (pState
->state
.depthStencilState
.stencilTestEnable
||
869 pState
->state
.depthStencilState
.stencilWriteEnable
)) ? true : false;
// Color hot tiles: one bit per render target that has at least one color
// channel write enabled; only computed when a pixel shader is bound.
871 uint32_t numRTs
= pState
->state
.psState
.numRenderTargets
;
872 pState
->state
.colorHottileEnable
= 0;
873 if (psState
.pfnPixelShader
!= nullptr)
875 for (uint32_t rt
= 0; rt
< numRTs
; ++rt
)
877 pState
->state
.colorHottileEnable
|=
878 (!pState
->state
.blendState
.renderTarget
[rt
].writeDisableAlpha
||
879 !pState
->state
.blendState
.renderTarget
[rt
].writeDisableRed
||
880 !pState
->state
.blendState
.renderTarget
[rt
].writeDisableGreen
||
881 !pState
->state
.blendState
.renderTarget
[rt
].writeDisableBlue
) ? (1 << rt
) : 0;
886 //////////////////////////////////////////////////////////////////////////
888 /// @param pDC - Draw context to initialize for this draw.
893 // We don't need to re-setup the scissors/pipeline state again for split draw.
894 if (isSplitDraw
== false)
896 SetupMacroTileScissors(pDC
);
901 //////////////////////////////////////////////////////////////////////////
902 /// @brief We can split the draw for certain topologies for better performance.
903 /// @param totalVerts - Total vertices for draw
904 /// @param topology - Topology used for draw
/// @return Vertex count to use per draw; initialized to totalVerts and
///         overridden for the topologies handled below.
/// NOTE(review): the parameter lines for pDC and totalVerts, the switch
/// header and several statements (including whatever the soEnable check
/// guards) are not visible in this view.
905 uint32_t MaxVertsPerDraw(
908 PRIMITIVE_TOPOLOGY topology
)
910 API_STATE
& state
= pDC
->pState
->state
;
912 uint32_t vertsPerDraw
= totalVerts
;
914 if (state
.soState
.soEnable
)
922 case TOP_TRIANGLE_LIST
:
// NOTE(review): a knob named for primitives is assigned to a vertex count
// here - confirm the intended unit.
923 vertsPerDraw
= KNOB_MAX_PRIMS_PER_DRAW
;
926 case TOP_PATCHLIST_1
:
927 case TOP_PATCHLIST_2
:
928 case TOP_PATCHLIST_3
:
929 case TOP_PATCHLIST_4
:
930 case TOP_PATCHLIST_5
:
931 case TOP_PATCHLIST_6
:
932 case TOP_PATCHLIST_7
:
933 case TOP_PATCHLIST_8
:
934 case TOP_PATCHLIST_9
:
935 case TOP_PATCHLIST_10
:
936 case TOP_PATCHLIST_11
:
937 case TOP_PATCHLIST_12
:
938 case TOP_PATCHLIST_13
:
939 case TOP_PATCHLIST_14
:
940 case TOP_PATCHLIST_15
:
941 case TOP_PATCHLIST_16
:
942 case TOP_PATCHLIST_17
:
943 case TOP_PATCHLIST_18
:
944 case TOP_PATCHLIST_19
:
945 case TOP_PATCHLIST_20
:
946 case TOP_PATCHLIST_21
:
947 case TOP_PATCHLIST_22
:
948 case TOP_PATCHLIST_23
:
949 case TOP_PATCHLIST_24
:
950 case TOP_PATCHLIST_25
:
951 case TOP_PATCHLIST_26
:
952 case TOP_PATCHLIST_27
:
953 case TOP_PATCHLIST_28
:
954 case TOP_PATCHLIST_29
:
955 case TOP_PATCHLIST_30
:
956 case TOP_PATCHLIST_31
:
957 case TOP_PATCHLIST_32
:
// Patch lists are only split when tessellation is enabled.
958 if (pDC
->pState
->state
.tsState
.tsEnable
)
// Vertices per patch are taken as the distance of the topology value from
// TOP_PATCHLIST_BASE.
960 uint32_t vertsPerPrim
= topology
- TOP_PATCHLIST_BASE
;
961 vertsPerDraw
= vertsPerPrim
* KNOB_MAX_TESS_PRIMS_PER_DRAW
;
965 // The Primitive Assembly code can only handle 1 RECT at a time.
971 // We are not splitting up draws for other topologies.
978 // Recursive template used to auto-nest conditionals. Converts dynamic boolean function
979 // arguments to static template arguments.
980 template <bool... ArgsB
>
983 // Last Arg Terminator
984 static PFN_FE_WORK_FUNC
GetFunc(bool bArg
)
988 return ProcessDraw
<ArgsB
..., true>;
991 return ProcessDraw
<ArgsB
..., false>;
994 // Recursively parse args
995 template <typename
... TArgsT
>
996 static PFN_FE_WORK_FUNC
GetFunc(bool bArg
, TArgsT
... remainingArgs
)
1000 return FEDrawChooser
<ArgsB
..., true>::GetFunc(remainingArgs
...);
1003 return FEDrawChooser
<ArgsB
..., false>::GetFunc(remainingArgs
...);
1007 // Selector for correct templated Draw front-end function
// Forwards the five runtime flags, in this order, to FEDrawChooser<>::GetFunc
// and returns the function pointer it yields.
// NOTE(review): the line between the comment above and the signature is not
// visible in this view.
1009 static PFN_FE_WORK_FUNC
GetFEDrawFunc(bool IsIndexed
, bool HasTessellation
, bool HasGeometryShader
, bool HasStreamOut
, bool RasterizerEnabled
)
1011 return FEDrawChooser
<>::GetFunc(IsIndexed
, HasTessellation
, HasGeometryShader
, HasStreamOut
, RasterizerEnabled
);
1015 //////////////////////////////////////////////////////////////////////////
1016 /// @brief DrawInstanced
///        Queues a non-indexed draw. The vertex range is consumed in chunks
///        of at most MaxVertsPerDraw() vertices; a draw context is fetched
///        with GetDrawContext() for every chunk and submitted via QueueDraw().
1017 /// @param hContext - Handle passed back from SwrCreateContext
1018 /// @param topology - Specifies topology for draw.
1019 /// @param numVertices - How many vertices to read sequentially from vertex data (per instance).
1020 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1021 /// @param numInstances - How many instances to render.
1022 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
1025 PRIMITIVE_TOPOLOGY topology
,
1026 uint32_t numVertices
,
1027 uint32_t startVertex
,
1028 uint32_t numInstances
= 1,
1029 uint32_t startInstance
= 0)
1036 RDTSC_START(APIDraw
);
1038 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1039 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// Per-chunk vertex limit and the primitive count that limit corresponds to.
// NOTE(review): numVertices is unsigned but is tracked below in a signed
// int32_t; values above INT32_MAX would become negative - confirm callers bound it.
1041 int32_t maxVertsPerDraw
= MaxVertsPerDraw(pDC
, numVertices
, topology
);
1042 uint32_t primsPerDraw
= GetNumPrims(topology
, maxVertsPerDraw
);
1043 int32_t remainingVerts
= numVertices
;
1045 API_STATE
*pState
= &pDC
->pState
->state
;
1046 pState
->topology
= topology
;
1047 pState
->forceFront
= false;
1049 // disable culling for point lists (only TOP_POINT_LIST is tested); the previous mode is saved and restored below
1050 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1051 if (topology
== TOP_POINT_LIST
)
1053 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1054 pState
->forceFront
= true;
// One iteration per queued chunk. `draw` is the chunk counter; its declaration
// and increment are not visible in this excerpt.
1058 while (remainingVerts
)
1060 uint32_t numVertsForDraw
= (remainingVerts
< maxVertsPerDraw
) ?
1061 remainingVerts
: maxVertsPerDraw
;
// Every chunk after the first is flagged as a split draw.
1063 bool isSplitDraw
= (draw
> 0) ? true : false;
// NOTE(review): this declares a new pDC that appears to shadow the one declared
// above; DrawIndexedInstance assigns to its existing pDC instead.
1064 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
, isSplitDraw
);
1065 InitDraw(pDC
, isSplitDraw
);
1067 pDC
->FeWork
.type
= DRAW
;
// Front-end entry point is selected from the tessellation, geometry shader and
// stream-out enables plus whether a prim-processing function is set.
1068 pDC
->FeWork
.pfnWork
= GetFEDrawFunc(
1070 pState
->tsState
.tsEnable
,
1071 pState
->gsState
.gsEnable
,
1072 pState
->soState
.soEnable
,
1073 pDC
->pState
->pfnProcessPrims
!= nullptr);
1074 pDC
->FeWork
.desc
.draw
.numVerts
= numVertsForDraw
;
1075 pDC
->FeWork
.desc
.draw
.startVertex
= startVertex
;
1076 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1077 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
// Primitive and vertex ID bases for this chunk, scaled by the chunk index.
1078 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
1079 pDC
->FeWork
.desc
.draw
.startVertexID
= draw
* maxVertsPerDraw
;
// True only for the chunk that consumes all remaining vertices, i.e. the last one.
1081 pDC
->cleanupState
= (remainingVerts
== numVertsForDraw
);
1084 QueueDraw(pContext
);
1086 remainingVerts
-= numVertsForDraw
;
1090 // restore culling state
1091 pDC
= GetDrawContext(pContext
);
1092 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1094 RDTSC_STOP(APIDraw
, numVertices
* numInstances
, 0);
1097 //////////////////////////////////////////////////////////////////////////
///        Non-instanced, non-indexed draw; forwards to DrawInstanced.
1099 /// @param hContext - Handle passed back from SwrCreateContext
1100 /// @param topology - Specifies topology for draw.
1101 /// @param startVertex - Specifies start vertex in vertex buffer for draw.
1102 /// @param numVertices - Number of vertices.
1105 PRIMITIVE_TOPOLOGY topology
,
1106 uint32_t startVertex
,
1107 uint32_t numVertices
)
// DrawInstanced takes (numVertices, startVertex) in that order, the reverse of
// this function's parameter order. numInstances and startInstance are left at
// DrawInstanced's defaults (1 and 0).
1109 DrawInstanced(hContext
, topology
, numVertices
, startVertex
);
1112 //////////////////////////////////////////////////////////////////////////
1113 /// @brief SwrDrawInstanced
///        Instanced, non-indexed draw; forwards to DrawInstanced.
1114 /// @param hContext - Handle passed back from SwrCreateContext
1115 /// @param topology - Specifies topology for draw.
1116 /// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
1117 /// @param numInstances - How many instances to render.
1118 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1119 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
1120 void SwrDrawInstanced(
1122 PRIMITIVE_TOPOLOGY topology
,
1123 uint32_t numVertsPerInstance
,
1124 uint32_t numInstances
,
1125 uint32_t startVertex
,
1126 uint32_t startInstance
// Argument order differs from this function's parameter order: DrawInstanced
// expects startVertex before numInstances.
1129 DrawInstanced(hContext
, topology
, numVertsPerInstance
, startVertex
, numInstances
, startInstance
);
1132 //////////////////////////////////////////////////////////////////////////
1133 /// @brief DrawIndexedInstance
///        Queues an indexed draw. The index range is consumed in chunks of at
///        most MaxVertsPerDraw() indices; a draw context is fetched with
///        GetDrawContext() for every chunk and submitted via QueueDraw().
1134 /// @param hContext - Handle passed back from SwrCreateContext
1135 /// @param topology - Specifies topology for draw.
1136 /// @param numIndices - Number of indices to read sequentially from index buffer.
1137 /// @param indexOffset - Starting index into index buffer.
1138 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1139 /// @param numInstances - Number of instances to render.
1140 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
1141 void DrawIndexedInstance(
1143 PRIMITIVE_TOPOLOGY topology
,
1144 uint32_t numIndices
,
1145 uint32_t indexOffset
,
1147 uint32_t numInstances
= 1,
1148 uint32_t startInstance
= 0)
1155 RDTSC_START(APIDrawIndexed
);
1157 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1158 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1159 API_STATE
* pState
= &pDC
->pState
->state
;
// Per-chunk index limit and the primitive count that limit corresponds to.
// NOTE(review): numIndices is unsigned but is tracked below in a signed
// int32_t; values above INT32_MAX would become negative - confirm callers bound it.
1161 int32_t maxIndicesPerDraw
= MaxVertsPerDraw(pDC
, numIndices
, topology
);
1162 uint32_t primsPerDraw
= GetNumPrims(topology
, maxIndicesPerDraw
);
1163 int32_t remainingIndices
= numIndices
;
// Bytes per index, taken from the bound index buffer's format. indexSize stays 0
// for any format not matched by the visible cases.
1165 uint32_t indexSize
= 0;
1166 switch (pState
->indexBuffer
.format
)
1168 case R32_UINT
: indexSize
= sizeof(uint32_t); break;
1169 case R16_UINT
: indexSize
= sizeof(uint16_t); break;
1170 case R8_UINT
: indexSize
= sizeof(uint8_t); break;
// Byte address of the first index to read; the offset product is formed in 64 bits.
1176 uint8_t *pIB
= (uint8_t*)pState
->indexBuffer
.pIndices
;
1177 pIB
+= (uint64_t)indexOffset
* (uint64_t)indexSize
;
1179 pState
->topology
= topology
;
1180 pState
->forceFront
= false;
1182 // disable culling for point lists (only TOP_POINT_LIST is tested); the previous mode is saved and restored below
1183 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1184 if (topology
== TOP_POINT_LIST
)
1186 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1187 pState
->forceFront
= true;
// One iteration per queued chunk. `draw` is the chunk counter; its declaration
// and increment are not visible in this excerpt.
1190 while (remainingIndices
)
1192 uint32_t numIndicesForDraw
= (remainingIndices
< maxIndicesPerDraw
) ?
1193 remainingIndices
: maxIndicesPerDraw
;
1195 // When breaking up draw, we need to obtain new draw context for each iteration.
1196 bool isSplitDraw
= (draw
> 0) ? true : false;
1197 pDC
= GetDrawContext(pContext
, isSplitDraw
);
1198 InitDraw(pDC
, isSplitDraw
);
1200 pDC
->FeWork
.type
= DRAW
;
// Front-end entry point is selected from the tessellation, geometry shader and
// stream-out enables plus whether a prim-processing function is set.
1201 pDC
->FeWork
.pfnWork
= GetFEDrawFunc(
1203 pState
->tsState
.tsEnable
,
1204 pState
->gsState
.gsEnable
,
1205 pState
->soState
.soEnable
,
1206 pDC
->pState
->pfnProcessPrims
!= nullptr);
1207 pDC
->FeWork
.desc
.draw
.pDC
= pDC
;
1208 pDC
->FeWork
.desc
.draw
.numIndices
= numIndicesForDraw
;
// The index pointer is stored as int* regardless of the actual index width; the
// format is recorded separately in .type on the next statement.
1209 pDC
->FeWork
.desc
.draw
.pIB
= (int*)pIB
;
1210 pDC
->FeWork
.desc
.draw
.type
= pDC
->pState
->state
.indexBuffer
.format
;
1212 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1213 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
1214 pDC
->FeWork
.desc
.draw
.baseVertex
= baseVertex
;
// Primitive ID base for this chunk, scaled by the chunk index.
1215 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
// True only for the chunk that consumes all remaining indices, i.e. the last one.
1217 pDC
->cleanupState
= (remainingIndices
== numIndicesForDraw
);
1220 QueueDraw(pContext
);
// Step the index pointer past a full chunk for the next iteration.
1222 pIB
+= maxIndicesPerDraw
* indexSize
;
1223 remainingIndices
-= numIndicesForDraw
;
1227 // restore culling state
1228 pDC
= GetDrawContext(pContext
);
1229 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1231 RDTSC_STOP(APIDrawIndexed
, numIndices
* numInstances
, 0);
1235 //////////////////////////////////////////////////////////////////////////
1236 /// @brief SwrDrawIndexed
///        Non-instanced indexed draw; forwards to DrawIndexedInstance.
1237 /// @param hContext - Handle passed back from SwrCreateContext
1238 /// @param topology - Specifies topology for draw.
1239 /// @param numIndices - Number of indices to read sequentially from index buffer.
1240 /// @param indexOffset - Starting index into index buffer.
1241 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1242 void SwrDrawIndexed(
1244 PRIMITIVE_TOPOLOGY topology
,
1245 uint32_t numIndices
,
1246 uint32_t indexOffset
,
// numInstances and startInstance are left at DrawIndexedInstance's defaults (1 and 0).
1250 DrawIndexedInstance(hContext
, topology
, numIndices
, indexOffset
, baseVertex
);
1253 //////////////////////////////////////////////////////////////////////////
1254 /// @brief SwrDrawIndexedInstanced
///        Instanced indexed draw; forwards to DrawIndexedInstance.
1255 /// @param hContext - Handle passed back from SwrCreateContext
1256 /// @param topology - Specifies topology for draw.
1257 /// @param numIndices - Number of indices to read sequentially from index buffer.
1258 /// @param numInstances - Number of instances to render.
1259 /// @param indexOffset - Starting index into index buffer.
1260 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1261 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
1262 void SwrDrawIndexedInstanced(
1264 PRIMITIVE_TOPOLOGY topology
,
1265 uint32_t numIndices
,
1266 uint32_t numInstances
,
1267 uint32_t indexOffset
,
1269 uint32_t startInstance
)
// Argument order differs from this function's parameter order: DrawIndexedInstance
// expects indexOffset and baseVertex before numInstances.
1271 DrawIndexedInstance(hContext
, topology
, numIndices
, indexOffset
, baseVertex
, numInstances
, startInstance
);
1274 //////////////////////////////////////////////////////////////////////////
1275 /// @brief SwrInvalidateTiles
///        Queues DISCARDINVALIDATETILES front-end work that sets the new tile
///        state to SWR_TILE_INVALID for the given attachments.
1276 /// @param hContext - Handle passed back from SwrCreateContext
1277 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
1278 void SwrInvalidateTiles(
1280 uint32_t attachmentMask
)
// The handle is cast directly here; the draw entry points call GetContext() instead.
1282 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
1283 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1285 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1286 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1287 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
// The rect is zeroed. Presumably this selects the whole surface, as documented
// for all-zero rects on SwrDiscardRect - confirm in ProcessDiscardInvalidateTiles.
1288 memset(&pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
, 0, sizeof(SWR_RECT
));
1289 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_INVALID
;
// Unlike SwrDiscardRect, both flags below are false for invalidation.
1290 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= false;
1291 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= false;
1294 QueueDraw(pContext
);
1297 //////////////////////////////////////////////////////////////////////////
1298 /// @brief SwrDiscardRect
///        Queues DISCARDINVALIDATETILES front-end work that sets the new tile
///        state to SWR_TILE_RESOLVED for the given attachments and rect.
1299 /// @param hContext - Handle passed back from SwrCreateContext
1300 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
1301 /// @param rect - if rect is all zeros, the entire attachment surface will be discarded
1302 void SwrDiscardRect(
1304 uint32_t attachmentMask
,
// The handle is cast directly here; the draw entry points call GetContext() instead.
1307 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
1308 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1310 // Queue a discard of the hottiles
1311 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1312 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1313 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
1314 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
= rect
;
1315 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_RESOLVED
;
// Unlike SwrInvalidateTiles, both flags below are true for a discard.
1316 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= true;
1317 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= true;
1320 QueueDraw(pContext
);
1323 //////////////////////////////////////////////////////////////////////////
1324 /// @brief SwrDispatch
///        Marks the current draw context as compute, records the thread group
///        counts in an arena-allocated COMPUTE_DESC and queues the dispatch.
1325 /// @param hContext - Handle passed back from SwrCreateContext
1326 /// @param threadGroupCountX - Number of thread groups dispatched in X direction
1327 /// @param threadGroupCountY - Number of thread groups dispatched in Y direction
1328 /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
1331 uint32_t threadGroupCountX
,
1332 uint32_t threadGroupCountY
,
1333 uint32_t threadGroupCountZ
)
1340 RDTSC_START(APIDispatch
);
1341 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
1342 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1344 pDC
->isCompute
= true; // This is a compute context.
1346 // Ensure spill fill pointers are initialized to nullptr.
// NOTE(review): sizeof(pDC->pSpillFill) covers every entry only if pSpillFill is
// an array member rather than a pointer - confirm against DRAW_CONTEXT.
1347 memset(pDC
->pSpillFill
, 0, sizeof(pDC
->pSpillFill
));
// Task descriptor comes from the draw context's arena with 64-byte alignment.
1349 COMPUTE_DESC
* pTaskData
= (COMPUTE_DESC
*)pDC
->pArena
->AllocAligned(sizeof(COMPUTE_DESC
), 64);
1351 pTaskData
->threadGroupCountX
= threadGroupCountX
;
1352 pTaskData
->threadGroupCountY
= threadGroupCountY
;
1353 pTaskData
->threadGroupCountZ
= threadGroupCountZ
;
// NOTE(review): the product is computed in uint32_t, so sufficiently large group
// counts would wrap - confirm callers bound these values.
1355 uint32_t totalThreadGroups
= threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
;
1356 pDC
->pDispatch
->initialize(totalThreadGroups
, pTaskData
);
1358 QueueDispatch(pContext
);
1359 RDTSC_STOP(APIDispatch
, threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
, 0);
1362 // Deswizzles, converts and stores current contents of the hot tiles to surface
1363 // described by pState
// The store is not performed inline: it is recorded as STORETILES front-end
// work (ProcessStoreTiles) and submitted with QueueDraw().
1366 SWR_RENDERTARGET_ATTACHMENT attachment
,
1367 SWR_TILE_STATE postStoreTileState
)
1369 RDTSC_START(APIStoreTiles
);
1371 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
1372 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// Scissor setup is done on the draw context before the work item is filled in.
1374 SetupMacroTileScissors(pDC
);
1376 pDC
->FeWork
.type
= STORETILES
;
1377 pDC
->FeWork
.pfnWork
= ProcessStoreTiles
;
1378 pDC
->FeWork
.desc
.storeTiles
.attachment
= attachment
;
1379 pDC
->FeWork
.desc
.storeTiles
.postStoreTileState
= postStoreTileState
;
1382 QueueDraw(pContext
);
1384 RDTSC_STOP(APIStoreTiles
, 0, 0);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrClearRenderTarget
///        Records a CLEAR front-end work item (ProcessClear) carrying the clear
///        mask, RGBA clear color, depth and stencil values, then queues it.
/// @param clearColor - Four floats copied into clearRTColor[0..3].
/// @note  Several parameter lines (hContext, clearMask, z, stencil) are not
///        visible in this excerpt; they are referenced in the body below.
1387 void SwrClearRenderTarget(
1390 const float clearColor
[4],
1394 RDTSC_START(APIClearRenderTarget
);
1396 SWR_CONTEXT
*pContext
= (SWR_CONTEXT
*)hContext
;
1398 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// Scissor setup is done on the draw context before the work item is filled in.
1400 SetupMacroTileScissors(pDC
);
// `flags` is declared on a line not visible in this excerpt.
1403 flags
.mask
= clearMask
;
1405 pDC
->FeWork
.type
= CLEAR
;
1406 pDC
->FeWork
.pfnWork
= ProcessClear
;
1407 pDC
->FeWork
.desc
.clear
.flags
= flags
;
1408 pDC
->FeWork
.desc
.clear
.clearDepth
= z
;
// Color is copied component by component.
1409 pDC
->FeWork
.desc
.clear
.clearRTColor
[0] = clearColor
[0];
1410 pDC
->FeWork
.desc
.clear
.clearRTColor
[1] = clearColor
[1];
1411 pDC
->FeWork
.desc
.clear
.clearRTColor
[2] = clearColor
[2];
1412 pDC
->FeWork
.desc
.clear
.clearRTColor
[3] = clearColor
[3];
1413 pDC
->FeWork
.desc
.clear
.clearStencil
= stencil
;
1416 QueueDraw(pContext
);
// NOTE(review): pDC->drawId is read after QueueDraw() has submitted this draw
// context - confirm pDC is still safe to read at this point.
1418 RDTSC_STOP(APIClearRenderTarget
, 0, pDC
->drawId
);
1421 //////////////////////////////////////////////////////////////////////////
1422 /// @brief Returns a pointer to the private context state for the current
1423 /// draw operation. This is used for external components such as the
1425 /// SWR is responsible for the allocation of the private context state.
1426 /// @param hContext - Handle passed back from SwrCreateContext
/// @return Pointer stored in the current draw state's pPrivateState.
1427 VOID
* SwrGetPrivateContextState(
1430 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1431 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1432 DRAW_STATE
* pState
= pDC
->pState
;
// Allocate on first request only: privateStateSize bytes from the draw state's
// arena, aligned to KNOB_SIMD_WIDTH floats. Later calls return the same pointer.
1434 if (pState
->pPrivateState
== nullptr)
1436 pState
->pPrivateState
= pState
->pArena
->AllocAligned(pContext
->privateStateSize
, KNOB_SIMD_WIDTH
*sizeof(float));
1439 return pState
->pPrivateState
;
1442 //////////////////////////////////////////////////////////////////////////
1443 /// @brief Clients can use this to allocate memory for draw/dispatch
1444 /// operations. The memory will automatically be freed once operation
1445 /// has completed. Client can use this to allocate binding tables,
1446 /// etc. needed for shader execution.
1447 /// @param hContext - Handle passed back from SwrCreateContext
1448 /// @param size - Size of allocation
1449 /// @param align - Alignment needed for allocation.
/// @return Whatever AllocAligned(size, align) returns; no check is made here.
1450 VOID
* SwrAllocDrawContextMemory(
1455 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1456 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// The allocation is served from the arena owned by the current draw state.
1458 return pDC
->pState
->pArena
->AllocAligned(size
, align
);
1461 //////////////////////////////////////////////////////////////////////////
1462 /// @brief Returns pointer to SWR stats.
///        The query is not answered inline: it is recorded as QUERYSTATS
///        front-end work (ProcessQueryStats) and submitted with QueueDraw().
1463 /// @note The counters are atomically incremented by multiple threads.
1464 /// When calling this, you need to ensure all previous operations
1466 /// @todo If necessary, add a callback to avoid stalling the pipe to
1467 /// sample the counters.
1468 /// @param hContext - Handle passed back from SwrCreateContext
1469 /// @param pStats - SWR will fill this out for caller.
1474 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1475 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1477 pDC
->FeWork
.type
= QUERYSTATS
;
1478 pDC
->FeWork
.pfnWork
= ProcessQueryStats
;
// The caller's pointer is stored in the work item, so it must remain valid
// until the queued work has been processed.
1479 pDC
->FeWork
.desc
.queryStats
.pStats
= pStats
;
1481 // cannot execute until all previous draws have completed
// The dependency is expressed as the draw id immediately before this one.
1482 pDC
->dependency
= pDC
->drawId
- 1;
1485 QueueDraw(pContext
);
1488 //////////////////////////////////////////////////////////////////////////
1489 /// @brief Enables stats counting
///        Only writes the enableStats flag in the current draw context's API
///        state; nothing is queued by this call.
1490 /// @param hContext - Handle passed back from SwrCreateContext
1491 /// @param enable - If true then counts are incremented.
1492 void SwrEnableStats(
1496 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1497 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1499 pDC
->pState
->state
.enableStats
= enable
;
1502 //////////////////////////////////////////////////////////////////////////
1503 /// @brief Mark end of frame - used for performance profiling
1504 /// @param hContext - Handle passed back from SwrCreateContext
1505 void SWR_API
SwrEndFrame(