1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief API implementation
27 ******************************************************************************/
35 #include "core/backend.h"
36 #include "core/context.h"
37 #include "core/depthstencil.h"
38 #include "core/frontend.h"
39 #include "core/rasterizer.h"
40 #include "core/rdtsc_core.h"
41 #include "core/threads.h"
42 #include "core/tilemgr.h"
43 #include "core/clip.h"
44 #include "core/utils.h"
45 #include "core/tileset.h"
47 #include "common/os.h"
49 static const SWR_RECT g_MaxScissorRect
= {0, 0, KNOB_MAX_SCISSOR_X
, KNOB_MAX_SCISSOR_Y
};
51 void SetupDefaultState(SWR_CONTEXT
* pContext
);
53 static INLINE SWR_CONTEXT
* GetContext(HANDLE hContext
)
55 return (SWR_CONTEXT
*)hContext
;
58 void WakeAllThreads(SWR_CONTEXT
* pContext
)
60 pContext
->FifosNotEmpty
.notify_all();
63 //////////////////////////////////////////////////////////////////////////
64 /// @brief Create SWR Context.
65 /// @param pCreateInfo - pointer to creation info.
66 HANDLE
SwrCreateContext(SWR_CREATECONTEXT_INFO
* pCreateInfo
)
68 void* pContextMem
= AlignedMalloc(sizeof(SWR_CONTEXT
), KNOB_SIMD_WIDTH
* 4);
69 memset(pContextMem
, 0, sizeof(SWR_CONTEXT
));
70 SWR_CONTEXT
* pContext
= new (pContextMem
) SWR_CONTEXT();
72 pContext
->privateStateSize
= pCreateInfo
->privateStateSize
;
74 // initialize callback functions
75 pContext
->pfnLoadTile
= pCreateInfo
->pfnLoadTile
;
76 pContext
->pfnStoreTile
= pCreateInfo
->pfnStoreTile
;
77 pContext
->pfnTranslateGfxptrForRead
= pCreateInfo
->pfnTranslateGfxptrForRead
;
78 pContext
->pfnTranslateGfxptrForWrite
= pCreateInfo
->pfnTranslateGfxptrForWrite
;
79 pContext
->pfnMakeGfxPtr
= pCreateInfo
->pfnMakeGfxPtr
;
80 pContext
->pfnCreateMemoryContext
= pCreateInfo
->pfnCreateMemoryContext
;
81 pContext
->pfnDestroyMemoryContext
= pCreateInfo
->pfnDestroyMemoryContext
;
82 pContext
->pfnUpdateSoWriteOffset
= pCreateInfo
->pfnUpdateSoWriteOffset
;
83 pContext
->pfnUpdateStats
= pCreateInfo
->pfnUpdateStats
;
84 pContext
->pfnUpdateStatsFE
= pCreateInfo
->pfnUpdateStatsFE
;
87 pContext
->hExternalMemory
= pCreateInfo
->hExternalMemory
;
89 pContext
->MAX_DRAWS_IN_FLIGHT
= KNOB_MAX_DRAWS_IN_FLIGHT
;
90 if (pCreateInfo
->MAX_DRAWS_IN_FLIGHT
!= 0)
92 pContext
->MAX_DRAWS_IN_FLIGHT
= pCreateInfo
->MAX_DRAWS_IN_FLIGHT
;
95 pContext
->dcRing
.Init(pContext
->MAX_DRAWS_IN_FLIGHT
);
96 pContext
->dsRing
.Init(pContext
->MAX_DRAWS_IN_FLIGHT
);
98 pContext
->pMacroTileManagerArray
=
99 (MacroTileMgr
*)AlignedMalloc(sizeof(MacroTileMgr
) * pContext
->MAX_DRAWS_IN_FLIGHT
, 64);
100 pContext
->pDispatchQueueArray
=
101 (DispatchQueue
*)AlignedMalloc(sizeof(DispatchQueue
) * pContext
->MAX_DRAWS_IN_FLIGHT
, 64);
103 for (uint32_t dc
= 0; dc
< pContext
->MAX_DRAWS_IN_FLIGHT
; ++dc
)
105 pContext
->dcRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
106 new (&pContext
->pMacroTileManagerArray
[dc
]) MacroTileMgr(*pContext
->dcRing
[dc
].pArena
);
107 new (&pContext
->pDispatchQueueArray
[dc
]) DispatchQueue();
109 pContext
->dsRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
112 if (pCreateInfo
->pThreadInfo
)
114 pContext
->threadInfo
= *pCreateInfo
->pThreadInfo
;
118 pContext
->threadInfo
.MAX_WORKER_THREADS
= KNOB_MAX_WORKER_THREADS
;
119 pContext
->threadInfo
.BASE_NUMA_NODE
= KNOB_BASE_NUMA_NODE
;
120 pContext
->threadInfo
.BASE_CORE
= KNOB_BASE_CORE
;
121 pContext
->threadInfo
.BASE_THREAD
= KNOB_BASE_THREAD
;
122 pContext
->threadInfo
.MAX_NUMA_NODES
= KNOB_MAX_NUMA_NODES
;
123 pContext
->threadInfo
.MAX_CORES_PER_NUMA_NODE
= KNOB_MAX_CORES_PER_NUMA_NODE
;
124 pContext
->threadInfo
.MAX_THREADS_PER_CORE
= KNOB_MAX_THREADS_PER_CORE
;
125 pContext
->threadInfo
.SINGLE_THREADED
= KNOB_SINGLE_THREADED
;
128 if (pCreateInfo
->pApiThreadInfo
)
130 pContext
->apiThreadInfo
= *pCreateInfo
->pApiThreadInfo
;
134 pContext
->apiThreadInfo
.bindAPIThread0
= true;
135 pContext
->apiThreadInfo
.numAPIReservedThreads
= 1;
136 pContext
->apiThreadInfo
.numAPIThreadsPerCore
= 1;
139 if (pCreateInfo
->pWorkerPrivateState
)
141 pContext
->workerPrivateState
= *pCreateInfo
->pWorkerPrivateState
;
144 memset(&pContext
->WaitLock
, 0, sizeof(pContext
->WaitLock
));
145 memset(&pContext
->FifosNotEmpty
, 0, sizeof(pContext
->FifosNotEmpty
));
146 new (&pContext
->WaitLock
) std::mutex();
147 new (&pContext
->FifosNotEmpty
) std::condition_variable();
149 CreateThreadPool(pContext
, &pContext
->threadPool
);
151 if (pContext
->apiThreadInfo
.bindAPIThread0
)
153 BindApiThread(pContext
, 0);
156 if (pContext
->threadInfo
.SINGLE_THREADED
)
158 pContext
->pSingleThreadLockedTiles
= new TileSet();
161 pContext
->ppScratch
= new uint8_t*[pContext
->NumWorkerThreads
];
163 (SWR_STATS
*)AlignedMalloc(sizeof(SWR_STATS
) * pContext
->NumWorkerThreads
, 64);
165 #if defined(KNOB_ENABLE_AR)
166 // Setup ArchRast thread contexts which includes +1 for API thread.
167 pContext
->pArContext
= new HANDLE
[pContext
->NumWorkerThreads
+ 1];
168 pContext
->pArContext
[pContext
->NumWorkerThreads
] =
169 ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API
);
172 #if defined(KNOB_ENABLE_RDTSC)
173 pContext
->pBucketMgr
= new BucketManager(pCreateInfo
->contextName
);
174 RDTSC_RESET(pContext
->pBucketMgr
);
175 RDTSC_INIT(pContext
->pBucketMgr
, 0);
178 // Allocate scratch space for workers.
179 ///@note We could lazily allocate this, but it's a rather small amount of memory.
180 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
184 pContext
->threadPool
.pThreadData
? pContext
->threadPool
.pThreadData
[i
].numaId
: 0;
185 pContext
->ppScratch
[i
] = (uint8_t*)VirtualAllocExNuma(GetCurrentProcess(),
187 KNOB_WORKER_SCRATCH_SPACE_SIZE
,
188 MEM_RESERVE
| MEM_COMMIT
,
192 pContext
->ppScratch
[i
] =
193 (uint8_t*)AlignedMalloc(KNOB_WORKER_SCRATCH_SPACE_SIZE
, KNOB_SIMD_WIDTH
* 4);
196 #if defined(KNOB_ENABLE_AR)
197 // Initialize worker thread context for ArchRast.
198 pContext
->pArContext
[i
] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::WORKER
);
200 SWR_WORKER_DATA
* pWorkerData
= (SWR_WORKER_DATA
*)pContext
->threadPool
.pThreadData
[i
].pWorkerPrivateData
;
201 pWorkerData
->hArContext
= pContext
->pArContext
[i
];
207 #if defined(KNOB_ENABLE_AR)
208 // cache the API thread event manager, for use with sim layer
209 pCreateInfo
->hArEventManager
= pContext
->pArContext
[pContext
->NumWorkerThreads
];
212 // State setup AFTER context is fully initialized
213 SetupDefaultState(pContext
);
215 // initialize hot tile manager
216 pContext
->pHotTileMgr
= new HotTileMgr();
218 // pass pointer to bucket manager back to caller
219 #ifdef KNOB_ENABLE_RDTSC
220 pCreateInfo
->pBucketMgr
= pContext
->pBucketMgr
;
223 pCreateInfo
->contextSaveSize
= sizeof(API_STATE
);
225 StartThreadPool(pContext
, &pContext
->threadPool
);
227 return (HANDLE
)pContext
;
230 void CopyState(DRAW_STATE
& dst
, const DRAW_STATE
& src
)
232 memcpy(&dst
.state
, &src
.state
, sizeof(API_STATE
));
235 template <bool IsDraw
>
236 void QueueWork(SWR_CONTEXT
* pContext
)
238 DRAW_CONTEXT
* pDC
= pContext
->pCurDrawContext
;
239 uint32_t dcIndex
= pDC
->drawId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
243 pDC
->pTileMgr
= &pContext
->pMacroTileManagerArray
[dcIndex
];
244 pDC
->pTileMgr
->initialize();
247 // Each worker thread looks at a DC for both FE and BE work at different times, so
248 // threadsDone starts at NumFEThreads + NumBEThreads. When the threadsDone counter has
249 // reached 0, all workers have moved past this DC (i.e. each worker has checked this DC for
250 // both FE and BE work and then moved on once all work was done).
251 pContext
->pCurDrawContext
->threadsDone
= pContext
->NumFEThreads
+ pContext
->NumBEThreads
;
255 InterlockedIncrement(&pContext
->drawsOutstandingFE
);
260 std::unique_lock
<std::mutex
> lock(pContext
->WaitLock
);
261 pContext
->dcRing
.Enqueue();
264 if (pContext
->threadInfo
.SINGLE_THREADED
)
266 uint32_t mxcsr
= SetOptimalVectorCSR();
270 uint32_t curDraw
[2] = {pContext
->pCurDrawContext
->drawId
,
271 pContext
->pCurDrawContext
->drawId
};
272 WorkOnFifoFE(pContext
, 0, curDraw
[0]);
273 WorkOnFifoBE(pContext
, 0, curDraw
[1], *pContext
->pSingleThreadLockedTiles
, 0, 0);
277 uint32_t curDispatch
= pContext
->pCurDrawContext
->drawId
;
278 WorkOnCompute(pContext
, 0, curDispatch
);
281 // Dequeue the work here, if not already done, since we're single threaded (i.e. no
283 while (CompleteDrawContext(pContext
, pContext
->pCurDrawContext
) > 0)
288 RestoreVectorCSR(mxcsr
);
292 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDrawWakeAllThreads
, pDC
->drawId
);
293 WakeAllThreads(pContext
);
294 RDTSC_END(pContext
->pBucketMgr
, APIDrawWakeAllThreads
, 1);
297 // Set current draw context to NULL so that next state call forces a new draw context to be
298 // created and populated.
299 pContext
->pPrevDrawContext
= pContext
->pCurDrawContext
;
300 pContext
->pCurDrawContext
= nullptr;
303 INLINE
void QueueDraw(SWR_CONTEXT
* pContext
)
305 QueueWork
<true>(pContext
);
308 INLINE
void QueueDispatch(SWR_CONTEXT
* pContext
)
310 QueueWork
<false>(pContext
);
313 DRAW_CONTEXT
* GetDrawContext(SWR_CONTEXT
* pContext
, bool isSplitDraw
= false)
315 RDTSC_BEGIN(pContext
->pBucketMgr
, APIGetDrawContext
, 0);
316 // If current draw context is null then need to obtain a new draw context to use from ring.
317 if (pContext
->pCurDrawContext
== nullptr)
319 // Need to wait for a free entry.
320 while (pContext
->dcRing
.IsFull())
325 uint64_t curDraw
= pContext
->dcRing
.GetHead();
326 uint32_t dcIndex
= curDraw
% pContext
->MAX_DRAWS_IN_FLIGHT
;
328 if ((pContext
->frameCount
- pContext
->lastFrameChecked
) > 2 ||
329 (curDraw
- pContext
->lastDrawChecked
) > 0x10000)
331 // Take this opportunity to clean-up old arena allocations
332 pContext
->cachingArenaAllocator
.FreeOldBlocks();
334 pContext
->lastFrameChecked
= pContext
->frameCount
;
335 pContext
->lastDrawChecked
= curDraw
;
338 DRAW_CONTEXT
* pCurDrawContext
= &pContext
->dcRing
[dcIndex
];
339 pContext
->pCurDrawContext
= pCurDrawContext
;
341 // Assign next available entry in DS ring to this DC.
342 uint32_t dsIndex
= pContext
->curStateId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
343 pCurDrawContext
->pState
= &pContext
->dsRing
[dsIndex
];
345 // Copy previous state to current state.
346 if (pContext
->pPrevDrawContext
)
348 DRAW_CONTEXT
* pPrevDrawContext
= pContext
->pPrevDrawContext
;
350 // If we're splitting our draw then we can just use the same state from the previous
351 // draw. In this case, we won't increment the DS ring index so the next non-split
352 // draw can receive the state.
353 if (isSplitDraw
== false)
355 CopyState(*pCurDrawContext
->pState
, *pPrevDrawContext
->pState
);
357 // Should have been cleaned up previously
358 SWR_ASSERT(pCurDrawContext
->pState
->pArena
->IsEmpty() == true);
360 pCurDrawContext
->pState
->pPrivateState
= nullptr;
362 pContext
->curStateId
++; // Progress state ring index forward.
366 // If it's a split draw then just copy the state pointer over
367 // since its the same draw.
368 pCurDrawContext
->pState
= pPrevDrawContext
->pState
;
369 SWR_ASSERT(pPrevDrawContext
->cleanupState
== false);
374 SWR_ASSERT(pCurDrawContext
->pState
->pArena
->IsEmpty() == true);
375 pContext
->curStateId
++; // Progress state ring index forward.
378 SWR_ASSERT(pCurDrawContext
->pArena
->IsEmpty() == true);
381 pCurDrawContext
->dependent
= false;
382 pCurDrawContext
->dependentFE
= false;
384 pCurDrawContext
->pContext
= pContext
;
385 pCurDrawContext
->isCompute
= false; // Dispatch has to set this to true.
387 pCurDrawContext
->doneFE
= false;
388 pCurDrawContext
->FeLock
= 0;
389 pCurDrawContext
->threadsDone
= 0;
390 pCurDrawContext
->retireCallback
.pfnCallbackFunc
= nullptr;
392 pCurDrawContext
->dynState
.Reset(pContext
->NumWorkerThreads
);
394 // Assign unique drawId for this DC
395 pCurDrawContext
->drawId
= pContext
->dcRing
.GetHead();
397 pCurDrawContext
->cleanupState
= true;
401 SWR_ASSERT(isSplitDraw
== false, "Split draw should only be used when obtaining a new DC");
404 RDTSC_END(pContext
->pBucketMgr
, APIGetDrawContext
, 0);
405 return pContext
->pCurDrawContext
;
408 API_STATE
* GetDrawState(SWR_CONTEXT
* pContext
)
410 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
411 SWR_ASSERT(pDC
->pState
!= nullptr);
413 return &pDC
->pState
->state
;
416 void SwrDestroyContext(HANDLE hContext
)
418 SWR_CONTEXT
* pContext
= GetContext(hContext
);
419 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
421 pDC
->FeWork
.type
= SHUTDOWN
;
422 pDC
->FeWork
.pfnWork
= ProcessShutdown
;
427 DestroyThreadPool(pContext
, &pContext
->threadPool
);
430 for (uint32_t i
= 0; i
< pContext
->MAX_DRAWS_IN_FLIGHT
; ++i
)
432 AlignedFree(pContext
->dcRing
[i
].dynState
.pStats
);
433 delete pContext
->dcRing
[i
].pArena
;
434 delete pContext
->dsRing
[i
].pArena
;
435 pContext
->pMacroTileManagerArray
[i
].~MacroTileMgr();
436 pContext
->pDispatchQueueArray
[i
].~DispatchQueue();
439 AlignedFree(pContext
->pDispatchQueueArray
);
440 AlignedFree(pContext
->pMacroTileManagerArray
);
442 // Free scratch space.
443 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
446 VirtualFree(pContext
->ppScratch
[i
], 0, MEM_RELEASE
);
448 AlignedFree(pContext
->ppScratch
[i
]);
451 #if defined(KNOB_ENABLE_AR)
452 ArchRast::DestroyThreadContext(pContext
->pArContext
[i
]);
456 #if defined(KNOB_ENABLE_RDTSC)
457 delete pContext
->pBucketMgr
;
460 delete[] pContext
->ppScratch
;
461 AlignedFree(pContext
->pStats
);
463 delete pContext
->pHotTileMgr
;
464 delete pContext
->pSingleThreadLockedTiles
;
466 pContext
->~SWR_CONTEXT();
467 AlignedFree(GetContext(hContext
));
470 void SwrBindApiThread(HANDLE hContext
, uint32_t apiThreadId
)
472 SWR_CONTEXT
* pContext
= GetContext(hContext
);
473 BindApiThread(pContext
, apiThreadId
);
476 void SWR_API
SwrSaveState(HANDLE hContext
, void* pOutputStateBlock
, size_t memSize
)
478 SWR_CONTEXT
* pContext
= GetContext(hContext
);
479 auto pSrc
= GetDrawState(pContext
);
480 SWR_ASSERT(pOutputStateBlock
&& memSize
>= sizeof(*pSrc
));
482 memcpy(pOutputStateBlock
, pSrc
, sizeof(*pSrc
));
485 void SWR_API
SwrRestoreState(HANDLE hContext
, const void* pStateBlock
, size_t memSize
)
487 SWR_CONTEXT
* pContext
= GetContext(hContext
);
488 auto pDst
= GetDrawState(pContext
);
489 SWR_ASSERT(pStateBlock
&& memSize
>= sizeof(*pDst
));
491 memcpy(pDst
, pStateBlock
, sizeof(*pDst
));
494 void SetupDefaultState(SWR_CONTEXT
* pContext
)
496 API_STATE
* pState
= GetDrawState(pContext
);
498 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
499 pState
->rastState
.frontWinding
= SWR_FRONTWINDING_CCW
;
501 pState
->depthBoundsState
.depthBoundsTestEnable
= false;
502 pState
->depthBoundsState
.depthBoundsTestMinValue
= 0.0f
;
503 pState
->depthBoundsState
.depthBoundsTestMaxValue
= 1.0f
;
506 void SWR_API
SwrSync(HANDLE hContext
,
507 PFN_CALLBACK_FUNC pfnFunc
,
512 SWR_ASSERT(pfnFunc
!= nullptr);
514 SWR_CONTEXT
* pContext
= GetContext(hContext
);
515 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
517 RDTSC_BEGIN(pContext
->pBucketMgr
, APISync
, 0);
519 pDC
->FeWork
.type
= SYNC
;
520 pDC
->FeWork
.pfnWork
= ProcessSync
;
522 // Setup callback function
523 pDC
->retireCallback
.pfnCallbackFunc
= pfnFunc
;
524 pDC
->retireCallback
.userData
= userData
;
525 pDC
->retireCallback
.userData2
= userData2
;
526 pDC
->retireCallback
.userData3
= userData3
;
528 AR_API_EVENT(SwrSyncEvent(pDC
->drawId
));
533 RDTSC_END(pContext
->pBucketMgr
, APISync
, 1);
536 void SwrStallBE(HANDLE hContext
)
538 SWR_CONTEXT
* pContext
= GetContext(hContext
);
539 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
541 pDC
->dependent
= true;
544 void SwrWaitForIdle(HANDLE hContext
)
546 SWR_CONTEXT
* pContext
= GetContext(hContext
);
548 RDTSC_BEGIN(pContext
->pBucketMgr
, APIWaitForIdle
, 0);
550 while (!pContext
->dcRing
.IsEmpty())
555 RDTSC_END(pContext
->pBucketMgr
, APIWaitForIdle
, 1);
558 void SwrWaitForIdleFE(HANDLE hContext
)
560 SWR_CONTEXT
* pContext
= GetContext(hContext
);
562 RDTSC_BEGIN(pContext
->pBucketMgr
, APIWaitForIdle
, 0);
564 while (pContext
->drawsOutstandingFE
> 0)
569 RDTSC_END(pContext
->pBucketMgr
, APIWaitForIdle
, 1);
572 void SwrSetVertexBuffers(HANDLE hContext
,
574 const SWR_VERTEX_BUFFER_STATE
* pVertexBuffers
)
576 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
578 for (uint32_t i
= 0; i
< numBuffers
; ++i
)
580 const SWR_VERTEX_BUFFER_STATE
* pVB
= &pVertexBuffers
[i
];
581 pState
->vertexBuffers
[pVB
->index
] = *pVB
;
585 void SwrSetIndexBuffer(HANDLE hContext
, const SWR_INDEX_BUFFER_STATE
* pIndexBuffer
)
587 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
589 pState
->indexBuffer
= *pIndexBuffer
;
592 void SwrSetFetchFunc(HANDLE hContext
, PFN_FETCH_FUNC pfnFetchFunc
)
594 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
596 pState
->pfnFetchFunc
= pfnFetchFunc
;
599 void SwrSetSoFunc(HANDLE hContext
, PFN_SO_FUNC pfnSoFunc
, uint32_t streamIndex
)
601 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
603 SWR_ASSERT(streamIndex
< MAX_SO_STREAMS
);
605 pState
->pfnSoFunc
[streamIndex
] = pfnSoFunc
;
608 void SwrSetSoState(HANDLE hContext
, SWR_STREAMOUT_STATE
* pSoState
)
610 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
612 pState
->soState
= *pSoState
;
615 void SwrSetSoBuffers(HANDLE hContext
, SWR_STREAMOUT_BUFFER
* pSoBuffer
, uint32_t slot
)
617 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
619 SWR_ASSERT((slot
< 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot
);
621 pState
->soBuffer
[slot
] = *pSoBuffer
;
624 void SwrSetVertexFunc(HANDLE hContext
, PFN_VERTEX_FUNC pfnVertexFunc
)
626 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
628 pState
->pfnVertexFunc
= pfnVertexFunc
;
631 void SwrSetFrontendState(HANDLE hContext
, SWR_FRONTEND_STATE
* pFEState
)
633 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
634 pState
->frontendState
= *pFEState
;
637 void SwrSetGsState(HANDLE hContext
, SWR_GS_STATE
* pGSState
)
639 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
640 pState
->gsState
= *pGSState
;
643 void SwrSetGsFunc(HANDLE hContext
, PFN_GS_FUNC pfnGsFunc
)
645 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
646 pState
->pfnGsFunc
= pfnGsFunc
;
649 void SwrSetCsFunc(HANDLE hContext
,
650 PFN_CS_FUNC pfnCsFunc
,
651 uint32_t totalThreadsInGroup
,
652 uint32_t totalSpillFillSize
,
653 uint32_t scratchSpaceSizePerWarp
,
656 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
657 pState
->pfnCsFunc
= pfnCsFunc
;
658 pState
->totalThreadsInGroup
= totalThreadsInGroup
;
659 pState
->totalSpillFillSize
= totalSpillFillSize
;
660 pState
->scratchSpaceSizePerWarp
= scratchSpaceSizePerWarp
;
661 pState
->scratchSpaceNumWarps
= numWarps
;
664 void SwrSetTsState(HANDLE hContext
, SWR_TS_STATE
* pState
)
666 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
667 pApiState
->tsState
= *pState
;
670 void SwrSetHsFunc(HANDLE hContext
, PFN_HS_FUNC pfnFunc
)
672 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
673 pApiState
->pfnHsFunc
= pfnFunc
;
676 void SwrSetDsFunc(HANDLE hContext
, PFN_DS_FUNC pfnFunc
)
678 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
679 pApiState
->pfnDsFunc
= pfnFunc
;
682 void SwrSetDepthStencilState(HANDLE hContext
, SWR_DEPTH_STENCIL_STATE
* pDSState
)
684 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
686 pState
->depthStencilState
= *pDSState
;
689 void SwrSetBackendState(HANDLE hContext
, SWR_BACKEND_STATE
* pBEState
)
691 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
693 pState
->backendState
= *pBEState
;
696 void SwrSetDepthBoundsState(HANDLE hContext
, SWR_DEPTH_BOUNDS_STATE
* pDBState
)
698 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
700 pState
->depthBoundsState
= *pDBState
;
703 void SwrSetPixelShaderState(HANDLE hContext
, SWR_PS_STATE
* pPSState
)
705 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
706 pState
->psState
= *pPSState
;
709 void SwrSetBlendState(HANDLE hContext
, SWR_BLEND_STATE
* pBlendState
)
711 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
712 memcpy(&pState
->blendState
, pBlendState
, sizeof(SWR_BLEND_STATE
));
715 void SwrSetBlendFunc(HANDLE hContext
, uint32_t renderTarget
, PFN_BLEND_JIT_FUNC pfnBlendFunc
)
717 SWR_ASSERT(renderTarget
< SWR_NUM_RENDERTARGETS
);
718 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
719 pState
->pfnBlendFunc
[renderTarget
] = pfnBlendFunc
;
722 // update guardband multipliers for the viewport
723 void updateGuardbands(API_STATE
* pState
)
725 uint32_t numGbs
= pState
->backendState
.readViewportArrayIndex
? KNOB_NUM_VIEWPORTS_SCISSORS
: 1;
727 for (uint32_t i
= 0; i
< numGbs
; ++i
)
729 // guardband center is viewport center
730 pState
->gbState
.left
[i
] = KNOB_GUARDBAND_WIDTH
/ pState
->vp
[i
].width
;
731 pState
->gbState
.right
[i
] = KNOB_GUARDBAND_WIDTH
/ pState
->vp
[i
].width
;
732 pState
->gbState
.top
[i
] = KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[i
].height
;
733 pState
->gbState
.bottom
[i
] = KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[i
].height
;
737 void SwrSetRastState(HANDLE hContext
, const SWR_RASTSTATE
* pRastState
)
739 SWR_CONTEXT
* pContext
= GetContext(hContext
);
740 API_STATE
* pState
= GetDrawState(pContext
);
742 memcpy(&pState
->rastState
, pRastState
, sizeof(SWR_RASTSTATE
));
745 void SwrSetViewports(HANDLE hContext
,
746 uint32_t numViewports
,
747 const SWR_VIEWPORT
* pViewports
,
748 const SWR_VIEWPORT_MATRICES
* pMatrices
)
750 SWR_ASSERT(numViewports
<= KNOB_NUM_VIEWPORTS_SCISSORS
, "Invalid number of viewports.");
752 SWR_CONTEXT
* pContext
= GetContext(hContext
);
753 API_STATE
* pState
= GetDrawState(pContext
);
755 memcpy(&pState
->vp
[0], pViewports
, sizeof(SWR_VIEWPORT
) * numViewports
);
756 // @todo Faster to copy portions of the SOA or just copy all of it?
757 memcpy(&pState
->vpMatrices
, pMatrices
, sizeof(SWR_VIEWPORT_MATRICES
));
760 void SwrSetScissorRects(HANDLE hContext
, uint32_t numScissors
, const SWR_RECT
* pScissors
)
762 SWR_ASSERT(numScissors
<= KNOB_NUM_VIEWPORTS_SCISSORS
, "Invalid number of scissor rects.");
764 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
765 memcpy(&pState
->scissorRects
[0], pScissors
, numScissors
* sizeof(pScissors
[0]));
768 void SetupMacroTileScissors(DRAW_CONTEXT
* pDC
)
770 API_STATE
* pState
= &pDC
->pState
->state
;
771 uint32_t numScissors
=
772 pState
->backendState
.readViewportArrayIndex
? KNOB_NUM_VIEWPORTS_SCISSORS
: 1;
773 pState
->scissorsTileAligned
= true;
775 for (uint32_t index
= 0; index
< numScissors
; ++index
)
777 SWR_RECT
& scissorInFixedPoint
= pState
->scissorsInFixedPoint
[index
];
779 // Set up scissor dimensions based on scissor or viewport
780 if (pState
->rastState
.scissorEnable
)
782 scissorInFixedPoint
= pState
->scissorRects
[index
];
786 // the vp width and height must be added to origin un-rounded then the result round to
787 // -inf. The cast to int works for rounding assuming all [left, right, top, bottom] are
789 scissorInFixedPoint
.xmin
= (int32_t)pState
->vp
[index
].x
;
790 scissorInFixedPoint
.xmax
= (int32_t)(pState
->vp
[index
].x
+ pState
->vp
[index
].width
);
791 scissorInFixedPoint
.ymin
= (int32_t)pState
->vp
[index
].y
;
792 scissorInFixedPoint
.ymax
= (int32_t)(pState
->vp
[index
].y
+ pState
->vp
[index
].height
);
796 scissorInFixedPoint
&= g_MaxScissorRect
;
798 // Test for tile alignment
800 tileAligned
= (scissorInFixedPoint
.xmin
% KNOB_TILE_X_DIM
) == 0;
801 tileAligned
&= (scissorInFixedPoint
.ymin
% KNOB_TILE_Y_DIM
) == 0;
802 tileAligned
&= (scissorInFixedPoint
.xmax
% KNOB_TILE_X_DIM
) == 0;
803 tileAligned
&= (scissorInFixedPoint
.ymax
% KNOB_TILE_Y_DIM
) == 0;
805 pState
->scissorsTileAligned
&= tileAligned
;
807 // Scale to fixed point
808 scissorInFixedPoint
.xmin
*= FIXED_POINT_SCALE
;
809 scissorInFixedPoint
.xmax
*= FIXED_POINT_SCALE
;
810 scissorInFixedPoint
.ymin
*= FIXED_POINT_SCALE
;
811 scissorInFixedPoint
.ymax
*= FIXED_POINT_SCALE
;
813 // Make scissor inclusive
814 scissorInFixedPoint
.xmax
-= 1;
815 scissorInFixedPoint
.ymax
-= 1;
820 // templated backend function tables
822 void SetupPipeline(DRAW_CONTEXT
* pDC
)
824 DRAW_STATE
* pState
= pDC
->pState
;
825 const SWR_RASTSTATE
& rastState
= pState
->state
.rastState
;
826 const SWR_PS_STATE
& psState
= pState
->state
.psState
;
827 BACKEND_FUNCS
& backendFuncs
= pState
->backendFuncs
;
830 if (psState
.pfnPixelShader
== nullptr)
832 backendFuncs
.pfnBackend
= gBackendNullPs
[pState
->state
.rastState
.sampleCount
];
836 const uint32_t forcedSampleCount
= (rastState
.forcedSampleCount
) ? 1 : 0;
837 const bool bMultisampleEnable
=
838 ((rastState
.sampleCount
> SWR_MULTISAMPLE_1X
) || forcedSampleCount
) ? 1 : 0;
839 const uint32_t centroid
=
840 ((psState
.barycentricsMask
& SWR_BARYCENTRIC_CENTROID_MASK
) > 0) ? 1 : 0;
841 const uint32_t canEarlyZ
=
842 (psState
.forceEarlyZ
|| (!psState
.writesODepth
&& !psState
.usesUAV
)) ? 1 : 0;
843 SWR_BARYCENTRICS_MASK barycentricsMask
= (SWR_BARYCENTRICS_MASK
)psState
.barycentricsMask
;
845 // select backend function
846 switch (psState
.shadingRate
)
848 case SWR_SHADING_RATE_PIXEL
:
849 if (bMultisampleEnable
)
851 // always need to generate I & J per sample for Z interpolation
853 (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
854 backendFuncs
.pfnBackend
=
855 gBackendPixelRateTable
[rastState
.sampleCount
][rastState
.bIsCenterPattern
]
856 [psState
.inputCoverage
][centroid
][forcedSampleCount
]
862 // always need to generate I & J per pixel for Z interpolation
864 (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_PIXEL_MASK
);
865 backendFuncs
.pfnBackend
=
866 gBackendSingleSample
[psState
.inputCoverage
][centroid
][canEarlyZ
];
869 case SWR_SHADING_RATE_SAMPLE
:
870 SWR_ASSERT(rastState
.bIsCenterPattern
!= true);
871 // always need to generate I & J per sample for Z interpolation
873 (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
874 backendFuncs
.pfnBackend
=
875 gBackendSampleRateTable
[rastState
.sampleCount
][psState
.inputCoverage
][centroid
]
879 SWR_ASSERT(0 && "Invalid shading rate");
884 SWR_ASSERT(backendFuncs
.pfnBackend
);
886 PFN_PROCESS_PRIMS pfnBinner
;
887 #if USE_SIMD16_FRONTEND
888 PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16
;
890 switch (pState
->state
.topology
)
893 pState
->pfnProcessPrims
= ClipPoints
;
894 pfnBinner
= BinPoints
;
895 #if USE_SIMD16_FRONTEND
896 pState
->pfnProcessPrims_simd16
= ClipPoints_simd16
;
897 pfnBinner_simd16
= BinPoints_simd16
;
903 case TOP_LINE_LIST_ADJ
:
904 case TOP_LISTSTRIP_ADJ
:
905 pState
->pfnProcessPrims
= ClipLines
;
906 pfnBinner
= BinLines
;
907 #if USE_SIMD16_FRONTEND
908 pState
->pfnProcessPrims_simd16
= ClipLines_simd16
;
909 pfnBinner_simd16
= BinLines_simd16
;
913 pState
->pfnProcessPrims
= ClipTriangles
;
914 pfnBinner
= GetBinTrianglesFunc((rastState
.conservativeRast
> 0));
915 #if USE_SIMD16_FRONTEND
916 pState
->pfnProcessPrims_simd16
= ClipTriangles_simd16
;
917 pfnBinner_simd16
= GetBinTrianglesFunc_simd16((rastState
.conservativeRast
> 0));
923 // Disable clipper if viewport transform is disabled or if clipper is disabled
924 if (pState
->state
.frontendState
.vpTransformDisable
|| !pState
->state
.rastState
.clipEnable
)
926 pState
->pfnProcessPrims
= pfnBinner
;
927 #if USE_SIMD16_FRONTEND
928 pState
->pfnProcessPrims_simd16
= pfnBinner_simd16
;
932 // Disable rasterizer and backend if no pixel, no depth/stencil, and no attributes
933 if ((pState
->state
.psState
.pfnPixelShader
== nullptr) &&
934 (pState
->state
.depthStencilState
.depthTestEnable
== FALSE
) &&
935 (pState
->state
.depthStencilState
.depthWriteEnable
== FALSE
) &&
936 (pState
->state
.depthStencilState
.stencilTestEnable
== FALSE
) &&
937 (pState
->state
.depthStencilState
.stencilWriteEnable
== FALSE
) &&
938 (pState
->state
.backendState
.numAttributes
== 0))
940 pState
->pfnProcessPrims
= nullptr;
941 #if USE_SIMD16_FRONTEND
942 pState
->pfnProcessPrims_simd16
= nullptr;
946 if (pState
->state
.soState
.rasterizerDisable
== true)
948 pState
->pfnProcessPrims
= nullptr;
949 #if USE_SIMD16_FRONTEND
950 pState
->pfnProcessPrims_simd16
= nullptr;
955 // set up the frontend attribute count
956 pState
->state
.feNumAttributes
= 0;
957 const SWR_BACKEND_STATE
& backendState
= pState
->state
.backendState
;
958 if (backendState
.swizzleEnable
)
960 // attribute swizzling is enabled, iterate over the map and record the max attribute used
961 for (uint32_t i
= 0; i
< backendState
.numAttributes
; ++i
)
963 pState
->state
.feNumAttributes
=
964 std::max(pState
->state
.feNumAttributes
,
965 (uint32_t)backendState
.swizzleMap
[i
].sourceAttrib
+ 1);
970 pState
->state
.feNumAttributes
= pState
->state
.backendState
.numAttributes
;
973 if (pState
->state
.soState
.soEnable
)
975 uint64_t streamMasks
= 0;
976 for (uint32_t i
= 0; i
< 4; ++i
)
978 streamMasks
|= pState
->state
.soState
.streamMasks
[i
];
982 if (_BitScanReverse64(&maxAttrib
, streamMasks
))
984 pState
->state
.feNumAttributes
=
985 std::max(pState
->state
.feNumAttributes
, (uint32_t)(maxAttrib
+ 1));
989 // complicated logic to test for cases where we don't need backing hottile memory for a draw
990 // have to check for the special case where depth/stencil test is enabled but depthwrite is
992 pState
->state
.depthHottileEnable
=
993 ((!(pState
->state
.depthStencilState
.depthTestEnable
&&
994 !pState
->state
.depthStencilState
.depthWriteEnable
&&
995 !pState
->state
.depthBoundsState
.depthBoundsTestEnable
&&
996 pState
->state
.depthStencilState
.depthTestFunc
== ZFUNC_ALWAYS
)) &&
997 (pState
->state
.depthStencilState
.depthTestEnable
||
998 pState
->state
.depthStencilState
.depthWriteEnable
||
999 pState
->state
.depthBoundsState
.depthBoundsTestEnable
))
1003 pState
->state
.stencilHottileEnable
=
1004 (((!(pState
->state
.depthStencilState
.stencilTestEnable
&&
1005 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
1006 pState
->state
.depthStencilState
.stencilTestFunc
== ZFUNC_ALWAYS
)) ||
1007 // for stencil we have to check the double sided state as well
1008 (!(pState
->state
.depthStencilState
.doubleSidedStencilTestEnable
&&
1009 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
1010 pState
->state
.depthStencilState
.backfaceStencilTestFunc
== ZFUNC_ALWAYS
))) &&
1011 (pState
->state
.depthStencilState
.stencilTestEnable
||
1012 pState
->state
.depthStencilState
.stencilWriteEnable
))
1016 uint32_t hotTileEnable
= pState
->state
.psState
.renderTargetMask
;
1018 // Disable hottile for surfaces with no writes
1019 if (psState
.pfnPixelShader
!= nullptr)
1022 uint32_t rtMask
= pState
->state
.psState
.renderTargetMask
;
1023 while (_BitScanForward(&rt
, rtMask
))
1025 rtMask
&= ~(1 << rt
);
1027 if (pState
->state
.blendState
.renderTarget
[rt
].writeDisableAlpha
&&
1028 pState
->state
.blendState
.renderTarget
[rt
].writeDisableRed
&&
1029 pState
->state
.blendState
.renderTarget
[rt
].writeDisableGreen
&&
1030 pState
->state
.blendState
.renderTarget
[rt
].writeDisableBlue
)
1032 hotTileEnable
&= ~(1 << rt
);
1037 pState
->state
.colorHottileEnable
= hotTileEnable
;
1039 // Setup depth quantization function
1040 if (pState
->state
.depthHottileEnable
)
1042 switch (pState
->state
.rastState
.depthFormat
)
1044 case R32_FLOAT_X8X24_TYPELESS
:
1045 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT_X8X24_TYPELESS
>;
1048 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT
>;
1050 case R24_UNORM_X8_TYPELESS
:
1051 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R24_UNORM_X8_TYPELESS
>;
1054 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R16_UNORM
>;
1057 SWR_INVALID("Unsupported depth format for depth quantiztion.");
1058 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT
>;
1063 // set up pass-through quantize if depth isn't enabled
1064 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT
>;
1067 // Generate guardbands
1068 updateGuardbands(&pState
->state
);
1071 //////////////////////////////////////////////////////////////////////////
1073 /// @param pDC - Draw context to initialize for this draw.
1074 void InitDraw(DRAW_CONTEXT
* pDC
, bool isSplitDraw
)
1076 // We don't need to re-setup the scissors/pipeline state again for split draw.
1077 if (isSplitDraw
== false)
1079 SetupMacroTileScissors(pDC
);
1085 //////////////////////////////////////////////////////////////////////////
1086 /// @brief We can split the draw for certain topologies for better performance.
1087 /// @param totalVerts - Total vertices for draw
1088 /// @param topology - Topology used for draw
1089 uint32_t MaxVertsPerDraw(DRAW_CONTEXT
* pDC
, uint32_t totalVerts
, PRIMITIVE_TOPOLOGY topology
)
1091 API_STATE
& state
= pDC
->pState
->state
;
1093 // We can not split draws that have streamout enabled because there is no practical way
1094 // to support multiple threads generating SO data for a single set of buffers.
1095 if (state
.soState
.soEnable
)
1100 // The Primitive Assembly code can only handle 1 RECT at a time. Specified with only 3 verts.
1101 if (topology
== TOP_RECT_LIST
)
1106 // Is split drawing disabled?
1107 if (KNOB_DISABLE_SPLIT_DRAW
)
1112 uint32_t vertsPerDraw
= totalVerts
;
1116 case TOP_POINT_LIST
:
1117 case TOP_TRIANGLE_LIST
:
1118 vertsPerDraw
= KNOB_MAX_PRIMS_PER_DRAW
;
1121 case TOP_PATCHLIST_1
:
1122 case TOP_PATCHLIST_2
:
1123 case TOP_PATCHLIST_3
:
1124 case TOP_PATCHLIST_4
:
1125 case TOP_PATCHLIST_5
:
1126 case TOP_PATCHLIST_6
:
1127 case TOP_PATCHLIST_7
:
1128 case TOP_PATCHLIST_8
:
1129 case TOP_PATCHLIST_9
:
1130 case TOP_PATCHLIST_10
:
1131 case TOP_PATCHLIST_11
:
1132 case TOP_PATCHLIST_12
:
1133 case TOP_PATCHLIST_13
:
1134 case TOP_PATCHLIST_14
:
1135 case TOP_PATCHLIST_15
:
1136 case TOP_PATCHLIST_16
:
1137 case TOP_PATCHLIST_17
:
1138 case TOP_PATCHLIST_18
:
1139 case TOP_PATCHLIST_19
:
1140 case TOP_PATCHLIST_20
:
1141 case TOP_PATCHLIST_21
:
1142 case TOP_PATCHLIST_22
:
1143 case TOP_PATCHLIST_23
:
1144 case TOP_PATCHLIST_24
:
1145 case TOP_PATCHLIST_25
:
1146 case TOP_PATCHLIST_26
:
1147 case TOP_PATCHLIST_27
:
1148 case TOP_PATCHLIST_28
:
1149 case TOP_PATCHLIST_29
:
1150 case TOP_PATCHLIST_30
:
1151 case TOP_PATCHLIST_31
:
1152 case TOP_PATCHLIST_32
:
1153 if (pDC
->pState
->state
.tsState
.tsEnable
)
1155 uint32_t vertsPerPrim
= topology
- TOP_PATCHLIST_BASE
;
1156 vertsPerDraw
= vertsPerPrim
* KNOB_MAX_TESS_PRIMS_PER_DRAW
;
1160 // We are not splitting up draws for other topologies.
1164 return vertsPerDraw
;
1167 //////////////////////////////////////////////////////////////////////////
1168 /// @brief DrawInstanced
1169 /// @param hContext - Handle passed back from SwrCreateContext
1170 /// @param topology - Specifies topology for draw.
1171 /// @param numVerts - How many vertices to read sequentially from vertex data (per instance).
1172 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1173 /// @param numInstances - How many instances to render.
1174 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1175 /// (instanced data)
1176 void DrawInstanced(HANDLE hContext
,
1177 PRIMITIVE_TOPOLOGY topology
,
1178 uint32_t numVertices
,
1179 uint32_t startVertex
,
1180 uint32_t numInstances
= 1,
1181 uint32_t startInstance
= 0)
1188 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1189 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1191 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDraw
, pDC
->drawId
);
1193 uint32_t maxVertsPerDraw
= MaxVertsPerDraw(pDC
, numVertices
, topology
);
1194 uint32_t primsPerDraw
= GetNumPrims(topology
, maxVertsPerDraw
);
1195 uint32_t remainingVerts
= numVertices
;
1197 API_STATE
* pState
= &pDC
->pState
->state
;
1198 pState
->topology
= topology
;
1199 pState
->forceFront
= false;
1201 // disable culling for points/lines
1202 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1203 if (topology
== TOP_POINT_LIST
)
1205 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1206 pState
->forceFront
= true;
1208 else if (topology
== TOP_RECT_LIST
)
1210 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1214 while (remainingVerts
)
1216 uint32_t numVertsForDraw
=
1217 (remainingVerts
< maxVertsPerDraw
) ? remainingVerts
: maxVertsPerDraw
;
1219 bool isSplitDraw
= (draw
> 0) ? !KNOB_DISABLE_SPLIT_DRAW
: false;
1220 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
, isSplitDraw
);
1221 InitDraw(pDC
, isSplitDraw
);
1223 pDC
->FeWork
.type
= DRAW
;
1224 pDC
->FeWork
.pfnWork
= GetProcessDrawFunc(false, // IsIndexed
1225 false, // bEnableCutIndex
1226 pState
->tsState
.tsEnable
,
1227 pState
->gsState
.gsEnable
,
1228 pState
->soState
.soEnable
,
1229 pDC
->pState
->pfnProcessPrims
!= nullptr);
1230 pDC
->FeWork
.desc
.draw
.numVerts
= numVertsForDraw
;
1231 pDC
->FeWork
.desc
.draw
.startVertex
= startVertex
;
1232 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1233 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
1234 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
1235 pDC
->FeWork
.desc
.draw
.startVertexID
= draw
* maxVertsPerDraw
;
1237 pDC
->cleanupState
= (remainingVerts
== numVertsForDraw
);
1240 QueueDraw(pContext
);
1242 AR_API_EVENT(DrawInstancedEvent(pDC
->drawId
,
1248 pState
->tsState
.tsEnable
,
1249 pState
->gsState
.gsEnable
,
1250 pState
->soState
.soEnable
,
1251 pState
->gsState
.outputTopology
,
1254 remainingVerts
-= numVertsForDraw
;
1258 // restore culling state
1259 pDC
= GetDrawContext(pContext
);
1260 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1262 RDTSC_END(pContext
->pBucketMgr
, APIDraw
, numVertices
* numInstances
);
1265 //////////////////////////////////////////////////////////////////////////
1267 /// @param hContext - Handle passed back from SwrCreateContext
1268 /// @param topology - Specifies topology for draw.
1269 /// @param startVertex - Specifies start vertex in vertex buffer for draw.
1270 /// @param primCount - Number of vertices.
1271 void SwrDraw(HANDLE hContext
,
1272 PRIMITIVE_TOPOLOGY topology
,
1273 uint32_t startVertex
,
1274 uint32_t numVertices
)
1276 DrawInstanced(hContext
, topology
, numVertices
, startVertex
);
1279 //////////////////////////////////////////////////////////////////////////
1280 /// @brief SwrDrawInstanced
1281 /// @param hContext - Handle passed back from SwrCreateContext
1282 /// @param topology - Specifies topology for draw.
1283 /// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
1284 /// @param numInstances - How many instances to render.
1285 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1286 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1287 /// (instanced data)
1288 void SwrDrawInstanced(HANDLE hContext
,
1289 PRIMITIVE_TOPOLOGY topology
,
1290 uint32_t numVertsPerInstance
,
1291 uint32_t numInstances
,
1292 uint32_t startVertex
,
1293 uint32_t startInstance
)
1296 hContext
, topology
, numVertsPerInstance
, startVertex
, numInstances
, startInstance
);
1299 //////////////////////////////////////////////////////////////////////////
1300 /// @brief DrawIndexedInstanced
1301 /// @param hContext - Handle passed back from SwrCreateContext
1302 /// @param topology - Specifies topology for draw.
1303 /// @param numIndices - Number of indices to read sequentially from index buffer.
1304 /// @param indexOffset - Starting index into index buffer.
1305 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1306 /// @param numInstances - Number of instances to render.
1307 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1308 /// (instanced data)
1309 void DrawIndexedInstance(HANDLE hContext
,
1310 PRIMITIVE_TOPOLOGY topology
,
1311 uint32_t numIndices
,
1312 uint32_t indexOffset
,
1314 uint32_t numInstances
= 1,
1315 uint32_t startInstance
= 0)
1322 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1323 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1324 API_STATE
* pState
= &pDC
->pState
->state
;
1326 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDrawIndexed
, pDC
->drawId
);
1328 uint32_t maxIndicesPerDraw
= MaxVertsPerDraw(pDC
, numIndices
, topology
);
1329 uint32_t primsPerDraw
= GetNumPrims(topology
, maxIndicesPerDraw
);
1330 uint32_t remainingIndices
= numIndices
;
1332 uint32_t indexSize
= 0;
1333 switch (pState
->indexBuffer
.format
)
1336 indexSize
= sizeof(uint32_t);
1339 indexSize
= sizeof(uint16_t);
1342 indexSize
= sizeof(uint8_t);
1345 SWR_INVALID("Invalid index buffer format: %d", pState
->indexBuffer
.format
);
1349 gfxptr_t xpIB
= pState
->indexBuffer
.xpIndices
;
1350 xpIB
+= (uint64_t)indexOffset
* (uint64_t)indexSize
;
1352 pState
->topology
= topology
;
1353 pState
->forceFront
= false;
1355 // disable culling for points/lines
1356 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1357 if (topology
== TOP_POINT_LIST
)
1359 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1360 pState
->forceFront
= true;
1362 else if (topology
== TOP_RECT_LIST
)
1364 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1367 while (remainingIndices
)
1369 uint32_t numIndicesForDraw
=
1370 (remainingIndices
< maxIndicesPerDraw
) ? remainingIndices
: maxIndicesPerDraw
;
1372 // When breaking up draw, we need to obtain new draw context for each iteration.
1373 bool isSplitDraw
= (draw
> 0) ? !KNOB_DISABLE_SPLIT_DRAW
: false;
1375 pDC
= GetDrawContext(pContext
, isSplitDraw
);
1376 InitDraw(pDC
, isSplitDraw
);
1378 pDC
->FeWork
.type
= DRAW
;
1379 pDC
->FeWork
.pfnWork
= GetProcessDrawFunc(true, // IsIndexed
1380 pState
->frontendState
.bEnableCutIndex
,
1381 pState
->tsState
.tsEnable
,
1382 pState
->gsState
.gsEnable
,
1383 pState
->soState
.soEnable
,
1384 pDC
->pState
->pfnProcessPrims
!= nullptr);
1385 pDC
->FeWork
.desc
.draw
.pDC
= pDC
;
1386 pDC
->FeWork
.desc
.draw
.numIndices
= numIndicesForDraw
;
1387 pDC
->FeWork
.desc
.draw
.xpIB
= xpIB
;
1388 pDC
->FeWork
.desc
.draw
.type
= pDC
->pState
->state
.indexBuffer
.format
;
1390 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1391 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
1392 pDC
->FeWork
.desc
.draw
.baseVertex
= baseVertex
;
1393 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
1395 pDC
->cleanupState
= (remainingIndices
== numIndicesForDraw
);
1398 QueueDraw(pContext
);
1400 AR_API_EVENT(DrawIndexedInstancedEvent(pDC
->drawId
,
1407 pState
->tsState
.tsEnable
,
1408 pState
->gsState
.gsEnable
,
1409 pState
->soState
.soEnable
,
1410 pState
->gsState
.outputTopology
,
1413 xpIB
+= maxIndicesPerDraw
* indexSize
;
1414 remainingIndices
-= numIndicesForDraw
;
1418 // Restore culling state
1419 pDC
= GetDrawContext(pContext
);
1420 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1422 RDTSC_END(pContext
->pBucketMgr
, APIDrawIndexed
, numIndices
* numInstances
);
1425 //////////////////////////////////////////////////////////////////////////
1426 /// @brief DrawIndexed
1427 /// @param hContext - Handle passed back from SwrCreateContext
1428 /// @param topology - Specifies topology for draw.
1429 /// @param numIndices - Number of indices to read sequentially from index buffer.
1430 /// @param indexOffset - Starting index into index buffer.
1431 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1432 void SwrDrawIndexed(HANDLE hContext
,
1433 PRIMITIVE_TOPOLOGY topology
,
1434 uint32_t numIndices
,
1435 uint32_t indexOffset
,
1438 DrawIndexedInstance(hContext
, topology
, numIndices
, indexOffset
, baseVertex
);
1441 //////////////////////////////////////////////////////////////////////////
1442 /// @brief SwrDrawIndexedInstanced
1443 /// @param hContext - Handle passed back from SwrCreateContext
1444 /// @param topology - Specifies topology for draw.
1445 /// @param numIndices - Number of indices to read sequentially from index buffer.
1446 /// @param numInstances - Number of instances to render.
1447 /// @param indexOffset - Starting index into index buffer.
1448 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1449 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1450 /// (instanced data)
1451 void SwrDrawIndexedInstanced(HANDLE hContext
,
1452 PRIMITIVE_TOPOLOGY topology
,
1453 uint32_t numIndices
,
1454 uint32_t numInstances
,
1455 uint32_t indexOffset
,
1457 uint32_t startInstance
)
1459 DrawIndexedInstance(
1460 hContext
, topology
, numIndices
, indexOffset
, baseVertex
, numInstances
, startInstance
);
1463 //////////////////////////////////////////////////////////////////////////
1464 /// @brief SwrInvalidateTiles
1465 /// @param hContext - Handle passed back from SwrCreateContext
1466 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to
1468 /// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
1469 /// be hottile size-aligned.
1470 void SWR_API
SwrInvalidateTiles(HANDLE hContext
,
1471 uint32_t attachmentMask
,
1472 const SWR_RECT
& invalidateRect
)
1479 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1480 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1482 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1483 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1484 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
1485 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
= invalidateRect
;
1486 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
&= g_MaxScissorRect
;
1487 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_INVALID
;
1488 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= false;
1489 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= false;
1492 QueueDraw(pContext
);
1494 AR_API_EVENT(SwrInvalidateTilesEvent(pDC
->drawId
));
1497 //////////////////////////////////////////////////////////////////////////
1498 /// @brief SwrDiscardRect
1499 /// @param hContext - Handle passed back from SwrCreateContext
1500 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
1501 /// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
1503 void SWR_API
SwrDiscardRect(HANDLE hContext
, uint32_t attachmentMask
, const SWR_RECT
& rect
)
1510 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1511 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1513 // Queue a load to the hottile
1514 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1515 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1516 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
1517 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
= rect
;
1518 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
&= g_MaxScissorRect
;
1519 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_RESOLVED
;
1520 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= true;
1521 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= true;
1524 QueueDraw(pContext
);
1526 AR_API_EVENT(SwrDiscardRectEvent(pDC
->drawId
));
1529 //////////////////////////////////////////////////////////////////////////
1530 /// @brief SwrDispatch
1531 /// @param hContext - Handle passed back from SwrCreateContext
1532 /// @param threadGroupCountX - Number of thread groups dispatched in X direction
1533 /// @param threadGroupCountY - Number of thread groups dispatched in Y direction
1534 /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
1535 void SwrDispatch(HANDLE hContext
,
1536 uint32_t threadGroupCountX
,
1537 uint32_t threadGroupCountY
,
1538 uint32_t threadGroupCountZ
1547 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1548 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1550 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDispatch
, pDC
->drawId
);
1552 DispatchEvent(pDC
->drawId
, threadGroupCountX
, threadGroupCountY
, threadGroupCountZ
));
1553 pDC
->isCompute
= true; // This is a compute context.
1555 COMPUTE_DESC
* pTaskData
= (COMPUTE_DESC
*)pDC
->pArena
->AllocAligned(sizeof(COMPUTE_DESC
), 64);
1557 pTaskData
->threadGroupCountX
= threadGroupCountX
;
1558 pTaskData
->threadGroupCountY
= threadGroupCountY
;
1559 pTaskData
->threadGroupCountZ
= threadGroupCountZ
;
1561 pTaskData
->enableThreadDispatch
= false;
1563 uint32_t totalThreadGroups
= threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
;
1564 uint32_t dcIndex
= pDC
->drawId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
1565 pDC
->pDispatch
= &pContext
->pDispatchQueueArray
[dcIndex
];
1566 pDC
->pDispatch
->initialize(totalThreadGroups
, pTaskData
, &ProcessComputeBE
);
1568 QueueDispatch(pContext
);
1569 RDTSC_END(pContext
->pBucketMgr
,
1571 threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
);
1574 // Deswizzles, converts and stores current contents of the hot tiles to surface
1575 // described by pState
1576 void SWR_API
SwrStoreTiles(HANDLE hContext
,
1577 uint32_t attachmentMask
,
1578 SWR_TILE_STATE postStoreTileState
,
1579 const SWR_RECT
& storeRect
)
1586 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1587 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1589 RDTSC_BEGIN(pContext
->pBucketMgr
, APIStoreTiles
, pDC
->drawId
);
1591 pDC
->FeWork
.type
= STORETILES
;
1592 pDC
->FeWork
.pfnWork
= ProcessStoreTiles
;
1593 pDC
->FeWork
.desc
.storeTiles
.attachmentMask
= attachmentMask
;
1594 pDC
->FeWork
.desc
.storeTiles
.postStoreTileState
= postStoreTileState
;
1595 pDC
->FeWork
.desc
.storeTiles
.rect
= storeRect
;
1596 pDC
->FeWork
.desc
.storeTiles
.rect
&= g_MaxScissorRect
;
1599 QueueDraw(pContext
);
1601 AR_API_EVENT(SwrStoreTilesEvent(pDC
->drawId
));
1603 RDTSC_END(pContext
->pBucketMgr
, APIStoreTiles
, 1);
1606 //////////////////////////////////////////////////////////////////////////
1607 /// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
1608 /// @param hContext - Handle passed back from SwrCreateContext
1609 /// @param attachmentMask - combination of SWR_ATTACHMENT_*_BIT attachments to clear
1610 /// @param renderTargetArrayIndex - the RT array index to clear
1611 /// @param clearColor - color use for clearing render targets
1612 /// @param z - depth value use for clearing depth buffer
1613 /// @param stencil - stencil value used for clearing stencil buffer
1614 /// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
1615 void SWR_API
SwrClearRenderTarget(HANDLE hContext
,
1616 uint32_t attachmentMask
,
1617 uint32_t renderTargetArrayIndex
,
1618 const float clearColor
[4],
1621 const SWR_RECT
& clearRect
)
1628 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1629 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1631 RDTSC_BEGIN(pContext
->pBucketMgr
, APIClearRenderTarget
, pDC
->drawId
);
1633 pDC
->FeWork
.type
= CLEAR
;
1634 pDC
->FeWork
.pfnWork
= ProcessClear
;
1635 pDC
->FeWork
.desc
.clear
.rect
= clearRect
;
1636 pDC
->FeWork
.desc
.clear
.rect
&= g_MaxScissorRect
;
1637 pDC
->FeWork
.desc
.clear
.attachmentMask
= attachmentMask
;
1638 pDC
->FeWork
.desc
.clear
.renderTargetArrayIndex
= renderTargetArrayIndex
;
1639 pDC
->FeWork
.desc
.clear
.clearDepth
= z
;
1640 pDC
->FeWork
.desc
.clear
.clearRTColor
[0] = clearColor
[0];
1641 pDC
->FeWork
.desc
.clear
.clearRTColor
[1] = clearColor
[1];
1642 pDC
->FeWork
.desc
.clear
.clearRTColor
[2] = clearColor
[2];
1643 pDC
->FeWork
.desc
.clear
.clearRTColor
[3] = clearColor
[3];
1644 pDC
->FeWork
.desc
.clear
.clearStencil
= stencil
;
1647 QueueDraw(pContext
);
1649 RDTSC_END(pContext
->pBucketMgr
, APIClearRenderTarget
, 1);
1652 //////////////////////////////////////////////////////////////////////////
1653 /// @brief Returns a pointer to the private context state for the current
1654 /// draw operation. This is used for external componets such as the
1656 /// SWR is responsible for the allocation of the private context state.
1657 /// @param hContext - Handle passed back from SwrCreateContext
1658 VOID
* SwrGetPrivateContextState(HANDLE hContext
)
1660 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1661 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1662 DRAW_STATE
* pState
= pDC
->pState
;
1664 if (pState
->pPrivateState
== nullptr)
1666 pState
->pPrivateState
= pState
->pArena
->AllocAligned(pContext
->privateStateSize
,
1667 KNOB_SIMD_WIDTH
* sizeof(float));
1670 return pState
->pPrivateState
;
1673 //////////////////////////////////////////////////////////////////////////
1674 /// @brief Clients can use this to allocate memory for draw/dispatch
1675 /// operations. The memory will automatically be freed once operation
1676 /// has completed. Client can use this to allocate binding tables,
1677 /// etc. needed for shader execution.
1678 /// @param hContext - Handle passed back from SwrCreateContext
1679 /// @param size - Size of allocation
1680 /// @param align - Alignment needed for allocation.
1681 VOID
* SwrAllocDrawContextMemory(HANDLE hContext
, uint32_t size
, uint32_t align
)
1683 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1684 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1686 return pDC
->pState
->pArena
->AllocAligned(size
, align
);
1689 //////////////////////////////////////////////////////////////////////////
1690 /// @brief Enables stats counting
1691 /// @param hContext - Handle passed back from SwrCreateContext
1692 /// @param enable - If true then counts are incremented.
1693 void SwrEnableStatsFE(HANDLE hContext
, bool enable
)
1695 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1696 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1698 pDC
->pState
->state
.enableStatsFE
= enable
;
1701 //////////////////////////////////////////////////////////////////////////
1702 /// @brief Enables stats counting
1703 /// @param hContext - Handle passed back from SwrCreateContext
1704 /// @param enable - If true then counts are incremented.
1705 void SwrEnableStatsBE(HANDLE hContext
, bool enable
)
1707 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1708 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1710 pDC
->pState
->state
.enableStatsBE
= enable
;
1713 //////////////////////////////////////////////////////////////////////////
1714 /// @brief Mark end of frame - used for performance profiling
1715 /// @param hContext - Handle passed back from SwrCreateContext
1716 void SWR_API
SwrEndFrame(HANDLE hContext
)
1718 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1719 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1720 (void)pDC
; // var used
1722 RDTSC_ENDFRAME(pContext
->pBucketMgr
);
1723 AR_API_EVENT(FrameEndEvent(pContext
->frameCount
, pDC
->drawId
));
1725 pContext
->frameCount
++;
1728 void InitSimLoadTilesTable();
1729 void InitSimStoreTilesTable();
1730 void InitSimClearTilesTable();
1732 void InitClearTilesTable();
1733 void InitBackendFuncTables();
1735 //////////////////////////////////////////////////////////////////////////
1736 /// @brief Initialize swr backend and memory internal tables
1739 InitClearTilesTable();
1740 InitBackendFuncTables();
1741 InitRasterizerFunctions();
1744 void SwrGetInterface(SWR_INTERFACE
& out_funcs
)
1746 out_funcs
.pfnSwrCreateContext
= SwrCreateContext
;
1747 out_funcs
.pfnSwrDestroyContext
= SwrDestroyContext
;
1748 out_funcs
.pfnSwrBindApiThread
= SwrBindApiThread
;
1749 out_funcs
.pfnSwrSaveState
= SwrSaveState
;
1750 out_funcs
.pfnSwrRestoreState
= SwrRestoreState
;
1751 out_funcs
.pfnSwrSync
= SwrSync
;
1752 out_funcs
.pfnSwrStallBE
= SwrStallBE
;
1753 out_funcs
.pfnSwrWaitForIdle
= SwrWaitForIdle
;
1754 out_funcs
.pfnSwrWaitForIdleFE
= SwrWaitForIdleFE
;
1755 out_funcs
.pfnSwrSetVertexBuffers
= SwrSetVertexBuffers
;
1756 out_funcs
.pfnSwrSetIndexBuffer
= SwrSetIndexBuffer
;
1757 out_funcs
.pfnSwrSetFetchFunc
= SwrSetFetchFunc
;
1758 out_funcs
.pfnSwrSetSoFunc
= SwrSetSoFunc
;
1759 out_funcs
.pfnSwrSetSoState
= SwrSetSoState
;
1760 out_funcs
.pfnSwrSetSoBuffers
= SwrSetSoBuffers
;
1761 out_funcs
.pfnSwrSetVertexFunc
= SwrSetVertexFunc
;
1762 out_funcs
.pfnSwrSetFrontendState
= SwrSetFrontendState
;
1763 out_funcs
.pfnSwrSetGsState
= SwrSetGsState
;
1764 out_funcs
.pfnSwrSetGsFunc
= SwrSetGsFunc
;
1765 out_funcs
.pfnSwrSetCsFunc
= SwrSetCsFunc
;
1766 out_funcs
.pfnSwrSetTsState
= SwrSetTsState
;
1767 out_funcs
.pfnSwrSetHsFunc
= SwrSetHsFunc
;
1768 out_funcs
.pfnSwrSetDsFunc
= SwrSetDsFunc
;
1769 out_funcs
.pfnSwrSetDepthStencilState
= SwrSetDepthStencilState
;
1770 out_funcs
.pfnSwrSetBackendState
= SwrSetBackendState
;
1771 out_funcs
.pfnSwrSetDepthBoundsState
= SwrSetDepthBoundsState
;
1772 out_funcs
.pfnSwrSetPixelShaderState
= SwrSetPixelShaderState
;
1773 out_funcs
.pfnSwrSetBlendState
= SwrSetBlendState
;
1774 out_funcs
.pfnSwrSetBlendFunc
= SwrSetBlendFunc
;
1775 out_funcs
.pfnSwrDraw
= SwrDraw
;
1776 out_funcs
.pfnSwrDrawInstanced
= SwrDrawInstanced
;
1777 out_funcs
.pfnSwrDrawIndexed
= SwrDrawIndexed
;
1778 out_funcs
.pfnSwrDrawIndexedInstanced
= SwrDrawIndexedInstanced
;
1779 out_funcs
.pfnSwrInvalidateTiles
= SwrInvalidateTiles
;
1780 out_funcs
.pfnSwrDiscardRect
= SwrDiscardRect
;
1781 out_funcs
.pfnSwrDispatch
= SwrDispatch
;
1782 out_funcs
.pfnSwrStoreTiles
= SwrStoreTiles
;
1783 out_funcs
.pfnSwrClearRenderTarget
= SwrClearRenderTarget
;
1784 out_funcs
.pfnSwrSetRastState
= SwrSetRastState
;
1785 out_funcs
.pfnSwrSetViewports
= SwrSetViewports
;
1786 out_funcs
.pfnSwrSetScissorRects
= SwrSetScissorRects
;
1787 out_funcs
.pfnSwrGetPrivateContextState
= SwrGetPrivateContextState
;
1788 out_funcs
.pfnSwrAllocDrawContextMemory
= SwrAllocDrawContextMemory
;
1789 out_funcs
.pfnSwrEnableStatsFE
= SwrEnableStatsFE
;
1790 out_funcs
.pfnSwrEnableStatsBE
= SwrEnableStatsBE
;
1791 out_funcs
.pfnSwrEndFrame
= SwrEndFrame
;
1792 out_funcs
.pfnSwrInit
= SwrInit
;