1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief API implementation
27 ******************************************************************************/
35 #include "core/backend.h"
36 #include "core/context.h"
37 #include "core/depthstencil.h"
38 #include "core/frontend.h"
39 #include "core/rasterizer.h"
40 #include "core/rdtsc_core.h"
41 #include "core/threads.h"
42 #include "core/tilemgr.h"
43 #include "core/clip.h"
44 #include "core/utils.h"
45 #include "core/tileset.h"
47 #include "common/os.h"
49 static const SWR_RECT g_MaxScissorRect
= {0, 0, KNOB_MAX_SCISSOR_X
, KNOB_MAX_SCISSOR_Y
};
51 void SetupDefaultState(SWR_CONTEXT
* pContext
);
53 static INLINE SWR_CONTEXT
* GetContext(HANDLE hContext
)
55 return (SWR_CONTEXT
*)hContext
;
58 void WakeAllThreads(SWR_CONTEXT
* pContext
)
60 pContext
->FifosNotEmpty
.notify_all();
63 //////////////////////////////////////////////////////////////////////////
64 /// @brief Create SWR Context.
65 /// @param pCreateInfo - pointer to creation info.
66 HANDLE
SwrCreateContext(SWR_CREATECONTEXT_INFO
* pCreateInfo
)
68 void* pContextMem
= AlignedMalloc(sizeof(SWR_CONTEXT
), KNOB_SIMD_WIDTH
* 4);
69 memset(pContextMem
, 0, sizeof(SWR_CONTEXT
));
70 SWR_CONTEXT
* pContext
= new (pContextMem
) SWR_CONTEXT();
72 pContext
->privateStateSize
= pCreateInfo
->privateStateSize
;
74 // initialize callback functions
75 pContext
->pfnLoadTile
= pCreateInfo
->pfnLoadTile
;
76 pContext
->pfnStoreTile
= pCreateInfo
->pfnStoreTile
;
77 pContext
->pfnTranslateGfxptrForRead
= pCreateInfo
->pfnTranslateGfxptrForRead
;
78 pContext
->pfnTranslateGfxptrForWrite
= pCreateInfo
->pfnTranslateGfxptrForWrite
;
79 pContext
->pfnMakeGfxPtr
= pCreateInfo
->pfnMakeGfxPtr
;
80 pContext
->pfnCreateMemoryContext
= pCreateInfo
->pfnCreateMemoryContext
;
81 pContext
->pfnDestroyMemoryContext
= pCreateInfo
->pfnDestroyMemoryContext
;
82 pContext
->pfnUpdateSoWriteOffset
= pCreateInfo
->pfnUpdateSoWriteOffset
;
83 pContext
->pfnUpdateStats
= pCreateInfo
->pfnUpdateStats
;
84 pContext
->pfnUpdateStatsFE
= pCreateInfo
->pfnUpdateStatsFE
;
85 pContext
->pfnUpdateStreamOut
= pCreateInfo
->pfnUpdateStreamOut
;
88 pContext
->hExternalMemory
= pCreateInfo
->hExternalMemory
;
90 pContext
->MAX_DRAWS_IN_FLIGHT
= KNOB_MAX_DRAWS_IN_FLIGHT
;
91 if (pCreateInfo
->MAX_DRAWS_IN_FLIGHT
!= 0)
93 pContext
->MAX_DRAWS_IN_FLIGHT
= pCreateInfo
->MAX_DRAWS_IN_FLIGHT
;
96 pContext
->dcRing
.Init(pContext
->MAX_DRAWS_IN_FLIGHT
);
97 pContext
->dsRing
.Init(pContext
->MAX_DRAWS_IN_FLIGHT
);
99 pContext
->pMacroTileManagerArray
=
100 (MacroTileMgr
*)AlignedMalloc(sizeof(MacroTileMgr
) * pContext
->MAX_DRAWS_IN_FLIGHT
, 64);
101 pContext
->pDispatchQueueArray
=
102 (DispatchQueue
*)AlignedMalloc(sizeof(DispatchQueue
) * pContext
->MAX_DRAWS_IN_FLIGHT
, 64);
104 for (uint32_t dc
= 0; dc
< pContext
->MAX_DRAWS_IN_FLIGHT
; ++dc
)
106 pContext
->dcRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
107 new (&pContext
->pMacroTileManagerArray
[dc
]) MacroTileMgr(*pContext
->dcRing
[dc
].pArena
);
108 new (&pContext
->pDispatchQueueArray
[dc
]) DispatchQueue();
110 pContext
->dsRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
113 if (pCreateInfo
->pThreadInfo
)
115 pContext
->threadInfo
= *pCreateInfo
->pThreadInfo
;
119 pContext
->threadInfo
.MAX_WORKER_THREADS
= KNOB_MAX_WORKER_THREADS
;
120 pContext
->threadInfo
.BASE_NUMA_NODE
= KNOB_BASE_NUMA_NODE
;
121 pContext
->threadInfo
.BASE_CORE
= KNOB_BASE_CORE
;
122 pContext
->threadInfo
.BASE_THREAD
= KNOB_BASE_THREAD
;
123 pContext
->threadInfo
.MAX_NUMA_NODES
= KNOB_MAX_NUMA_NODES
;
124 pContext
->threadInfo
.MAX_CORES_PER_NUMA_NODE
= KNOB_MAX_CORES_PER_NUMA_NODE
;
125 pContext
->threadInfo
.MAX_THREADS_PER_CORE
= KNOB_MAX_THREADS_PER_CORE
;
126 pContext
->threadInfo
.SINGLE_THREADED
= KNOB_SINGLE_THREADED
;
129 if (pCreateInfo
->pApiThreadInfo
)
131 pContext
->apiThreadInfo
= *pCreateInfo
->pApiThreadInfo
;
135 pContext
->apiThreadInfo
.bindAPIThread0
= true;
136 pContext
->apiThreadInfo
.numAPIReservedThreads
= 1;
137 pContext
->apiThreadInfo
.numAPIThreadsPerCore
= 1;
140 if (pCreateInfo
->pWorkerPrivateState
)
142 pContext
->workerPrivateState
= *pCreateInfo
->pWorkerPrivateState
;
145 memset(&pContext
->WaitLock
, 0, sizeof(pContext
->WaitLock
));
146 memset(&pContext
->FifosNotEmpty
, 0, sizeof(pContext
->FifosNotEmpty
));
147 new (&pContext
->WaitLock
) std::mutex();
148 new (&pContext
->FifosNotEmpty
) std::condition_variable();
150 CreateThreadPool(pContext
, &pContext
->threadPool
);
152 if (pContext
->apiThreadInfo
.bindAPIThread0
)
154 BindApiThread(pContext
, 0);
157 if (pContext
->threadInfo
.SINGLE_THREADED
)
159 pContext
->pSingleThreadLockedTiles
= new TileSet();
162 pContext
->ppScratch
= new uint8_t*[pContext
->NumWorkerThreads
];
164 (SWR_STATS
*)AlignedMalloc(sizeof(SWR_STATS
) * pContext
->NumWorkerThreads
, 64);
166 #if defined(KNOB_ENABLE_AR)
167 // Setup ArchRast thread contexts which includes +1 for API thread.
168 pContext
->pArContext
= new HANDLE
[pContext
->NumWorkerThreads
+ 1];
169 pContext
->pArContext
[pContext
->NumWorkerThreads
] =
170 ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API
);
173 #if defined(KNOB_ENABLE_RDTSC)
174 pContext
->pBucketMgr
= new BucketManager(pCreateInfo
->contextName
);
175 RDTSC_RESET(pContext
->pBucketMgr
);
176 RDTSC_INIT(pContext
->pBucketMgr
, 0);
179 // Allocate scratch space for workers.
180 ///@note We could lazily allocate this but its rather small amount of memory.
181 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
185 pContext
->threadPool
.pThreadData
? pContext
->threadPool
.pThreadData
[i
].numaId
: 0;
186 pContext
->ppScratch
[i
] = (uint8_t*)VirtualAllocExNuma(GetCurrentProcess(),
188 KNOB_WORKER_SCRATCH_SPACE_SIZE
,
189 MEM_RESERVE
| MEM_COMMIT
,
193 pContext
->ppScratch
[i
] =
194 (uint8_t*)AlignedMalloc(KNOB_WORKER_SCRATCH_SPACE_SIZE
, KNOB_SIMD_WIDTH
* 4);
197 #if defined(KNOB_ENABLE_AR)
198 // Initialize worker thread context for ArchRast.
199 pContext
->pArContext
[i
] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::WORKER
);
201 SWR_WORKER_DATA
* pWorkerData
= (SWR_WORKER_DATA
*)pContext
->threadPool
.pThreadData
[i
].pWorkerPrivateData
;
202 pWorkerData
->hArContext
= pContext
->pArContext
[i
];
208 #if defined(KNOB_ENABLE_AR)
209 // cache the API thread event manager, for use with sim layer
210 pCreateInfo
->hArEventManager
= pContext
->pArContext
[pContext
->NumWorkerThreads
];
213 // State setup AFTER context is fully initialized
214 SetupDefaultState(pContext
);
216 // initialize hot tile manager
217 pContext
->pHotTileMgr
= new HotTileMgr();
219 // pass pointer to bucket manager back to caller
220 #ifdef KNOB_ENABLE_RDTSC
221 pCreateInfo
->pBucketMgr
= pContext
->pBucketMgr
;
224 pCreateInfo
->contextSaveSize
= sizeof(API_STATE
);
226 StartThreadPool(pContext
, &pContext
->threadPool
);
228 return (HANDLE
)pContext
;
231 void CopyState(DRAW_STATE
& dst
, const DRAW_STATE
& src
)
233 memcpy(&dst
.state
, &src
.state
, sizeof(API_STATE
));
236 template <bool IsDraw
>
237 void QueueWork(SWR_CONTEXT
* pContext
)
239 DRAW_CONTEXT
* pDC
= pContext
->pCurDrawContext
;
240 uint32_t dcIndex
= pDC
->drawId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
244 pDC
->pTileMgr
= &pContext
->pMacroTileManagerArray
[dcIndex
];
245 pDC
->pTileMgr
->initialize();
248 // Each worker thread looks at a DC for both FE and BE work at different times and so we
249 // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers
250 // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
251 // then moved on if all work is done.)
252 pContext
->pCurDrawContext
->threadsDone
= pContext
->NumFEThreads
+ pContext
->NumBEThreads
;
256 InterlockedIncrement(&pContext
->drawsOutstandingFE
);
261 std::unique_lock
<std::mutex
> lock(pContext
->WaitLock
);
262 pContext
->dcRing
.Enqueue();
265 if (pContext
->threadInfo
.SINGLE_THREADED
)
267 uint32_t mxcsr
= SetOptimalVectorCSR();
271 uint32_t curDraw
[2] = {pContext
->pCurDrawContext
->drawId
,
272 pContext
->pCurDrawContext
->drawId
};
273 WorkOnFifoFE(pContext
, 0, curDraw
[0]);
274 WorkOnFifoBE(pContext
, 0, curDraw
[1], *pContext
->pSingleThreadLockedTiles
, 0, 0);
278 uint32_t curDispatch
= pContext
->pCurDrawContext
->drawId
;
279 WorkOnCompute(pContext
, 0, curDispatch
);
282 // Dequeue the work here, if not already done, since we're single threaded (i.e. no
284 while (CompleteDrawContext(pContext
, pContext
->pCurDrawContext
) > 0)
289 RestoreVectorCSR(mxcsr
);
293 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDrawWakeAllThreads
, pDC
->drawId
);
294 WakeAllThreads(pContext
);
295 RDTSC_END(pContext
->pBucketMgr
, APIDrawWakeAllThreads
, 1);
298 // Set current draw context to NULL so that next state call forces a new draw context to be
299 // created and populated.
300 pContext
->pPrevDrawContext
= pContext
->pCurDrawContext
;
301 pContext
->pCurDrawContext
= nullptr;
304 INLINE
void QueueDraw(SWR_CONTEXT
* pContext
)
306 QueueWork
<true>(pContext
);
309 INLINE
void QueueDispatch(SWR_CONTEXT
* pContext
)
311 QueueWork
<false>(pContext
);
314 DRAW_CONTEXT
* GetDrawContext(SWR_CONTEXT
* pContext
, bool isSplitDraw
= false)
316 RDTSC_BEGIN(pContext
->pBucketMgr
, APIGetDrawContext
, 0);
317 // If current draw context is null then need to obtain a new draw context to use from ring.
318 if (pContext
->pCurDrawContext
== nullptr)
320 // Need to wait for a free entry.
321 while (pContext
->dcRing
.IsFull())
326 uint64_t curDraw
= pContext
->dcRing
.GetHead();
327 uint32_t dcIndex
= curDraw
% pContext
->MAX_DRAWS_IN_FLIGHT
;
329 if ((pContext
->frameCount
- pContext
->lastFrameChecked
) > 2 ||
330 (curDraw
- pContext
->lastDrawChecked
) > 0x10000)
332 // Take this opportunity to clean-up old arena allocations
333 pContext
->cachingArenaAllocator
.FreeOldBlocks();
335 pContext
->lastFrameChecked
= pContext
->frameCount
;
336 pContext
->lastDrawChecked
= curDraw
;
339 DRAW_CONTEXT
* pCurDrawContext
= &pContext
->dcRing
[dcIndex
];
340 pContext
->pCurDrawContext
= pCurDrawContext
;
342 // Assign next available entry in DS ring to this DC.
343 uint32_t dsIndex
= pContext
->curStateId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
344 pCurDrawContext
->pState
= &pContext
->dsRing
[dsIndex
];
346 // Copy previous state to current state.
347 if (pContext
->pPrevDrawContext
)
349 DRAW_CONTEXT
* pPrevDrawContext
= pContext
->pPrevDrawContext
;
351 // If we're splitting our draw then we can just use the same state from the previous
352 // draw. In this case, we won't increment the DS ring index so the next non-split
353 // draw can receive the state.
354 if (isSplitDraw
== false)
356 CopyState(*pCurDrawContext
->pState
, *pPrevDrawContext
->pState
);
358 // Should have been cleaned up previously
359 SWR_ASSERT(pCurDrawContext
->pState
->pArena
->IsEmpty() == true);
361 pCurDrawContext
->pState
->pPrivateState
= nullptr;
363 pContext
->curStateId
++; // Progress state ring index forward.
367 // If its a split draw then just copy the state pointer over
368 // since its the same draw.
369 pCurDrawContext
->pState
= pPrevDrawContext
->pState
;
370 SWR_ASSERT(pPrevDrawContext
->cleanupState
== false);
375 SWR_ASSERT(pCurDrawContext
->pState
->pArena
->IsEmpty() == true);
376 pContext
->curStateId
++; // Progress state ring index forward.
379 SWR_ASSERT(pCurDrawContext
->pArena
->IsEmpty() == true);
382 pCurDrawContext
->dependent
= false;
383 pCurDrawContext
->dependentFE
= false;
385 pCurDrawContext
->pContext
= pContext
;
386 pCurDrawContext
->isCompute
= false; // Dispatch has to set this to true.
388 pCurDrawContext
->doneFE
= false;
389 pCurDrawContext
->FeLock
= 0;
390 pCurDrawContext
->threadsDone
= 0;
391 pCurDrawContext
->retireCallback
.pfnCallbackFunc
= nullptr;
393 pCurDrawContext
->dynState
.Reset(pContext
->NumWorkerThreads
);
395 // Assign unique drawId for this DC
396 pCurDrawContext
->drawId
= pContext
->dcRing
.GetHead();
398 pCurDrawContext
->cleanupState
= true;
402 SWR_ASSERT(isSplitDraw
== false, "Split draw should only be used when obtaining a new DC");
405 RDTSC_END(pContext
->pBucketMgr
, APIGetDrawContext
, 0);
406 return pContext
->pCurDrawContext
;
409 API_STATE
* GetDrawState(SWR_CONTEXT
* pContext
)
411 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
412 SWR_ASSERT(pDC
->pState
!= nullptr);
414 return &pDC
->pState
->state
;
417 void SwrDestroyContext(HANDLE hContext
)
419 SWR_CONTEXT
* pContext
= GetContext(hContext
);
420 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
422 pDC
->FeWork
.type
= SHUTDOWN
;
423 pDC
->FeWork
.pfnWork
= ProcessShutdown
;
428 DestroyThreadPool(pContext
, &pContext
->threadPool
);
431 for (uint32_t i
= 0; i
< pContext
->MAX_DRAWS_IN_FLIGHT
; ++i
)
433 AlignedFree(pContext
->dcRing
[i
].dynState
.pStats
);
434 delete pContext
->dcRing
[i
].pArena
;
435 delete pContext
->dsRing
[i
].pArena
;
436 pContext
->pMacroTileManagerArray
[i
].~MacroTileMgr();
437 pContext
->pDispatchQueueArray
[i
].~DispatchQueue();
440 AlignedFree(pContext
->pDispatchQueueArray
);
441 AlignedFree(pContext
->pMacroTileManagerArray
);
443 // Free scratch space.
444 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
447 VirtualFree(pContext
->ppScratch
[i
], 0, MEM_RELEASE
);
449 AlignedFree(pContext
->ppScratch
[i
]);
452 #if defined(KNOB_ENABLE_AR)
453 ArchRast::DestroyThreadContext(pContext
->pArContext
[i
]);
457 #if defined(KNOB_ENABLE_RDTSC)
458 delete pContext
->pBucketMgr
;
461 delete[] pContext
->ppScratch
;
462 AlignedFree(pContext
->pStats
);
464 delete pContext
->pHotTileMgr
;
465 delete pContext
->pSingleThreadLockedTiles
;
467 pContext
->~SWR_CONTEXT();
468 AlignedFree(GetContext(hContext
));
471 void SwrBindApiThread(HANDLE hContext
, uint32_t apiThreadId
)
473 SWR_CONTEXT
* pContext
= GetContext(hContext
);
474 BindApiThread(pContext
, apiThreadId
);
477 void SWR_API
SwrSaveState(HANDLE hContext
, void* pOutputStateBlock
, size_t memSize
)
479 SWR_CONTEXT
* pContext
= GetContext(hContext
);
480 auto pSrc
= GetDrawState(pContext
);
481 assert(pOutputStateBlock
&& memSize
>= sizeof(*pSrc
));
483 memcpy(pOutputStateBlock
, pSrc
, sizeof(*pSrc
));
486 void SWR_API
SwrRestoreState(HANDLE hContext
, const void* pStateBlock
, size_t memSize
)
488 SWR_CONTEXT
* pContext
= GetContext(hContext
);
489 auto pDst
= GetDrawState(pContext
);
490 assert(pStateBlock
&& memSize
>= sizeof(*pDst
));
492 memcpy(pDst
, pStateBlock
, sizeof(*pDst
));
495 void SetupDefaultState(SWR_CONTEXT
* pContext
)
497 API_STATE
* pState
= GetDrawState(pContext
);
499 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
500 pState
->rastState
.frontWinding
= SWR_FRONTWINDING_CCW
;
502 pState
->depthBoundsState
.depthBoundsTestEnable
= false;
503 pState
->depthBoundsState
.depthBoundsTestMinValue
= 0.0f
;
504 pState
->depthBoundsState
.depthBoundsTestMaxValue
= 1.0f
;
507 void SWR_API
SwrSync(HANDLE hContext
,
508 PFN_CALLBACK_FUNC pfnFunc
,
513 SWR_ASSERT(pfnFunc
!= nullptr);
515 SWR_CONTEXT
* pContext
= GetContext(hContext
);
516 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
518 RDTSC_BEGIN(pContext
->pBucketMgr
, APISync
, 0);
520 pDC
->FeWork
.type
= SYNC
;
521 pDC
->FeWork
.pfnWork
= ProcessSync
;
523 // Setup callback function
524 pDC
->retireCallback
.pfnCallbackFunc
= pfnFunc
;
525 pDC
->retireCallback
.userData
= userData
;
526 pDC
->retireCallback
.userData2
= userData2
;
527 pDC
->retireCallback
.userData3
= userData3
;
529 AR_API_EVENT(SwrSyncEvent(pDC
->drawId
));
534 RDTSC_END(pContext
->pBucketMgr
, APISync
, 1);
537 void SwrStallBE(HANDLE hContext
)
539 SWR_CONTEXT
* pContext
= GetContext(hContext
);
540 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
542 pDC
->dependent
= true;
545 void SwrWaitForIdle(HANDLE hContext
)
547 SWR_CONTEXT
* pContext
= GetContext(hContext
);
549 RDTSC_BEGIN(pContext
->pBucketMgr
, APIWaitForIdle
, 0);
551 while (!pContext
->dcRing
.IsEmpty())
556 RDTSC_END(pContext
->pBucketMgr
, APIWaitForIdle
, 1);
559 void SwrWaitForIdleFE(HANDLE hContext
)
561 SWR_CONTEXT
* pContext
= GetContext(hContext
);
563 RDTSC_BEGIN(pContext
->pBucketMgr
, APIWaitForIdle
, 0);
565 while (pContext
->drawsOutstandingFE
> 0)
570 RDTSC_END(pContext
->pBucketMgr
, APIWaitForIdle
, 1);
573 void SwrSetVertexBuffers(HANDLE hContext
,
575 const SWR_VERTEX_BUFFER_STATE
* pVertexBuffers
)
577 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
579 for (uint32_t i
= 0; i
< numBuffers
; ++i
)
581 const SWR_VERTEX_BUFFER_STATE
* pVB
= &pVertexBuffers
[i
];
582 pState
->vertexBuffers
[pVB
->index
] = *pVB
;
586 void SwrSetIndexBuffer(HANDLE hContext
, const SWR_INDEX_BUFFER_STATE
* pIndexBuffer
)
588 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
590 pState
->indexBuffer
= *pIndexBuffer
;
593 void SwrSetFetchFunc(HANDLE hContext
, PFN_FETCH_FUNC pfnFetchFunc
)
595 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
597 pState
->pfnFetchFunc
= pfnFetchFunc
;
600 void SwrSetSoFunc(HANDLE hContext
, PFN_SO_FUNC pfnSoFunc
, uint32_t streamIndex
)
602 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
604 SWR_ASSERT(streamIndex
< MAX_SO_STREAMS
);
606 pState
->pfnSoFunc
[streamIndex
] = pfnSoFunc
;
609 void SwrSetSoState(HANDLE hContext
, SWR_STREAMOUT_STATE
* pSoState
)
611 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
613 pState
->soState
= *pSoState
;
616 void SwrSetSoBuffers(HANDLE hContext
, SWR_STREAMOUT_BUFFER
* pSoBuffer
, uint32_t slot
)
618 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
620 SWR_ASSERT((slot
< MAX_SO_STREAMS
), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot
);
622 // remember buffer status in case of future resume StreamOut
623 if ((pState
->soBuffer
[slot
].pBuffer
!= 0) && (pSoBuffer
->pBuffer
== 0))
624 pState
->soPausedBuffer
[slot
] = pState
->soBuffer
[slot
];
627 if (pState
->soPausedBuffer
[slot
].pBuffer
== pSoBuffer
->pBuffer
)
628 pState
->soBuffer
[slot
] = pState
->soPausedBuffer
[slot
];
630 pState
->soBuffer
[slot
] = *pSoBuffer
;
633 void SwrSetVertexFunc(HANDLE hContext
, PFN_VERTEX_FUNC pfnVertexFunc
)
635 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
637 pState
->pfnVertexFunc
= pfnVertexFunc
;
640 void SwrSetFrontendState(HANDLE hContext
, SWR_FRONTEND_STATE
* pFEState
)
642 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
643 pState
->frontendState
= *pFEState
;
646 void SwrSetGsState(HANDLE hContext
, SWR_GS_STATE
* pGSState
)
648 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
649 pState
->gsState
= *pGSState
;
652 void SwrSetGsFunc(HANDLE hContext
, PFN_GS_FUNC pfnGsFunc
)
654 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
655 pState
->pfnGsFunc
= pfnGsFunc
;
658 void SwrSetCsFunc(HANDLE hContext
,
659 PFN_CS_FUNC pfnCsFunc
,
660 uint32_t totalThreadsInGroup
,
661 uint32_t totalSpillFillSize
,
662 uint32_t scratchSpaceSizePerWarp
,
665 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
666 pState
->pfnCsFunc
= pfnCsFunc
;
667 pState
->totalThreadsInGroup
= totalThreadsInGroup
;
668 pState
->totalSpillFillSize
= totalSpillFillSize
;
669 pState
->scratchSpaceSizePerWarp
= scratchSpaceSizePerWarp
;
670 pState
->scratchSpaceNumWarps
= numWarps
;
673 void SwrSetTsState(HANDLE hContext
, SWR_TS_STATE
* pState
)
675 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
676 pApiState
->tsState
= *pState
;
679 void SwrSetHsFunc(HANDLE hContext
, PFN_HS_FUNC pfnFunc
)
681 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
682 pApiState
->pfnHsFunc
= pfnFunc
;
685 void SwrSetDsFunc(HANDLE hContext
, PFN_DS_FUNC pfnFunc
)
687 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
688 pApiState
->pfnDsFunc
= pfnFunc
;
691 void SwrSetDepthStencilState(HANDLE hContext
, SWR_DEPTH_STENCIL_STATE
* pDSState
)
693 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
695 pState
->depthStencilState
= *pDSState
;
698 void SwrSetBackendState(HANDLE hContext
, SWR_BACKEND_STATE
* pBEState
)
700 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
702 pState
->backendState
= *pBEState
;
705 void SwrSetDepthBoundsState(HANDLE hContext
, SWR_DEPTH_BOUNDS_STATE
* pDBState
)
707 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
709 pState
->depthBoundsState
= *pDBState
;
712 void SwrSetPixelShaderState(HANDLE hContext
, SWR_PS_STATE
* pPSState
)
714 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
715 pState
->psState
= *pPSState
;
718 void SwrSetBlendState(HANDLE hContext
, SWR_BLEND_STATE
* pBlendState
)
720 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
721 memcpy(&pState
->blendState
, pBlendState
, sizeof(SWR_BLEND_STATE
));
724 void SwrSetBlendFunc(HANDLE hContext
, uint32_t renderTarget
, PFN_BLEND_JIT_FUNC pfnBlendFunc
)
726 SWR_ASSERT(renderTarget
< SWR_NUM_RENDERTARGETS
);
727 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
728 pState
->pfnBlendFunc
[renderTarget
] = pfnBlendFunc
;
731 // update guardband multipliers for the viewport
732 void updateGuardbands(API_STATE
* pState
)
734 uint32_t numGbs
= pState
->backendState
.readViewportArrayIndex
? KNOB_NUM_VIEWPORTS_SCISSORS
: 1;
736 for (uint32_t i
= 0; i
< numGbs
; ++i
)
738 // guardband center is viewport center
739 pState
->gbState
.left
[i
] = KNOB_GUARDBAND_WIDTH
/ pState
->vp
[i
].width
;
740 pState
->gbState
.right
[i
] = KNOB_GUARDBAND_WIDTH
/ pState
->vp
[i
].width
;
741 pState
->gbState
.top
[i
] = KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[i
].height
;
742 pState
->gbState
.bottom
[i
] = KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[i
].height
;
746 void SwrSetRastState(HANDLE hContext
, const SWR_RASTSTATE
* pRastState
)
748 SWR_CONTEXT
* pContext
= GetContext(hContext
);
749 API_STATE
* pState
= GetDrawState(pContext
);
751 memcpy(&pState
->rastState
, pRastState
, sizeof(SWR_RASTSTATE
));
754 void SwrSetViewports(HANDLE hContext
,
755 uint32_t numViewports
,
756 const SWR_VIEWPORT
* pViewports
,
757 const SWR_VIEWPORT_MATRICES
* pMatrices
)
759 SWR_ASSERT(numViewports
<= KNOB_NUM_VIEWPORTS_SCISSORS
, "Invalid number of viewports.");
761 SWR_CONTEXT
* pContext
= GetContext(hContext
);
762 API_STATE
* pState
= GetDrawState(pContext
);
764 memcpy(&pState
->vp
[0], pViewports
, sizeof(SWR_VIEWPORT
) * numViewports
);
765 // @todo Faster to copy portions of the SOA or just copy all of it?
766 memcpy(&pState
->vpMatrices
, pMatrices
, sizeof(SWR_VIEWPORT_MATRICES
));
769 void SwrSetScissorRects(HANDLE hContext
, uint32_t numScissors
, const SWR_RECT
* pScissors
)
771 SWR_ASSERT(numScissors
<= KNOB_NUM_VIEWPORTS_SCISSORS
, "Invalid number of scissor rects.");
773 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
774 memcpy(&pState
->scissorRects
[0], pScissors
, numScissors
* sizeof(pScissors
[0]));
777 void SetupMacroTileScissors(DRAW_CONTEXT
* pDC
)
779 API_STATE
* pState
= &pDC
->pState
->state
;
780 uint32_t numScissors
=
781 pState
->backendState
.readViewportArrayIndex
? KNOB_NUM_VIEWPORTS_SCISSORS
: 1;
782 pState
->scissorsTileAligned
= true;
784 for (uint32_t index
= 0; index
< numScissors
; ++index
)
786 SWR_RECT
& scissorInFixedPoint
= pState
->scissorsInFixedPoint
[index
];
788 // Set up scissor dimensions based on scissor or viewport
789 if (pState
->rastState
.scissorEnable
)
791 scissorInFixedPoint
= pState
->scissorRects
[index
];
795 // the vp width and height must be added to origin un-rounded then the result round to
796 // -inf. The cast to int works for rounding assuming all [left, right, top, bottom] are
798 scissorInFixedPoint
.xmin
= (int32_t)pState
->vp
[index
].x
;
799 scissorInFixedPoint
.xmax
= (int32_t)(pState
->vp
[index
].x
+ pState
->vp
[index
].width
);
800 scissorInFixedPoint
.ymin
= (int32_t)pState
->vp
[index
].y
;
801 scissorInFixedPoint
.ymax
= (int32_t)(pState
->vp
[index
].y
+ pState
->vp
[index
].height
);
805 scissorInFixedPoint
&= g_MaxScissorRect
;
807 // Test for tile alignment
809 tileAligned
= (scissorInFixedPoint
.xmin
% KNOB_TILE_X_DIM
) == 0;
810 tileAligned
&= (scissorInFixedPoint
.ymin
% KNOB_TILE_Y_DIM
) == 0;
811 tileAligned
&= (scissorInFixedPoint
.xmax
% KNOB_TILE_X_DIM
) == 0;
812 tileAligned
&= (scissorInFixedPoint
.ymax
% KNOB_TILE_Y_DIM
) == 0;
814 pState
->scissorsTileAligned
&= tileAligned
;
816 // Scale to fixed point
817 scissorInFixedPoint
.xmin
*= FIXED_POINT_SCALE
;
818 scissorInFixedPoint
.xmax
*= FIXED_POINT_SCALE
;
819 scissorInFixedPoint
.ymin
*= FIXED_POINT_SCALE
;
820 scissorInFixedPoint
.ymax
*= FIXED_POINT_SCALE
;
822 // Make scissor inclusive
823 scissorInFixedPoint
.xmax
-= 1;
824 scissorInFixedPoint
.ymax
-= 1;
829 // templated backend function tables
831 void SetupPipeline(DRAW_CONTEXT
* pDC
)
833 DRAW_STATE
* pState
= pDC
->pState
;
834 const SWR_RASTSTATE
& rastState
= pState
->state
.rastState
;
835 const SWR_PS_STATE
& psState
= pState
->state
.psState
;
836 BACKEND_FUNCS
& backendFuncs
= pState
->backendFuncs
;
839 if (psState
.pfnPixelShader
== nullptr)
841 backendFuncs
.pfnBackend
= gBackendNullPs
[pState
->state
.rastState
.sampleCount
];
845 const uint32_t forcedSampleCount
= (rastState
.forcedSampleCount
) ? 1 : 0;
846 const bool bMultisampleEnable
=
847 ((rastState
.sampleCount
> SWR_MULTISAMPLE_1X
) || forcedSampleCount
) ? 1 : 0;
848 const uint32_t centroid
=
849 ((psState
.barycentricsMask
& SWR_BARYCENTRIC_CENTROID_MASK
) > 0) ? 1 : 0;
850 const uint32_t canEarlyZ
=
851 (psState
.forceEarlyZ
|| (!psState
.writesODepth
&& !psState
.usesUAV
)) ? 1 : 0;
852 SWR_BARYCENTRICS_MASK barycentricsMask
= (SWR_BARYCENTRICS_MASK
)psState
.barycentricsMask
;
854 // select backend function
855 switch (psState
.shadingRate
)
857 case SWR_SHADING_RATE_PIXEL
:
858 if (bMultisampleEnable
)
860 // always need to generate I & J per sample for Z interpolation
862 (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
863 backendFuncs
.pfnBackend
=
864 gBackendPixelRateTable
[rastState
.sampleCount
][rastState
.bIsCenterPattern
]
865 [psState
.inputCoverage
][centroid
][forcedSampleCount
]
871 // always need to generate I & J per pixel for Z interpolation
873 (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_PIXEL_MASK
);
874 backendFuncs
.pfnBackend
=
875 gBackendSingleSample
[psState
.inputCoverage
][centroid
][canEarlyZ
];
878 case SWR_SHADING_RATE_SAMPLE
:
879 SWR_ASSERT(rastState
.bIsCenterPattern
!= true);
880 // always need to generate I & J per sample for Z interpolation
882 (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
883 backendFuncs
.pfnBackend
=
884 gBackendSampleRateTable
[rastState
.sampleCount
][psState
.inputCoverage
][centroid
]
888 SWR_ASSERT(0 && "Invalid shading rate");
893 SWR_ASSERT(backendFuncs
.pfnBackend
);
895 PFN_PROCESS_PRIMS pfnBinner
;
896 #if USE_SIMD16_FRONTEND
897 PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16
;
899 switch (pState
->state
.topology
)
902 pState
->pfnProcessPrims
= ClipPoints
;
903 pfnBinner
= BinPoints
;
904 #if USE_SIMD16_FRONTEND
905 pState
->pfnProcessPrims_simd16
= ClipPoints_simd16
;
906 pfnBinner_simd16
= BinPoints_simd16
;
912 case TOP_LINE_LIST_ADJ
:
913 case TOP_LISTSTRIP_ADJ
:
914 pState
->pfnProcessPrims
= ClipLines
;
915 pfnBinner
= BinLines
;
916 #if USE_SIMD16_FRONTEND
917 pState
->pfnProcessPrims_simd16
= ClipLines_simd16
;
918 pfnBinner_simd16
= BinLines_simd16
;
922 pState
->pfnProcessPrims
= ClipTriangles
;
923 pfnBinner
= GetBinTrianglesFunc((rastState
.conservativeRast
> 0));
924 #if USE_SIMD16_FRONTEND
925 pState
->pfnProcessPrims_simd16
= ClipTriangles_simd16
;
926 pfnBinner_simd16
= GetBinTrianglesFunc_simd16((rastState
.conservativeRast
> 0));
932 // Disable clipper if viewport transform is disabled or if clipper is disabled
933 if (pState
->state
.frontendState
.vpTransformDisable
|| !pState
->state
.rastState
.clipEnable
)
935 pState
->pfnProcessPrims
= pfnBinner
;
936 #if USE_SIMD16_FRONTEND
937 pState
->pfnProcessPrims_simd16
= pfnBinner_simd16
;
941 // Disable rasterizer and backend if no pixel, no depth/stencil, and no attributes
942 if ((pState
->state
.psState
.pfnPixelShader
== nullptr) &&
943 (pState
->state
.depthStencilState
.depthTestEnable
== FALSE
) &&
944 (pState
->state
.depthStencilState
.depthWriteEnable
== FALSE
) &&
945 (pState
->state
.depthStencilState
.stencilTestEnable
== FALSE
) &&
946 (pState
->state
.depthStencilState
.stencilWriteEnable
== FALSE
) &&
947 (pState
->state
.backendState
.numAttributes
== 0))
949 pState
->pfnProcessPrims
= nullptr;
950 #if USE_SIMD16_FRONTEND
951 pState
->pfnProcessPrims_simd16
= nullptr;
955 if (pState
->state
.soState
.rasterizerDisable
== true)
957 pState
->pfnProcessPrims
= nullptr;
958 #if USE_SIMD16_FRONTEND
959 pState
->pfnProcessPrims_simd16
= nullptr;
964 // set up the frontend attribute count
965 pState
->state
.feNumAttributes
= 0;
966 const SWR_BACKEND_STATE
& backendState
= pState
->state
.backendState
;
967 if (backendState
.swizzleEnable
)
969 // attribute swizzling is enabled, iterate over the map and record the max attribute used
970 for (uint32_t i
= 0; i
< backendState
.numAttributes
; ++i
)
972 pState
->state
.feNumAttributes
=
973 std::max(pState
->state
.feNumAttributes
,
974 (uint32_t)backendState
.swizzleMap
[i
].sourceAttrib
+ 1);
979 pState
->state
.feNumAttributes
= pState
->state
.backendState
.numAttributes
;
982 if (pState
->state
.soState
.soEnable
)
984 uint64_t streamMasks
= 0;
985 for (uint32_t i
= 0; i
< 4; ++i
)
987 streamMasks
|= pState
->state
.soState
.streamMasks
[i
];
991 if (_BitScanReverse64(&maxAttrib
, streamMasks
))
993 pState
->state
.feNumAttributes
=
994 std::max(pState
->state
.feNumAttributes
, (uint32_t)(maxAttrib
+ 1));
998 // complicated logic to test for cases where we don't need backing hottile memory for a draw
999 // have to check for the special case where depth/stencil test is enabled but depthwrite is
1001 pState
->state
.depthHottileEnable
=
1002 ((!(pState
->state
.depthStencilState
.depthTestEnable
&&
1003 !pState
->state
.depthStencilState
.depthWriteEnable
&&
1004 !pState
->state
.depthBoundsState
.depthBoundsTestEnable
&&
1005 pState
->state
.depthStencilState
.depthTestFunc
== ZFUNC_ALWAYS
)) &&
1006 (pState
->state
.depthStencilState
.depthTestEnable
||
1007 pState
->state
.depthStencilState
.depthWriteEnable
||
1008 pState
->state
.depthBoundsState
.depthBoundsTestEnable
))
1012 pState
->state
.stencilHottileEnable
=
1013 (((!(pState
->state
.depthStencilState
.stencilTestEnable
&&
1014 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
1015 pState
->state
.depthStencilState
.stencilTestFunc
== ZFUNC_ALWAYS
)) ||
1016 // for stencil we have to check the double sided state as well
1017 (!(pState
->state
.depthStencilState
.doubleSidedStencilTestEnable
&&
1018 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
1019 pState
->state
.depthStencilState
.backfaceStencilTestFunc
== ZFUNC_ALWAYS
))) &&
1020 (pState
->state
.depthStencilState
.stencilTestEnable
||
1021 pState
->state
.depthStencilState
.stencilWriteEnable
))
1025 uint32_t hotTileEnable
= pState
->state
.psState
.renderTargetMask
;
1027 // Disable hottile for surfaces with no writes
1028 if (psState
.pfnPixelShader
!= nullptr)
1031 uint32_t rtMask
= pState
->state
.psState
.renderTargetMask
;
1032 while (_BitScanForward(&rt
, rtMask
))
1034 rtMask
&= ~(1 << rt
);
1036 if (pState
->state
.blendState
.renderTarget
[rt
].writeDisableAlpha
&&
1037 pState
->state
.blendState
.renderTarget
[rt
].writeDisableRed
&&
1038 pState
->state
.blendState
.renderTarget
[rt
].writeDisableGreen
&&
1039 pState
->state
.blendState
.renderTarget
[rt
].writeDisableBlue
)
1041 hotTileEnable
&= ~(1 << rt
);
1046 pState
->state
.colorHottileEnable
= hotTileEnable
;
1048 // Setup depth quantization function
1049 if (pState
->state
.depthHottileEnable
)
1051 switch (pState
->state
.rastState
.depthFormat
)
1053 case R32_FLOAT_X8X24_TYPELESS
:
1054 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT_X8X24_TYPELESS
>;
1057 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT
>;
1059 case R24_UNORM_X8_TYPELESS
:
1060 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R24_UNORM_X8_TYPELESS
>;
1063 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R16_UNORM
>;
1066 SWR_INVALID("Unsupported depth format for depth quantiztion.");
1067 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT
>;
1072 // set up pass-through quantize if depth isn't enabled
1073 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
<R32_FLOAT
>;
1076 // Generate guardbands
1077 updateGuardbands(&pState
->state
);
1080 //////////////////////////////////////////////////////////////////////////
/// @brief Per-draw initialization. For a draw that is not a split draw this sets up the
///        macrotile scissors for the draw context.
1082 /// @param pDC - Draw context to initialize for this draw.
/// @param isSplitDraw - When true the setup below is skipped.
1083 void InitDraw(DRAW_CONTEXT
* pDC
, bool isSplitDraw
)
1085 // We don't need to re-setup the scissors/pipeline state again for split draw.
1086 if (isSplitDraw
== false)
1088 SetupMacroTileScissors(pDC
);
1094 //////////////////////////////////////////////////////////////////////////
1095 /// @brief We can split the draw for certain topologies for better performance.
/// @param pDC - Draw context; its API state supplies the streamout and tessellation enables.
1096 /// @param totalVerts - Total vertices for draw
1097 /// @param topology - Topology used for draw
/// @return Vertex budget for one (sub)draw. It starts out as totalVerts and is replaced by
///         KNOB_MAX_PRIMS_PER_DRAW for point/triangle lists, or by a multiple of the patch
///         size for patch lists when tessellation is enabled.
1098 uint32_t MaxVertsPerDraw(DRAW_CONTEXT
* pDC
, uint32_t totalVerts
, PRIMITIVE_TOPOLOGY topology
)
1100 API_STATE
& state
= pDC
->pState
->state
;
1102 // We can not split draws that have streamout enabled because there is no practical way
1103 // to support multiple threads generating SO data for a single set of buffers.
1104 if (state
.soState
.soEnable
)
1109 // The Primitive Assembly code can only handle 1 RECT at a time. Specified with only 3 verts.
1110 if (topology
== TOP_RECT_LIST
)
1115 // Is split drawing disabled?
1116 if (KNOB_DISABLE_SPLIT_DRAW
)
// Default: no splitting, the whole draw is a single chunk.
1121 uint32_t vertsPerDraw
= totalVerts
;
1125 case TOP_POINT_LIST
:
1126 case TOP_TRIANGLE_LIST
:
1127 vertsPerDraw
= KNOB_MAX_PRIMS_PER_DRAW
;
1130 case TOP_PATCHLIST_1
:
1131 case TOP_PATCHLIST_2
:
1132 case TOP_PATCHLIST_3
:
1133 case TOP_PATCHLIST_4
:
1134 case TOP_PATCHLIST_5
:
1135 case TOP_PATCHLIST_6
:
1136 case TOP_PATCHLIST_7
:
1137 case TOP_PATCHLIST_8
:
1138 case TOP_PATCHLIST_9
:
1139 case TOP_PATCHLIST_10
:
1140 case TOP_PATCHLIST_11
:
1141 case TOP_PATCHLIST_12
:
1142 case TOP_PATCHLIST_13
:
1143 case TOP_PATCHLIST_14
:
1144 case TOP_PATCHLIST_15
:
1145 case TOP_PATCHLIST_16
:
1146 case TOP_PATCHLIST_17
:
1147 case TOP_PATCHLIST_18
:
1148 case TOP_PATCHLIST_19
:
1149 case TOP_PATCHLIST_20
:
1150 case TOP_PATCHLIST_21
:
1151 case TOP_PATCHLIST_22
:
1152 case TOP_PATCHLIST_23
:
1153 case TOP_PATCHLIST_24
:
1154 case TOP_PATCHLIST_25
:
1155 case TOP_PATCHLIST_26
:
1156 case TOP_PATCHLIST_27
:
1157 case TOP_PATCHLIST_28
:
1158 case TOP_PATCHLIST_29
:
1159 case TOP_PATCHLIST_30
:
1160 case TOP_PATCHLIST_31
:
1161 case TOP_PATCHLIST_32
:
// Patch lists are only split when tessellation is enabled.
1162 if (pDC
->pState
->state
.tsState
.tsEnable
)
// Offset from TOP_PATCHLIST_BASE is used as the vertex count per patch
// (presumably TOP_PATCHLIST_n == TOP_PATCHLIST_BASE + n -- confirm against the enum).
1164 uint32_t vertsPerPrim
= topology
- TOP_PATCHLIST_BASE
;
1165 vertsPerDraw
= vertsPerPrim
* KNOB_MAX_TESS_PRIMS_PER_DRAW
;
1169 // We are not splitting up draws for other topologies.
1173 return vertsPerDraw
;
1176 //////////////////////////////////////////////////////////////////////////
1177 /// @brief DrawInstanced
///        Queues a non-indexed draw, broken into chunks of at most MaxVertsPerDraw()
///        vertices; each chunk gets its own draw context and is queued separately.
1178 /// @param hContext - Handle passed back from SwrCreateContext
1179 /// @param topology - Specifies topology for draw.
1180 /// @param numVertices - How many vertices to read sequentially from vertex data (per instance).
1181 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1182 /// @param numInstances - How many instances to render.
1183 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1184 /// (instanced data)
1185 void DrawInstanced(HANDLE hContext
,
1186 PRIMITIVE_TOPOLOGY topology
,
1187 uint32_t numVertices
,
1188 uint32_t startVertex
,
1189 uint32_t numInstances
= 1,
1190 uint32_t startInstance
= 0)
1197 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1198 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1200 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDraw
, pDC
->drawId
);
// Chunk size in vertices, the matching primitive count, and the vertices still to be queued.
1202 uint32_t maxVertsPerDraw
= MaxVertsPerDraw(pDC
, numVertices
, topology
);
1203 uint32_t primsPerDraw
= GetNumPrims(topology
, maxVertsPerDraw
);
1204 uint32_t remainingVerts
= numVertices
;
1206 API_STATE
* pState
= &pDC
->pState
->state
;
1207 pState
->topology
= topology
;
1208 pState
->forceFront
= false;
1210 // disable culling for points/rects (the original cull mode is saved and restored below)
1211 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1212 if (topology
== TOP_POINT_LIST
)
1214 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1215 pState
->forceFront
= true;
1217 else if (topology
== TOP_RECT_LIST
)
1219 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1223 while (remainingVerts
)
// This chunk takes whatever is left, capped at the per-draw maximum.
1225 uint32_t numVertsForDraw
=
1226 (remainingVerts
< maxVertsPerDraw
) ? remainingVerts
: maxVertsPerDraw
;
// Only chunks after the first count as split draws, and only if splitting is not disabled.
1228 bool isSplitDraw
= (draw
> 0) ? !KNOB_DISABLE_SPLIT_DRAW
: false;
// NOTE(review): this declares a new pDC that hides the one obtained at the top of the function.
1229 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
, isSplitDraw
);
1230 InitDraw(pDC
, isSplitDraw
);
1232 pDC
->FeWork
.type
= DRAW
;
1233 pDC
->FeWork
.pfnWork
= GetProcessDrawFunc(false, // IsIndexed
1234 false, // bEnableCutIndex
1235 pState
->tsState
.tsEnable
,
1236 pState
->gsState
.gsEnable
,
1237 pState
->soState
.soEnable
,
1238 pDC
->pState
->pfnProcessPrims
!= nullptr);
1239 pDC
->FeWork
.desc
.draw
.numVerts
= numVertsForDraw
;
1240 pDC
->FeWork
.desc
.draw
.startVertex
= startVertex
;
1241 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1242 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
// Starting primitive / vertex IDs of this chunk: chunk index times the per-chunk counts.
1243 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
1244 pDC
->FeWork
.desc
.draw
.startVertexID
= draw
* maxVertsPerDraw
;
// Set only on the final chunk, i.e. when this chunk consumes all remaining vertices.
1246 pDC
->cleanupState
= (remainingVerts
== numVertsForDraw
);
1249 QueueDraw(pContext
);
1251 AR_API_EVENT(DrawInstancedEvent(pDC
->drawId
,
1257 pState
->tsState
.tsEnable
,
1258 pState
->gsState
.gsEnable
,
1259 pState
->soState
.soEnable
,
1260 pState
->gsState
.outputTopology
,
1263 remainingVerts
-= numVertsForDraw
;
1267 // restore culling state
1268 pDC
= GetDrawContext(pContext
);
1269 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1271 RDTSC_END(pContext
->pBucketMgr
, APIDraw
, numVertices
* numInstances
);
1274 //////////////////////////////////////////////////////////////////////////
/// @brief SwrDraw
///        Forwards to DrawInstanced without passing instance arguments.
1276 /// @param hContext - Handle passed back from SwrCreateContext
1277 /// @param topology - Specifies topology for draw.
1278 /// @param startVertex - Specifies start vertex in vertex buffer for draw.
1279 /// @param numVertices - Number of vertices.
1280 void SwrDraw(HANDLE hContext
,
1281 PRIMITIVE_TOPOLOGY topology
,
1282 uint32_t startVertex
,
1283 uint32_t numVertices
)
// Note the argument order: DrawInstanced takes numVertices before startVertex.
1285 DrawInstanced(hContext
, topology
, numVertices
, startVertex
);
1288 //////////////////////////////////////////////////////////////////////////
1289 /// @brief SwrDrawInstanced
1290 /// @param hContext - Handle passed back from SwrCreateContext
1291 /// @param topology - Specifies topology for draw.
1292 /// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
1293 /// @param numInstances - How many instances to render.
1294 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1295 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1296 /// (instanced data)
1297 void SwrDrawInstanced(HANDLE hContext
,
1298 PRIMITIVE_TOPOLOGY topology
,
1299 uint32_t numVertsPerInstance
,
1300 uint32_t numInstances
,
1301 uint32_t startVertex
,
1302 uint32_t startInstance
)
// The arguments are forwarded with startVertex ahead of numInstances, i.e. in a different
// order than this function's own parameter list.
1305 hContext
, topology
, numVertsPerInstance
, startVertex
, numInstances
, startInstance
);
1308 //////////////////////////////////////////////////////////////////////////
1309 /// @brief DrawIndexedInstance
///        Queues an indexed draw, broken into chunks of at most MaxVertsPerDraw() indices;
///        each chunk gets its own draw context and is queued separately.
1310 /// @param hContext - Handle passed back from SwrCreateContext
1311 /// @param topology - Specifies topology for draw.
1312 /// @param numIndices - Number of indices to read sequentially from index buffer.
1313 /// @param indexOffset - Starting index into index buffer.
1314 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1315 /// @param numInstances - Number of instances to render.
1316 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1317 /// (instanced data)
1318 void DrawIndexedInstance(HANDLE hContext
,
1319 PRIMITIVE_TOPOLOGY topology
,
1320 uint32_t numIndices
,
1321 uint32_t indexOffset
,
1323 uint32_t numInstances
= 1,
1324 uint32_t startInstance
= 0)
1331 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1332 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1333 API_STATE
* pState
= &pDC
->pState
->state
;
1335 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDrawIndexed
, pDC
->drawId
);
// Chunk size in indices, the matching primitive count, and the indices still to be queued.
1337 uint32_t maxIndicesPerDraw
= MaxVertsPerDraw(pDC
, numIndices
, topology
);
1338 uint32_t primsPerDraw
= GetNumPrims(topology
, maxIndicesPerDraw
);
1339 uint32_t remainingIndices
= numIndices
;
// Size in bytes of one index, selected from the bound index buffer's format.
1341 uint32_t indexSize
= 0;
1342 switch (pState
->indexBuffer
.format
)
1345 indexSize
= sizeof(uint32_t);
1348 indexSize
= sizeof(uint16_t);
1351 indexSize
= sizeof(uint8_t);
1354 SWR_INVALID("Invalid index buffer format: %d", pState
->indexBuffer
.format
);
// Address of the first index to read; the byte offset is computed in 64-bit arithmetic.
1358 gfxptr_t xpIB
= pState
->indexBuffer
.xpIndices
;
1359 xpIB
+= (uint64_t)indexOffset
* (uint64_t)indexSize
;
1361 pState
->topology
= topology
;
1362 pState
->forceFront
= false;
1364 // disable culling for points/rects (the original cull mode is saved and restored below)
1365 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1366 if (topology
== TOP_POINT_LIST
)
1368 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1369 pState
->forceFront
= true;
1371 else if (topology
== TOP_RECT_LIST
)
1373 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1376 while (remainingIndices
)
// This chunk takes whatever is left, capped at the per-draw maximum.
1378 uint32_t numIndicesForDraw
=
1379 (remainingIndices
< maxIndicesPerDraw
) ? remainingIndices
: maxIndicesPerDraw
;
1381 // When breaking up draw, we need to obtain new draw context for each iteration.
1382 bool isSplitDraw
= (draw
> 0) ? !KNOB_DISABLE_SPLIT_DRAW
: false;
1384 pDC
= GetDrawContext(pContext
, isSplitDraw
);
1385 InitDraw(pDC
, isSplitDraw
);
1387 pDC
->FeWork
.type
= DRAW
;
1388 pDC
->FeWork
.pfnWork
= GetProcessDrawFunc(true, // IsIndexed
1389 pState
->frontendState
.bEnableCutIndex
,
1390 pState
->tsState
.tsEnable
,
1391 pState
->gsState
.gsEnable
,
1392 pState
->soState
.soEnable
,
1393 pDC
->pState
->pfnProcessPrims
!= nullptr);
1394 pDC
->FeWork
.desc
.draw
.pDC
= pDC
;
1395 pDC
->FeWork
.desc
.draw
.numIndices
= numIndicesForDraw
;
1396 pDC
->FeWork
.desc
.draw
.xpIB
= xpIB
;
1397 pDC
->FeWork
.desc
.draw
.type
= pDC
->pState
->state
.indexBuffer
.format
;
1399 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1400 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
1401 pDC
->FeWork
.desc
.draw
.baseVertex
= baseVertex
;
// Starting primitive ID of this chunk: chunk index times the per-chunk primitive count.
1402 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
// Set only on the final chunk, i.e. when this chunk consumes all remaining indices.
1404 pDC
->cleanupState
= (remainingIndices
== numIndicesForDraw
);
1407 QueueDraw(pContext
);
1409 AR_API_EVENT(DrawIndexedInstancedEvent(pDC
->drawId
,
1416 pState
->tsState
.tsEnable
,
1417 pState
->gsState
.gsEnable
,
1418 pState
->soState
.soEnable
,
1419 pState
->gsState
.outputTopology
,
// Advance the index pointer by one full chunk.
// NOTE(review): unlike the initial offset above, this product of two uint32_t values is
// evaluated in 32 bits before being added to xpIB.
1422 xpIB
+= maxIndicesPerDraw
* indexSize
;
1423 remainingIndices
-= numIndicesForDraw
;
1427 // Restore culling state
1428 pDC
= GetDrawContext(pContext
);
1429 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1431 RDTSC_END(pContext
->pBucketMgr
, APIDrawIndexed
, numIndices
* numInstances
);
1434 //////////////////////////////////////////////////////////////////////////
1435 /// @brief DrawIndexed
///        Forwards to DrawIndexedInstance without passing instance arguments.
1436 /// @param hContext - Handle passed back from SwrCreateContext
1437 /// @param topology - Specifies topology for draw.
1438 /// @param numIndices - Number of indices to read sequentially from index buffer.
1439 /// @param indexOffset - Starting index into index buffer.
1440 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1441 void SwrDrawIndexed(HANDLE hContext
,
1442 PRIMITIVE_TOPOLOGY topology
,
1443 uint32_t numIndices
,
1444 uint32_t indexOffset
,
1447 DrawIndexedInstance(hContext
, topology
, numIndices
, indexOffset
, baseVertex
);
1450 //////////////////////////////////////////////////////////////////////////
1451 /// @brief SwrDrawIndexedInstanced
///        Forwards to DrawIndexedInstance, reordering the arguments to that function's
///        parameter order.
1452 /// @param hContext - Handle passed back from SwrCreateContext
1453 /// @param topology - Specifies topology for draw.
1454 /// @param numIndices - Number of indices to read sequentially from index buffer.
1455 /// @param numInstances - Number of instances to render.
1456 /// @param indexOffset - Starting index into index buffer.
1457 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1458 /// @param startInstance - Which instance to start sequentially fetching from in each buffer
1459 /// (instanced data)
1460 void SwrDrawIndexedInstanced(HANDLE hContext
,
1461 PRIMITIVE_TOPOLOGY topology
,
1462 uint32_t numIndices
,
1463 uint32_t numInstances
,
1464 uint32_t indexOffset
,
1466 uint32_t startInstance
)
// numInstances moves from fourth position here to after baseVertex in the callee.
1468 DrawIndexedInstance(
1469 hContext
, topology
, numIndices
, indexOffset
, baseVertex
, numInstances
, startInstance
);
1472 //////////////////////////////////////////////////////////////////////////
1473 /// @brief SwrInvalidateTiles
///        Queues a DISCARDINVALIDATETILES work item that moves the selected hottiles to
///        SWR_TILE_INVALID.
1474 /// @param hContext - Handle passed back from SwrCreateContext
1475 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
1477 /// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
1478 /// be hottile size-aligned.
1479 void SWR_API
SwrInvalidateTiles(HANDLE hContext
,
1480 uint32_t attachmentMask
,
1481 const SWR_RECT
& invalidateRect
)
1488 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1489 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// Describe the work item for the frontend.
1491 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1492 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1493 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
// Copy the caller's rectangle, then restrict it to the maximum scissor rectangle.
1494 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
= invalidateRect
;
1495 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
&= g_MaxScissorRect
;
// Invalidate: partially covered tiles are included and no new tiles are created.
1496 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_INVALID
;
1497 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= false;
1498 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= false;
1501 QueueDraw(pContext
);
1503 AR_API_EVENT(SwrInvalidateTilesEvent(pDC
->drawId
));
1506 //////////////////////////////////////////////////////////////////////////
1507 /// @brief SwrDiscardRect
///        Queues a DISCARDINVALIDATETILES work item that moves fully covered hottiles to
///        SWR_TILE_RESOLVED.
1508 /// @param hContext - Handle passed back from SwrCreateContext
1509 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
1510 /// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be discarded.
1512 void SWR_API
SwrDiscardRect(HANDLE hContext
, uint32_t attachmentMask
, const SWR_RECT
& rect
)
1519 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1520 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1522 // Queue a discard of the hottiles
1523 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1524 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1525 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
// Copy the caller's rectangle, then restrict it to the maximum scissor rectangle.
1526 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
= rect
;
1527 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
&= g_MaxScissorRect
;
// Discard: only whole tiles are affected, and new tiles may be created.
1528 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_RESOLVED
;
1529 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= true;
1530 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= true;
1533 QueueDraw(pContext
);
1535 AR_API_EVENT(SwrDiscardRectEvent(pDC
->drawId
));
1538 //////////////////////////////////////////////////////////////////////////
1539 /// @brief SwrDispatch
///        Marks the current draw context as a compute context, fills in a COMPUTE_DESC and
///        a dispatch queue for it, and queues the dispatch.
1540 /// @param hContext - Handle passed back from SwrCreateContext
1541 /// @param threadGroupCountX - Number of thread groups dispatched in X direction
1542 /// @param threadGroupCountY - Number of thread groups dispatched in Y direction
1543 /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
1544 void SwrDispatch(HANDLE hContext
,
1545 uint32_t threadGroupCountX
,
1546 uint32_t threadGroupCountY
,
1547 uint32_t threadGroupCountZ
1556 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1557 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1559 RDTSC_BEGIN(pContext
->pBucketMgr
, APIDispatch
, pDC
->drawId
);
1561 DispatchEvent(pDC
->drawId
, threadGroupCountX
, threadGroupCountY
, threadGroupCountZ
));
1562 pDC
->isCompute
= true; // This is a compute context.
// The task descriptor is allocated from the draw context's arena (64 passed as alignment).
1564 COMPUTE_DESC
* pTaskData
= (COMPUTE_DESC
*)pDC
->pArena
->AllocAligned(sizeof(COMPUTE_DESC
), 64);
1566 pTaskData
->threadGroupCountX
= threadGroupCountX
;
1567 pTaskData
->threadGroupCountY
= threadGroupCountY
;
1568 pTaskData
->threadGroupCountZ
= threadGroupCountZ
;
1570 pTaskData
->enableThreadDispatch
= false;
// NOTE(review): the group count is a product of three uint32_t values evaluated in 32 bits.
1572 uint32_t totalThreadGroups
= threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
;
// Pick the dispatch queue slot for this draw: draw ID modulo the number of draws in flight.
1573 uint32_t dcIndex
= pDC
->drawId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
1574 pDC
->pDispatch
= &pContext
->pDispatchQueueArray
[dcIndex
];
1575 pDC
->pDispatch
->initialize(totalThreadGroups
, pTaskData
, &ProcessComputeBE
);
1577 QueueDispatch(pContext
);
1578 RDTSC_END(pContext
->pBucketMgr
,
1580 threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
);
1583 // Deswizzles, converts and stores current contents of the hot tiles to surface
1584 // described by the current state. Implemented by queuing a STORETILES work item.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - Attachment mask recorded in the store work item.
/// @param postStoreTileState - Tile state recorded in the store work item
///                             (presumably the state tiles are left in after the store).
/// @param storeRect - Rectangle to store; restricted to the maximum scissor rectangle.
1585 void SWR_API
SwrStoreTiles(HANDLE hContext
,
1586 uint32_t attachmentMask
,
1587 SWR_TILE_STATE postStoreTileState
,
1588 const SWR_RECT
& storeRect
)
1595 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1596 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1598 RDTSC_BEGIN(pContext
->pBucketMgr
, APIStoreTiles
, pDC
->drawId
);
// Describe the work item for the frontend.
1600 pDC
->FeWork
.type
= STORETILES
;
1601 pDC
->FeWork
.pfnWork
= ProcessStoreTiles
;
1602 pDC
->FeWork
.desc
.storeTiles
.attachmentMask
= attachmentMask
;
1603 pDC
->FeWork
.desc
.storeTiles
.postStoreTileState
= postStoreTileState
;
// Copy the caller's rectangle, then restrict it to the maximum scissor rectangle.
1604 pDC
->FeWork
.desc
.storeTiles
.rect
= storeRect
;
1605 pDC
->FeWork
.desc
.storeTiles
.rect
&= g_MaxScissorRect
;
1608 QueueDraw(pContext
);
1610 AR_API_EVENT(SwrStoreTilesEvent(pDC
->drawId
));
1612 RDTSC_END(pContext
->pBucketMgr
, APIStoreTiles
, 1);
1615 //////////////////////////////////////////////////////////////////////////
1616 /// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
///        Implemented by queuing a CLEAR work item carrying the values below.
1617 /// @param hContext - Handle passed back from SwrCreateContext
1618 /// @param attachmentMask - combination of SWR_ATTACHMENT_*_BIT attachments to clear
1619 /// @param renderTargetArrayIndex - the RT array index to clear
1620 /// @param clearColor - color use for clearing render targets
1621 /// @param z - depth value use for clearing depth buffer
1622 /// @param stencil - stencil value used for clearing stencil buffer
1623 /// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
1624 void SWR_API
SwrClearRenderTarget(HANDLE hContext
,
1625 uint32_t attachmentMask
,
1626 uint32_t renderTargetArrayIndex
,
1627 const float clearColor
[4],
1630 const SWR_RECT
& clearRect
)
1637 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1638 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1640 RDTSC_BEGIN(pContext
->pBucketMgr
, APIClearRenderTarget
, pDC
->drawId
);
// Describe the work item for the frontend.
1642 pDC
->FeWork
.type
= CLEAR
;
1643 pDC
->FeWork
.pfnWork
= ProcessClear
;
// Copy the caller's rectangle, then restrict it to the maximum scissor rectangle.
1644 pDC
->FeWork
.desc
.clear
.rect
= clearRect
;
1645 pDC
->FeWork
.desc
.clear
.rect
&= g_MaxScissorRect
;
1646 pDC
->FeWork
.desc
.clear
.attachmentMask
= attachmentMask
;
1647 pDC
->FeWork
.desc
.clear
.renderTargetArrayIndex
= renderTargetArrayIndex
;
1648 pDC
->FeWork
.desc
.clear
.clearDepth
= z
;
// The four color components are copied by value, so the caller's array need not outlive this call.
1649 pDC
->FeWork
.desc
.clear
.clearRTColor
[0] = clearColor
[0];
1650 pDC
->FeWork
.desc
.clear
.clearRTColor
[1] = clearColor
[1];
1651 pDC
->FeWork
.desc
.clear
.clearRTColor
[2] = clearColor
[2];
1652 pDC
->FeWork
.desc
.clear
.clearRTColor
[3] = clearColor
[3];
1653 pDC
->FeWork
.desc
.clear
.clearStencil
= stencil
;
1656 QueueDraw(pContext
);
1658 RDTSC_END(pContext
->pBucketMgr
, APIClearRenderTarget
, 1);
1661 //////////////////////////////////////////////////////////////////////////
1662 /// @brief Returns a pointer to the private context state for the current
1663 /// draw operation. This is used for external components such as the
1665 /// SWR is responsible for the allocation of the private context state.
1666 /// @param hContext - Handle passed back from SwrCreateContext
/// @return Pointer to the private state block of the current draw state; the block is
///         allocated on the first request and reused afterwards.
1667 VOID
* SwrGetPrivateContextState(HANDLE hContext
)
1669 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1670 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1671 DRAW_STATE
* pState
= pDC
->pState
;
// Lazily allocate from the draw state's arena: privateStateSize bytes, with
// KNOB_SIMD_WIDTH * sizeof(float) passed as the alignment.
1673 if (pState
->pPrivateState
== nullptr)
1675 pState
->pPrivateState
= pState
->pArena
->AllocAligned(pContext
->privateStateSize
,
1676 KNOB_SIMD_WIDTH
* sizeof(float));
1679 return pState
->pPrivateState
;
1682 //////////////////////////////////////////////////////////////////////////
1683 /// @brief Clients can use this to allocate memory for draw/dispatch
1684 /// operations. The memory will automatically be freed once operation
1685 /// has completed. Client can use this to allocate binding tables,
1686 /// etc. needed for shader execution.
1687 /// @param hContext - Handle passed back from SwrCreateContext
1688 /// @param size - Size of allocation
1689 /// @param align - Alignment needed for allocation.
/// @return Pointer returned by the current draw state's arena allocator.
1690 VOID
* SwrAllocDrawContextMemory(HANDLE hContext
, uint32_t size
, uint32_t align
)
1692 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1693 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// The allocation comes from the arena owned by the current draw state.
1695 return pDC
->pState
->pArena
->AllocAligned(size
, align
);
1698 //////////////////////////////////////////////////////////////////////////
1699 /// @brief Enables stats counting by setting the enableStatsFE flag in the
///        API state of the current draw context.
1700 /// @param hContext - Handle passed back from SwrCreateContext
1701 /// @param enable - If true then counts are incremented.
1702 void SwrEnableStatsFE(HANDLE hContext
, bool enable
)
1704 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1705 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1707 pDC
->pState
->state
.enableStatsFE
= enable
;
1710 //////////////////////////////////////////////////////////////////////////
1711 /// @brief Enables stats counting by setting the enableStatsBE flag in the
///        API state of the current draw context.
1712 /// @param hContext - Handle passed back from SwrCreateContext
1713 /// @param enable - If true then counts are incremented.
1714 void SwrEnableStatsBE(HANDLE hContext
, bool enable
)
1716 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1717 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1719 pDC
->pState
->state
.enableStatsBE
= enable
;
1722 //////////////////////////////////////////////////////////////////////////
1723 /// @brief Mark end of frame - used for performance profiling
///        Signals end-of-frame to the bucket manager, emits a FrameEndEvent and
///        increments the context's frame counter.
1724 /// @param hContext - Handle passed back from SwrCreateContext
1725 void SWR_API
SwrEndFrame(HANDLE hContext
)
1727 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1728 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// pDC is otherwise referenced only inside the AR_API_EVENT argument below; the cast
// presumably silences an unused-variable warning when that macro expands to nothing.
1729 (void)pDC
; // var used
1731 RDTSC_ENDFRAME(pContext
->pBucketMgr
);
1732 AR_API_EVENT(FrameEndEvent(pContext
->frameCount
, pDC
->drawId
));
1734 pContext
->frameCount
++;
// Forward declarations of table-initialization routines; their definitions are not in this
// part of the file.
1737 void InitSimLoadTilesTable();
1738 void InitSimStoreTilesTable();
1739 void InitSimClearTilesTable();
1741 void InitClearTilesTable();
1742 void InitBackendFuncTables();
1744 //////////////////////////////////////////////////////////////////////////
1745 /// @brief Initialize swr backend and memory internal tables
// Runs the one-time initializers; going by their names these presumably fill the clear-tile,
// backend and rasterizer function tables (confirm against their definitions).
1748 InitClearTilesTable();
1749 InitBackendFuncTables();
1750 InitRasterizerFunctions();
//////////////////////////////////////////////////////////////////////////
/// @brief Fills the caller's SWR_INTERFACE structure with pointers to the API entry points,
///        one pfn* member per function.
/// @param out_funcs - Interface table to populate.
1753 void SwrGetInterface(SWR_INTERFACE
& out_funcs
)
1755 out_funcs
.pfnSwrCreateContext
= SwrCreateContext
;
1756 out_funcs
.pfnSwrDestroyContext
= SwrDestroyContext
;
1757 out_funcs
.pfnSwrBindApiThread
= SwrBindApiThread
;
1758 out_funcs
.pfnSwrSaveState
= SwrSaveState
;
1759 out_funcs
.pfnSwrRestoreState
= SwrRestoreState
;
1760 out_funcs
.pfnSwrSync
= SwrSync
;
1761 out_funcs
.pfnSwrStallBE
= SwrStallBE
;
1762 out_funcs
.pfnSwrWaitForIdle
= SwrWaitForIdle
;
1763 out_funcs
.pfnSwrWaitForIdleFE
= SwrWaitForIdleFE
;
1764 out_funcs
.pfnSwrSetVertexBuffers
= SwrSetVertexBuffers
;
1765 out_funcs
.pfnSwrSetIndexBuffer
= SwrSetIndexBuffer
;
1766 out_funcs
.pfnSwrSetFetchFunc
= SwrSetFetchFunc
;
1767 out_funcs
.pfnSwrSetSoFunc
= SwrSetSoFunc
;
1768 out_funcs
.pfnSwrSetSoState
= SwrSetSoState
;
1769 out_funcs
.pfnSwrSetSoBuffers
= SwrSetSoBuffers
;
1770 out_funcs
.pfnSwrSetVertexFunc
= SwrSetVertexFunc
;
1771 out_funcs
.pfnSwrSetFrontendState
= SwrSetFrontendState
;
1772 out_funcs
.pfnSwrSetGsState
= SwrSetGsState
;
1773 out_funcs
.pfnSwrSetGsFunc
= SwrSetGsFunc
;
1774 out_funcs
.pfnSwrSetCsFunc
= SwrSetCsFunc
;
1775 out_funcs
.pfnSwrSetTsState
= SwrSetTsState
;
1776 out_funcs
.pfnSwrSetHsFunc
= SwrSetHsFunc
;
1777 out_funcs
.pfnSwrSetDsFunc
= SwrSetDsFunc
;
1778 out_funcs
.pfnSwrSetDepthStencilState
= SwrSetDepthStencilState
;
1779 out_funcs
.pfnSwrSetBackendState
= SwrSetBackendState
;
1780 out_funcs
.pfnSwrSetDepthBoundsState
= SwrSetDepthBoundsState
;
1781 out_funcs
.pfnSwrSetPixelShaderState
= SwrSetPixelShaderState
;
1782 out_funcs
.pfnSwrSetBlendState
= SwrSetBlendState
;
1783 out_funcs
.pfnSwrSetBlendFunc
= SwrSetBlendFunc
;
1784 out_funcs
.pfnSwrDraw
= SwrDraw
;
1785 out_funcs
.pfnSwrDrawInstanced
= SwrDrawInstanced
;
1786 out_funcs
.pfnSwrDrawIndexed
= SwrDrawIndexed
;
1787 out_funcs
.pfnSwrDrawIndexedInstanced
= SwrDrawIndexedInstanced
;
1788 out_funcs
.pfnSwrInvalidateTiles
= SwrInvalidateTiles
;
1789 out_funcs
.pfnSwrDiscardRect
= SwrDiscardRect
;
1790 out_funcs
.pfnSwrDispatch
= SwrDispatch
;
1791 out_funcs
.pfnSwrStoreTiles
= SwrStoreTiles
;
1792 out_funcs
.pfnSwrClearRenderTarget
= SwrClearRenderTarget
;
1793 out_funcs
.pfnSwrSetRastState
= SwrSetRastState
;
1794 out_funcs
.pfnSwrSetViewports
= SwrSetViewports
;
1795 out_funcs
.pfnSwrSetScissorRects
= SwrSetScissorRects
;
1796 out_funcs
.pfnSwrGetPrivateContextState
= SwrGetPrivateContextState
;
1797 out_funcs
.pfnSwrAllocDrawContextMemory
= SwrAllocDrawContextMemory
;
1798 out_funcs
.pfnSwrEnableStatsFE
= SwrEnableStatsFE
;
1799 out_funcs
.pfnSwrEnableStatsBE
= SwrEnableStatsBE
;
1800 out_funcs
.pfnSwrEndFrame
= SwrEndFrame
;
1801 out_funcs
.pfnSwrInit
= SwrInit
;