1 /****************************************************************************
2 * Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief API implementation
27 ******************************************************************************/
35 #include "core/backend.h"
36 #include "core/context.h"
37 #include "core/depthstencil.h"
38 #include "core/frontend.h"
39 #include "core/rasterizer.h"
40 #include "core/rdtsc_core.h"
41 #include "core/threads.h"
42 #include "core/tilemgr.h"
43 #include "core/clip.h"
44 #include "core/utils.h"
46 #include "common/os.h"
48 static const SWR_RECT g_MaxScissorRect
= { 0, 0, KNOB_MAX_SCISSOR_X
, KNOB_MAX_SCISSOR_Y
};
50 void SetupDefaultState(SWR_CONTEXT
*pContext
);
52 static INLINE SWR_CONTEXT
* GetContext(HANDLE hContext
)
54 return (SWR_CONTEXT
*)hContext
;
57 void WakeAllThreads(SWR_CONTEXT
*pContext
)
59 pContext
->FifosNotEmpty
.notify_all();
62 //////////////////////////////////////////////////////////////////////////
63 /// @brief Create SWR Context.
64 /// @param pCreateInfo - pointer to creation info.
65 HANDLE
SwrCreateContext(
66 SWR_CREATECONTEXT_INFO
* pCreateInfo
)
71 void* pContextMem
= AlignedMalloc(sizeof(SWR_CONTEXT
), KNOB_SIMD_WIDTH
* 4);
72 memset(pContextMem
, 0, sizeof(SWR_CONTEXT
));
73 SWR_CONTEXT
*pContext
= new (pContextMem
) SWR_CONTEXT();
75 pContext
->privateStateSize
= pCreateInfo
->privateStateSize
;
77 pContext
->MAX_DRAWS_IN_FLIGHT
= KNOB_MAX_DRAWS_IN_FLIGHT
;
78 if (pCreateInfo
->MAX_DRAWS_IN_FLIGHT
!= 0)
80 pContext
->MAX_DRAWS_IN_FLIGHT
= pCreateInfo
->MAX_DRAWS_IN_FLIGHT
;
83 pContext
->dcRing
.Init(pContext
->MAX_DRAWS_IN_FLIGHT
);
84 pContext
->dsRing
.Init(pContext
->MAX_DRAWS_IN_FLIGHT
);
86 pContext
->pMacroTileManagerArray
= (MacroTileMgr
*)AlignedMalloc(sizeof(MacroTileMgr
) * pContext
->MAX_DRAWS_IN_FLIGHT
, 64);
87 pContext
->pDispatchQueueArray
= (DispatchQueue
*)AlignedMalloc(sizeof(DispatchQueue
) * pContext
->MAX_DRAWS_IN_FLIGHT
, 64);
89 for (uint32_t dc
= 0; dc
< pContext
->MAX_DRAWS_IN_FLIGHT
; ++dc
)
91 pContext
->dcRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
92 new (&pContext
->pMacroTileManagerArray
[dc
]) MacroTileMgr(*pContext
->dcRing
[dc
].pArena
);
93 new (&pContext
->pDispatchQueueArray
[dc
]) DispatchQueue();
95 pContext
->dsRing
[dc
].pArena
= new CachingArena(pContext
->cachingArenaAllocator
);
98 if (pCreateInfo
->pThreadInfo
)
100 pContext
->threadInfo
= *pCreateInfo
->pThreadInfo
;
104 pContext
->threadInfo
.MAX_WORKER_THREADS
= KNOB_MAX_WORKER_THREADS
;
105 pContext
->threadInfo
.BASE_NUMA_NODE
= KNOB_BASE_NUMA_NODE
;
106 pContext
->threadInfo
.BASE_CORE
= KNOB_BASE_CORE
;
107 pContext
->threadInfo
.BASE_THREAD
= KNOB_BASE_THREAD
;
108 pContext
->threadInfo
.MAX_NUMA_NODES
= KNOB_MAX_NUMA_NODES
;
109 pContext
->threadInfo
.MAX_CORES_PER_NUMA_NODE
= KNOB_MAX_CORES_PER_NUMA_NODE
;
110 pContext
->threadInfo
.MAX_THREADS_PER_CORE
= KNOB_MAX_THREADS_PER_CORE
;
111 pContext
->threadInfo
.SINGLE_THREADED
= KNOB_SINGLE_THREADED
;
114 if (pCreateInfo
->pApiThreadInfo
)
116 pContext
->apiThreadInfo
= *pCreateInfo
->pApiThreadInfo
;
120 pContext
->apiThreadInfo
.bindAPIThread0
= true;
121 pContext
->apiThreadInfo
.numAPIReservedThreads
= 1;
122 pContext
->apiThreadInfo
.numAPIThreadsPerCore
= 1;
125 memset(&pContext
->WaitLock
, 0, sizeof(pContext
->WaitLock
));
126 memset(&pContext
->FifosNotEmpty
, 0, sizeof(pContext
->FifosNotEmpty
));
127 new (&pContext
->WaitLock
) std::mutex();
128 new (&pContext
->FifosNotEmpty
) std::condition_variable();
130 CreateThreadPool(pContext
, &pContext
->threadPool
);
132 if (pContext
->apiThreadInfo
.bindAPIThread0
)
134 BindApiThread(pContext
, 0);
137 pContext
->ppScratch
= new uint8_t*[pContext
->NumWorkerThreads
];
138 pContext
->pStats
= (SWR_STATS
*)AlignedMalloc(sizeof(SWR_STATS
) * pContext
->NumWorkerThreads
, 64);
140 #if defined(KNOB_ENABLE_AR)
141 // Setup ArchRast thread contexts which includes +1 for API thread.
142 pContext
->pArContext
= new HANDLE
[pContext
->NumWorkerThreads
+1];
143 pContext
->pArContext
[pContext
->NumWorkerThreads
] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API
);
146 // Allocate scratch space for workers.
147 ///@note We could lazily allocate this but its rather small amount of memory.
148 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
151 uint32_t numaNode
= pContext
->threadPool
.pThreadData
?
152 pContext
->threadPool
.pThreadData
[i
].numaId
: 0;
153 pContext
->ppScratch
[i
] = (uint8_t*)VirtualAllocExNuma(
154 GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE
),
155 MEM_RESERVE
| MEM_COMMIT
, PAGE_READWRITE
,
158 pContext
->ppScratch
[i
] = (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE
), KNOB_SIMD_WIDTH
* 4);
161 #if defined(KNOB_ENABLE_AR)
162 // Initialize worker thread context for ArchRast.
163 pContext
->pArContext
[i
] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::WORKER
);
167 #if defined(KNOB_ENABLE_AR)
168 // cache the API thread event manager, for use with sim layer
169 pCreateInfo
->hArEventManager
= pContext
->pArContext
[pContext
->NumWorkerThreads
+ 1];
172 // State setup AFTER context is fully initialized
173 SetupDefaultState(pContext
);
175 // initialize hot tile manager
176 pContext
->pHotTileMgr
= new HotTileMgr();
178 // initialize callback functions
179 pContext
->pfnLoadTile
= pCreateInfo
->pfnLoadTile
;
180 pContext
->pfnStoreTile
= pCreateInfo
->pfnStoreTile
;
181 pContext
->pfnClearTile
= pCreateInfo
->pfnClearTile
;
182 pContext
->pfnUpdateSoWriteOffset
= pCreateInfo
->pfnUpdateSoWriteOffset
;
183 pContext
->pfnUpdateStats
= pCreateInfo
->pfnUpdateStats
;
184 pContext
->pfnUpdateStatsFE
= pCreateInfo
->pfnUpdateStatsFE
;
187 // pass pointer to bucket manager back to caller
188 #ifdef KNOB_ENABLE_RDTSC
189 pCreateInfo
->pBucketMgr
= &gBucketMgr
;
192 pCreateInfo
->contextSaveSize
= sizeof(API_STATE
);
194 StartThreadPool(pContext
, &pContext
->threadPool
);
196 return (HANDLE
)pContext
;
199 void CopyState(DRAW_STATE
& dst
, const DRAW_STATE
& src
)
201 memcpy(&dst
.state
, &src
.state
, sizeof(API_STATE
));
204 template<bool IsDraw
>
205 void QueueWork(SWR_CONTEXT
*pContext
)
207 DRAW_CONTEXT
* pDC
= pContext
->pCurDrawContext
;
208 uint32_t dcIndex
= pDC
->drawId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
212 pDC
->pTileMgr
= &pContext
->pMacroTileManagerArray
[dcIndex
];
213 pDC
->pTileMgr
->initialize();
216 // Each worker thread looks at a DC for both FE and BE work at different times, so
217 // threadsDone is seeded with NumFEThreads + NumBEThreads. When the threadsDone counter has
218 // reached 0 then all workers have moved past this DC. (i.e. Each worker has checked this DC
219 // for both FE and BE work and then moved on if all work is done.)
220 pContext
->pCurDrawContext
->threadsDone
= pContext
->NumFEThreads
+ pContext
->NumBEThreads
;
224 InterlockedIncrement(&pContext
->drawsOutstandingFE
);
229 std::unique_lock
<std::mutex
> lock(pContext
->WaitLock
);
230 pContext
->dcRing
.Enqueue();
233 if (pContext
->threadInfo
.SINGLE_THREADED
)
235 // flush denormals to 0
236 uint32_t mxcsr
= _mm_getcsr();
237 _mm_setcsr(mxcsr
| _MM_FLUSH_ZERO_ON
| _MM_DENORMALS_ZERO_ON
);
241 uint32_t curDraw
[2] = { pContext
->pCurDrawContext
->drawId
, pContext
->pCurDrawContext
->drawId
};
242 WorkOnFifoFE(pContext
, 0, curDraw
[0]);
243 WorkOnFifoBE(pContext
, 0, curDraw
[1], pContext
->singleThreadLockedTiles
, 0, 0);
247 uint32_t curDispatch
= pContext
->pCurDrawContext
->drawId
;
248 WorkOnCompute(pContext
, 0, curDispatch
);
251 // Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
252 while (CompleteDrawContext(pContext
, pContext
->pCurDrawContext
) > 0) {}
259 RDTSC_BEGIN(APIDrawWakeAllThreads
, pDC
->drawId
);
260 WakeAllThreads(pContext
);
261 RDTSC_END(APIDrawWakeAllThreads
, 1);
264 // Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
265 pContext
->pPrevDrawContext
= pContext
->pCurDrawContext
;
266 pContext
->pCurDrawContext
= nullptr;
269 INLINE
void QueueDraw(SWR_CONTEXT
* pContext
)
271 QueueWork
<true>(pContext
);
274 INLINE
void QueueDispatch(SWR_CONTEXT
* pContext
)
276 QueueWork
<false>(pContext
);
279 DRAW_CONTEXT
* GetDrawContext(SWR_CONTEXT
*pContext
, bool isSplitDraw
= false)
281 RDTSC_BEGIN(APIGetDrawContext
, 0);
282 // If current draw context is null then need to obtain a new draw context to use from ring.
283 if (pContext
->pCurDrawContext
== nullptr)
285 // Need to wait for a free entry.
286 while (pContext
->dcRing
.IsFull())
291 uint64_t curDraw
= pContext
->dcRing
.GetHead();
292 uint32_t dcIndex
= curDraw
% pContext
->MAX_DRAWS_IN_FLIGHT
;
294 if ((pContext
->frameCount
- pContext
->lastFrameChecked
) > 2 ||
295 (curDraw
- pContext
->lastDrawChecked
) > 0x10000)
297 // Take this opportunity to clean-up old arena allocations
298 pContext
->cachingArenaAllocator
.FreeOldBlocks();
300 pContext
->lastFrameChecked
= pContext
->frameCount
;
301 pContext
->lastDrawChecked
= curDraw
;
304 DRAW_CONTEXT
* pCurDrawContext
= &pContext
->dcRing
[dcIndex
];
305 pContext
->pCurDrawContext
= pCurDrawContext
;
307 // Assign next available entry in DS ring to this DC.
308 uint32_t dsIndex
= pContext
->curStateId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
309 pCurDrawContext
->pState
= &pContext
->dsRing
[dsIndex
];
311 // Copy previous state to current state.
312 if (pContext
->pPrevDrawContext
)
314 DRAW_CONTEXT
* pPrevDrawContext
= pContext
->pPrevDrawContext
;
316 // If we're splitting our draw then we can just use the same state from the previous
317 // draw. In this case, we won't increment the DS ring index so the next non-split
318 // draw can receive the state.
319 if (isSplitDraw
== false)
321 CopyState(*pCurDrawContext
->pState
, *pPrevDrawContext
->pState
);
323 // Should have been cleaned up previously
324 SWR_ASSERT(pCurDrawContext
->pState
->pArena
->IsEmpty() == true);
326 pCurDrawContext
->pState
->pPrivateState
= nullptr;
328 pContext
->curStateId
++; // Progress state ring index forward.
332 // If it's a split draw then just copy the state pointer over
333 // since it's the same draw.
334 pCurDrawContext
->pState
= pPrevDrawContext
->pState
;
335 SWR_ASSERT(pPrevDrawContext
->cleanupState
== false);
340 SWR_ASSERT(pCurDrawContext
->pState
->pArena
->IsEmpty() == true);
341 pContext
->curStateId
++; // Progress state ring index forward.
344 SWR_ASSERT(pCurDrawContext
->pArena
->IsEmpty() == true);
347 pCurDrawContext
->dependent
= false;
348 pCurDrawContext
->dependentFE
= false;
350 pCurDrawContext
->pContext
= pContext
;
351 pCurDrawContext
->isCompute
= false; // Dispatch has to set this to true.
353 pCurDrawContext
->doneFE
= false;
354 pCurDrawContext
->FeLock
= 0;
355 pCurDrawContext
->threadsDone
= 0;
356 pCurDrawContext
->retireCallback
.pfnCallbackFunc
= nullptr;
358 pCurDrawContext
->dynState
.Reset(pContext
->NumWorkerThreads
);
360 // Assign unique drawId for this DC
361 pCurDrawContext
->drawId
= pContext
->dcRing
.GetHead();
363 pCurDrawContext
->cleanupState
= true;
367 SWR_ASSERT(isSplitDraw
== false, "Split draw should only be used when obtaining a new DC");
370 RDTSC_END(APIGetDrawContext
, 0);
371 return pContext
->pCurDrawContext
;
374 API_STATE
* GetDrawState(SWR_CONTEXT
*pContext
)
376 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
377 SWR_ASSERT(pDC
->pState
!= nullptr);
379 return &pDC
->pState
->state
;
// Tears down a context: tags the current draw context's FE work as SHUTDOWN
// (handled by ProcessShutdown), destroys the thread pool, then releases the
// per-draw-slot objects, the per-worker scratch buffers, the stats array, the
// hot tile manager and finally the context storage itself.
// hContext is the handle of the context to destroy.
382 void SwrDestroyContext(HANDLE hContext
)
384 SWR_CONTEXT
*pContext
= GetContext(hContext
);
385 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// Mark this draw context as carrying the shutdown request.
387 pDC
->FeWork
.type
= SHUTDOWN
;
388 pDC
->FeWork
.pfnWork
= ProcessShutdown
;
393 DestroyThreadPool(pContext
, &pContext
->threadPool
);
// Release everything owned by each draw-context / draw-state ring slot.
// The tile managers and dispatch queues were placement-constructed, so they
// are destroyed explicitly and their backing arrays freed afterwards.
396 for (uint32_t i
= 0; i
< pContext
->MAX_DRAWS_IN_FLIGHT
; ++i
)
398 AlignedFree(pContext
->dcRing
[i
].dynState
.pStats
);
399 delete pContext
->dcRing
[i
].pArena
;
400 delete pContext
->dsRing
[i
].pArena
;
401 pContext
->pMacroTileManagerArray
[i
].~MacroTileMgr();
402 pContext
->pDispatchQueueArray
[i
].~DispatchQueue();
405 AlignedFree(pContext
->pDispatchQueueArray
);
406 AlignedFree(pContext
->pMacroTileManagerArray
);
408 // Free scratch space.
409 for (uint32_t i
= 0; i
< pContext
->NumWorkerThreads
; ++i
)
// VirtualFree / AlignedFree mirror the two scratch allocation paths in
// SwrCreateContext (VirtualAllocExNuma vs. AlignedMalloc).
412 VirtualFree(pContext
->ppScratch
[i
], 0, MEM_RELEASE
);
414 AlignedFree(pContext
->ppScratch
[i
]);
// NOTE(review): this loop only visits the worker slots [0, NumWorkerThreads).
// SwrCreateContext also creates an API-thread context in
// pArContext[NumWorkerThreads] and allocates pArContext with new[]; neither
// is released anywhere in this function. Looks like a leak when
// KNOB_ENABLE_AR is defined -- confirm nothing else (e.g. ~SWR_CONTEXT)
// frees them.
417 #if defined(KNOB_ENABLE_AR)
418 ArchRast::DestroyThreadContext(pContext
->pArContext
[i
]);
422 delete[] pContext
->ppScratch
;
423 AlignedFree(pContext
->pStats
);
425 delete(pContext
->pHotTileMgr
);
// The context was placement-constructed into AlignedMalloc'd storage, so run
// the destructor by hand and then free the storage.
427 pContext
->~SWR_CONTEXT();
428 AlignedFree(GetContext(hContext
));
431 void SwrBindApiThread(HANDLE hContext
, uint32_t apiThreadId
)
433 SWR_CONTEXT
*pContext
= GetContext(hContext
);
434 BindApiThread(pContext
, apiThreadId
);
437 void SWR_API
SwrSaveState(
439 void* pOutputStateBlock
,
442 SWR_CONTEXT
*pContext
= GetContext(hContext
);
443 auto pSrc
= GetDrawState(pContext
);
444 SWR_ASSERT(pOutputStateBlock
&& memSize
>= sizeof(*pSrc
));
446 memcpy(pOutputStateBlock
, pSrc
, sizeof(*pSrc
));
449 void SWR_API
SwrRestoreState(
451 const void* pStateBlock
,
454 SWR_CONTEXT
*pContext
= GetContext(hContext
);
455 auto pDst
= GetDrawState(pContext
);
456 SWR_ASSERT(pStateBlock
&& memSize
>= sizeof(*pDst
));
458 memcpy(pDst
, pStateBlock
, sizeof(*pDst
));
461 void SetupDefaultState(SWR_CONTEXT
*pContext
)
463 API_STATE
* pState
= GetDrawState(pContext
);
465 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
466 pState
->rastState
.frontWinding
= SWR_FRONTWINDING_CCW
;
468 pState
->depthBoundsState
.depthBoundsTestEnable
= false;
469 pState
->depthBoundsState
.depthBoundsTestMinValue
= 0.0f
;
470 pState
->depthBoundsState
.depthBoundsTestMaxValue
= 1.0f
;
473 void SwrSync(HANDLE hContext
, PFN_CALLBACK_FUNC pfnFunc
, uint64_t userData
, uint64_t userData2
, uint64_t userData3
)
475 SWR_ASSERT(pfnFunc
!= nullptr);
477 SWR_CONTEXT
*pContext
= GetContext(hContext
);
478 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
480 RDTSC_BEGIN(APISync
, 0);
482 pDC
->FeWork
.type
= SYNC
;
483 pDC
->FeWork
.pfnWork
= ProcessSync
;
485 // Setup callback function
486 pDC
->retireCallback
.pfnCallbackFunc
= pfnFunc
;
487 pDC
->retireCallback
.userData
= userData
;
488 pDC
->retireCallback
.userData2
= userData2
;
489 pDC
->retireCallback
.userData3
= userData3
;
491 AR_API_EVENT(SwrSyncEvent(pDC
->drawId
));
496 RDTSC_END(APISync
, 1);
499 void SwrStallBE(HANDLE hContext
)
501 SWR_CONTEXT
* pContext
= GetContext(hContext
);
502 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
504 pDC
->dependent
= true;
507 void SwrWaitForIdle(HANDLE hContext
)
509 SWR_CONTEXT
*pContext
= GetContext(hContext
);
511 RDTSC_BEGIN(APIWaitForIdle
, 0);
513 while (!pContext
->dcRing
.IsEmpty())
518 RDTSC_END(APIWaitForIdle
, 1);
521 void SwrWaitForIdleFE(HANDLE hContext
)
523 SWR_CONTEXT
*pContext
= GetContext(hContext
);
525 RDTSC_BEGIN(APIWaitForIdle
, 0);
527 while (pContext
->drawsOutstandingFE
> 0)
532 RDTSC_END(APIWaitForIdle
, 1);
535 void SwrSetVertexBuffers(
538 const SWR_VERTEX_BUFFER_STATE
* pVertexBuffers
)
540 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
542 for (uint32_t i
= 0; i
< numBuffers
; ++i
)
544 const SWR_VERTEX_BUFFER_STATE
*pVB
= &pVertexBuffers
[i
];
545 pState
->vertexBuffers
[pVB
->index
] = *pVB
;
549 void SwrSetIndexBuffer(
551 const SWR_INDEX_BUFFER_STATE
* pIndexBuffer
)
553 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
555 pState
->indexBuffer
= *pIndexBuffer
;
558 void SwrSetFetchFunc(
560 PFN_FETCH_FUNC pfnFetchFunc
)
562 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
564 pState
->pfnFetchFunc
= pfnFetchFunc
;
569 PFN_SO_FUNC pfnSoFunc
,
570 uint32_t streamIndex
)
572 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
574 SWR_ASSERT(streamIndex
< MAX_SO_STREAMS
);
576 pState
->pfnSoFunc
[streamIndex
] = pfnSoFunc
;
581 SWR_STREAMOUT_STATE
* pSoState
)
583 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
585 pState
->soState
= *pSoState
;
588 void SwrSetSoBuffers(
590 SWR_STREAMOUT_BUFFER
* pSoBuffer
,
593 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
595 SWR_ASSERT((slot
< 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot
);
597 pState
->soBuffer
[slot
] = *pSoBuffer
;
600 void SwrSetVertexFunc(
602 PFN_VERTEX_FUNC pfnVertexFunc
)
604 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
606 pState
->pfnVertexFunc
= pfnVertexFunc
;
609 void SwrSetFrontendState(
611 SWR_FRONTEND_STATE
*pFEState
)
613 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
614 pState
->frontendState
= *pFEState
;
619 SWR_GS_STATE
*pGSState
)
621 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
622 pState
->gsState
= *pGSState
;
627 PFN_GS_FUNC pfnGsFunc
)
629 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
630 pState
->pfnGsFunc
= pfnGsFunc
;
635 PFN_CS_FUNC pfnCsFunc
,
636 uint32_t totalThreadsInGroup
,
637 uint32_t totalSpillFillSize
,
638 uint32_t scratchSpaceSizePerInstance
,
639 uint32_t numInstances
)
641 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
642 pState
->pfnCsFunc
= pfnCsFunc
;
643 pState
->totalThreadsInGroup
= totalThreadsInGroup
;
644 pState
->totalSpillFillSize
= totalSpillFillSize
;
645 pState
->scratchSpaceSize
= scratchSpaceSizePerInstance
;
646 pState
->scratchSpaceNumInstances
= numInstances
;
651 SWR_TS_STATE
*pState
)
653 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
654 pApiState
->tsState
= *pState
;
661 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
662 pApiState
->pfnHsFunc
= pfnFunc
;
669 API_STATE
* pApiState
= GetDrawState(GetContext(hContext
));
670 pApiState
->pfnDsFunc
= pfnFunc
;
673 void SwrSetDepthStencilState(
675 SWR_DEPTH_STENCIL_STATE
*pDSState
)
677 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
679 pState
->depthStencilState
= *pDSState
;
682 void SwrSetBackendState(
684 SWR_BACKEND_STATE
*pBEState
)
686 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
688 pState
->backendState
= *pBEState
;
691 void SwrSetDepthBoundsState(
693 SWR_DEPTH_BOUNDS_STATE
*pDBState
)
695 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
697 pState
->depthBoundsState
= *pDBState
;
700 void SwrSetPixelShaderState(
702 SWR_PS_STATE
*pPSState
)
704 API_STATE
*pState
= GetDrawState(GetContext(hContext
));
705 pState
->psState
= *pPSState
;
708 void SwrSetBlendState(
710 SWR_BLEND_STATE
*pBlendState
)
712 API_STATE
*pState
= GetDrawState(GetContext(hContext
));
713 memcpy(&pState
->blendState
, pBlendState
, sizeof(SWR_BLEND_STATE
));
716 void SwrSetBlendFunc(
718 uint32_t renderTarget
,
719 PFN_BLEND_JIT_FUNC pfnBlendFunc
)
721 SWR_ASSERT(renderTarget
< SWR_NUM_RENDERTARGETS
);
722 API_STATE
*pState
= GetDrawState(GetContext(hContext
));
723 pState
->pfnBlendFunc
[renderTarget
] = pfnBlendFunc
;
726 // update guardband multipliers for the viewport
727 void updateGuardbands(API_STATE
*pState
)
729 uint32_t numGbs
= pState
->backendState
.readViewportArrayIndex
? KNOB_NUM_VIEWPORTS_SCISSORS
: 1;
731 for(uint32_t i
= 0; i
< numGbs
; ++i
)
733 // guardband center is viewport center
734 pState
->gbState
.left
[i
] = KNOB_GUARDBAND_WIDTH
/ pState
->vp
[i
].width
;
735 pState
->gbState
.right
[i
] = KNOB_GUARDBAND_WIDTH
/ pState
->vp
[i
].width
;
736 pState
->gbState
.top
[i
] = KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[i
].height
;
737 pState
->gbState
.bottom
[i
] = KNOB_GUARDBAND_HEIGHT
/ pState
->vp
[i
].height
;
741 void SwrSetRastState(
743 const SWR_RASTSTATE
*pRastState
)
745 SWR_CONTEXT
*pContext
= GetContext(hContext
);
746 API_STATE
* pState
= GetDrawState(pContext
);
748 memcpy(&pState
->rastState
, pRastState
, sizeof(SWR_RASTSTATE
));
751 void SwrSetViewports(
753 uint32_t numViewports
,
754 const SWR_VIEWPORT
* pViewports
,
755 const SWR_VIEWPORT_MATRICES
* pMatrices
)
757 SWR_ASSERT(numViewports
<= KNOB_NUM_VIEWPORTS_SCISSORS
,
758 "Invalid number of viewports.");
760 SWR_CONTEXT
*pContext
= GetContext(hContext
);
761 API_STATE
* pState
= GetDrawState(pContext
);
763 memcpy(&pState
->vp
[0], pViewports
, sizeof(SWR_VIEWPORT
) * numViewports
);
764 // @todo Faster to copy portions of the SOA or just copy all of it?
765 memcpy(&pState
->vpMatrices
, pMatrices
, sizeof(SWR_VIEWPORT_MATRICES
));
767 updateGuardbands(pState
);
770 void SwrSetScissorRects(
772 uint32_t numScissors
,
773 const SWR_RECT
* pScissors
)
775 SWR_ASSERT(numScissors
<= KNOB_NUM_VIEWPORTS_SCISSORS
,
776 "Invalid number of scissor rects.");
778 API_STATE
* pState
= GetDrawState(GetContext(hContext
));
779 memcpy(&pState
->scissorRects
[0], pScissors
, numScissors
* sizeof(pScissors
[0]));
782 void SetupMacroTileScissors(DRAW_CONTEXT
*pDC
)
784 API_STATE
*pState
= &pDC
->pState
->state
;
785 uint32_t numScissors
= pState
->backendState
.readViewportArrayIndex
? KNOB_NUM_VIEWPORTS_SCISSORS
: 1;
786 pState
->scissorsTileAligned
= true;
788 for (uint32_t index
= 0; index
< numScissors
; ++index
)
790 SWR_RECT
&scissorInFixedPoint
= pState
->scissorsInFixedPoint
[index
];
792 // Set up scissor dimensions based on scissor or viewport
793 if (pState
->rastState
.scissorEnable
)
795 scissorInFixedPoint
= pState
->scissorRects
[index
];
799 // the vp width and height must be added to origin un-rounded then the result round to -inf.
800 // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
801 scissorInFixedPoint
.xmin
= (int32_t)pState
->vp
[index
].x
;
802 scissorInFixedPoint
.xmax
= (int32_t)(pState
->vp
[index
].x
+ pState
->vp
[index
].width
);
803 scissorInFixedPoint
.ymin
= (int32_t)pState
->vp
[index
].y
;
804 scissorInFixedPoint
.ymax
= (int32_t)(pState
->vp
[index
].y
+ pState
->vp
[index
].height
);
808 scissorInFixedPoint
&= g_MaxScissorRect
;
810 // Test for tile alignment
812 tileAligned
= (scissorInFixedPoint
.xmin
% KNOB_TILE_X_DIM
) == 0;
813 tileAligned
&= (scissorInFixedPoint
.ymin
% KNOB_TILE_Y_DIM
) == 0;
814 tileAligned
&= (scissorInFixedPoint
.xmax
% KNOB_TILE_X_DIM
) == 0;
815 tileAligned
&= (scissorInFixedPoint
.ymax
% KNOB_TILE_Y_DIM
) == 0;
817 pState
->scissorsTileAligned
&= tileAligned
;
819 // Scale to fixed point
820 scissorInFixedPoint
.xmin
*= FIXED_POINT_SCALE
;
821 scissorInFixedPoint
.xmax
*= FIXED_POINT_SCALE
;
822 scissorInFixedPoint
.ymin
*= FIXED_POINT_SCALE
;
823 scissorInFixedPoint
.ymax
*= FIXED_POINT_SCALE
;
825 // Make scissor inclusive
826 scissorInFixedPoint
.xmax
-= 1;
827 scissorInFixedPoint
.ymax
-= 1;
832 // templated backend function tables
834 void SetupPipeline(DRAW_CONTEXT
*pDC
)
836 DRAW_STATE
* pState
= pDC
->pState
;
837 const SWR_RASTSTATE
&rastState
= pState
->state
.rastState
;
838 const SWR_PS_STATE
&psState
= pState
->state
.psState
;
839 BACKEND_FUNCS
& backendFuncs
= pState
->backendFuncs
;
842 if (psState
.pfnPixelShader
== nullptr)
844 backendFuncs
.pfnBackend
= gBackendNullPs
[pState
->state
.rastState
.sampleCount
];
848 const uint32_t forcedSampleCount
= (rastState
.forcedSampleCount
) ? 1 : 0;
849 const bool bMultisampleEnable
= ((rastState
.sampleCount
> SWR_MULTISAMPLE_1X
) || forcedSampleCount
) ? 1 : 0;
850 const uint32_t centroid
= ((psState
.barycentricsMask
& SWR_BARYCENTRIC_CENTROID_MASK
) > 0) ? 1 : 0;
851 const uint32_t canEarlyZ
= (psState
.forceEarlyZ
|| (!psState
.writesODepth
&& !psState
.usesUAV
)) ? 1 : 0;
852 SWR_BARYCENTRICS_MASK barycentricsMask
= (SWR_BARYCENTRICS_MASK
)psState
.barycentricsMask
;
854 // select backend function
855 switch(psState
.shadingRate
)
857 case SWR_SHADING_RATE_PIXEL
:
858 if(bMultisampleEnable
)
860 // always need to generate I & J per sample for Z interpolation
861 barycentricsMask
= (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
862 backendFuncs
.pfnBackend
= gBackendPixelRateTable
[rastState
.sampleCount
][rastState
.bIsCenterPattern
][psState
.inputCoverage
]
863 [centroid
][forcedSampleCount
][canEarlyZ
]
868 // always need to generate I & J per pixel for Z interpolation
869 barycentricsMask
= (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_PIXEL_MASK
);
870 backendFuncs
.pfnBackend
= gBackendSingleSample
[psState
.inputCoverage
][centroid
][canEarlyZ
];
873 case SWR_SHADING_RATE_SAMPLE
:
874 SWR_ASSERT(rastState
.bIsCenterPattern
!= true);
875 // always need to generate I & J per sample for Z interpolation
876 barycentricsMask
= (SWR_BARYCENTRICS_MASK
)(barycentricsMask
| SWR_BARYCENTRIC_PER_SAMPLE_MASK
);
877 backendFuncs
.pfnBackend
= gBackendSampleRateTable
[rastState
.sampleCount
][psState
.inputCoverage
][centroid
][canEarlyZ
];
880 SWR_ASSERT(0 && "Invalid shading rate");
885 SWR_ASSERT(backendFuncs
.pfnBackend
);
887 PFN_PROCESS_PRIMS pfnBinner
;
888 #if USE_SIMD16_FRONTEND
889 PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16
;
891 switch (pState
->state
.topology
)
894 pState
->pfnProcessPrims
= ClipPoints
;
895 pfnBinner
= BinPoints
;
896 #if USE_SIMD16_FRONTEND
897 pState
->pfnProcessPrims_simd16
= ClipPoints_simd16
;
898 pfnBinner_simd16
= BinPoints_simd16
;
904 case TOP_LINE_LIST_ADJ
:
905 case TOP_LISTSTRIP_ADJ
:
906 pState
->pfnProcessPrims
= ClipLines
;
907 pfnBinner
= BinLines
;
908 #if USE_SIMD16_FRONTEND
909 pState
->pfnProcessPrims_simd16
= ClipLines_simd16
;
910 pfnBinner_simd16
= BinLines_simd16
;
914 pState
->pfnProcessPrims
= ClipTriangles
;
915 pfnBinner
= GetBinTrianglesFunc((rastState
.conservativeRast
> 0));
916 #if USE_SIMD16_FRONTEND
917 pState
->pfnProcessPrims_simd16
= ClipTriangles_simd16
;
918 pfnBinner_simd16
= GetBinTrianglesFunc_simd16((rastState
.conservativeRast
> 0));
924 // disable clipper if viewport transform is disabled
925 if (pState
->state
.frontendState
.vpTransformDisable
)
927 pState
->pfnProcessPrims
= pfnBinner
;
928 #if USE_SIMD16_FRONTEND
929 pState
->pfnProcessPrims_simd16
= pfnBinner_simd16
;
933 if ((pState
->state
.psState
.pfnPixelShader
== nullptr) &&
934 (pState
->state
.depthStencilState
.depthTestEnable
== FALSE
) &&
935 (pState
->state
.depthStencilState
.depthWriteEnable
== FALSE
) &&
936 (pState
->state
.depthStencilState
.stencilTestEnable
== FALSE
) &&
937 (pState
->state
.depthStencilState
.stencilWriteEnable
== FALSE
) &&
938 (pState
->state
.backendState
.numAttributes
== 0))
940 pState
->pfnProcessPrims
= nullptr;
941 #if USE_SIMD16_FRONTEND
942 pState
->pfnProcessPrims_simd16
= nullptr;
946 if (pState
->state
.soState
.rasterizerDisable
== true)
948 pState
->pfnProcessPrims
= nullptr;
949 #if USE_SIMD16_FRONTEND
950 pState
->pfnProcessPrims_simd16
= nullptr;
955 // set up the frontend attribute count
956 pState
->state
.feNumAttributes
= 0;
957 const SWR_BACKEND_STATE
& backendState
= pState
->state
.backendState
;
958 if (backendState
.swizzleEnable
)
960 // attribute swizzling is enabled, iterate over the map and record the max attribute used
961 for (uint32_t i
= 0; i
< backendState
.numAttributes
; ++i
)
963 pState
->state
.feNumAttributes
= std::max(pState
->state
.feNumAttributes
, (uint32_t)backendState
.swizzleMap
[i
].sourceAttrib
+ 1);
968 pState
->state
.feNumAttributes
= pState
->state
.backendState
.numAttributes
;
971 if (pState
->state
.soState
.soEnable
)
973 uint32_t streamMasks
= 0;
974 for (uint32_t i
= 0; i
< 4; ++i
)
976 streamMasks
|= pState
->state
.soState
.streamMasks
[i
];
980 if (_BitScanReverse(&maxAttrib
, streamMasks
))
982 pState
->state
.feNumAttributes
= std::max(pState
->state
.feNumAttributes
, (uint32_t)(maxAttrib
+ 1));
986 // complicated logic to test for cases where we don't need backing hottile memory for a draw
987 // have to check for the special case where depth/stencil test is enabled but depthwrite is disabled.
988 pState
->state
.depthHottileEnable
= ((!(pState
->state
.depthStencilState
.depthTestEnable
&&
989 !pState
->state
.depthStencilState
.depthWriteEnable
&&
990 !pState
->state
.depthBoundsState
.depthBoundsTestEnable
&&
991 pState
->state
.depthStencilState
.depthTestFunc
== ZFUNC_ALWAYS
)) &&
992 (pState
->state
.depthStencilState
.depthTestEnable
||
993 pState
->state
.depthStencilState
.depthWriteEnable
||
994 pState
->state
.depthBoundsState
.depthBoundsTestEnable
)) ? true : false;
996 pState
->state
.stencilHottileEnable
= (((!(pState
->state
.depthStencilState
.stencilTestEnable
&&
997 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
998 pState
->state
.depthStencilState
.stencilTestFunc
== ZFUNC_ALWAYS
)) ||
999 // for stencil we have to check the double sided state as well
1000 (!(pState
->state
.depthStencilState
.doubleSidedStencilTestEnable
&&
1001 !pState
->state
.depthStencilState
.stencilWriteEnable
&&
1002 pState
->state
.depthStencilState
.backfaceStencilTestFunc
== ZFUNC_ALWAYS
))) &&
1003 (pState
->state
.depthStencilState
.stencilTestEnable
||
1004 pState
->state
.depthStencilState
.stencilWriteEnable
)) ? true : false;
1007 uint32_t hotTileEnable
= pState
->state
.psState
.renderTargetMask
;
1009 // Disable hottile for surfaces with no writes
1010 if (psState
.pfnPixelShader
!= nullptr)
1013 uint32_t rtMask
= pState
->state
.psState
.renderTargetMask
;
1014 while (_BitScanForward(&rt
, rtMask
))
1016 rtMask
&= ~(1 << rt
);
1018 if (pState
->state
.blendState
.renderTarget
[rt
].writeDisableAlpha
&&
1019 pState
->state
.blendState
.renderTarget
[rt
].writeDisableRed
&&
1020 pState
->state
.blendState
.renderTarget
[rt
].writeDisableGreen
&&
1021 pState
->state
.blendState
.renderTarget
[rt
].writeDisableBlue
)
1023 hotTileEnable
&= ~(1 << rt
);
1028 pState
->state
.colorHottileEnable
= hotTileEnable
;
1031 // Setup depth quantization function
1032 if (pState
->state
.depthHottileEnable
)
1034 switch (pState
->state
.rastState
.depthFormat
)
1036 case R32_FLOAT_X8X24_TYPELESS
: pState
->state
.pfnQuantizeDepth
= QuantizeDepth
< R32_FLOAT_X8X24_TYPELESS
> ; break;
1037 case R32_FLOAT
: pState
->state
.pfnQuantizeDepth
= QuantizeDepth
< R32_FLOAT
> ; break;
1038 case R24_UNORM_X8_TYPELESS
: pState
->state
.pfnQuantizeDepth
= QuantizeDepth
< R24_UNORM_X8_TYPELESS
> ; break;
1039 case R16_UNORM
: pState
->state
.pfnQuantizeDepth
= QuantizeDepth
< R16_UNORM
> ; break;
1040 default: SWR_INVALID("Unsupported depth format for depth quantiztion.");
1041 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
< R32_FLOAT
> ;
1046 // set up pass-through quantize if depth isn't enabled
1047 pState
->state
.pfnQuantizeDepth
= QuantizeDepth
< R32_FLOAT
> ;
1051 //////////////////////////////////////////////////////////////////////////
1053 /// @param pDC - Draw context to initialize for this draw.
1058 // We don't need to re-setup the scissors/pipeline state again for split draw.
1059 if (isSplitDraw
== false)
1061 SetupMacroTileScissors(pDC
);
1068 //////////////////////////////////////////////////////////////////////////
1069 /// @brief We can split the draw for certain topologies for better performance.
///        Computes the per-draw vertex budget used when a draw is split.
1070 /// @param totalVerts - Total vertices for draw
1071 /// @param topology - Topology used for draw
/// @return Vertex budget per (split) draw. It starts out as totalVerts and is
///         only changed by the topology cases below.
// NOTE(review): the pDC parameter (original line 1073) is used below but its
// declaration is absent from this listing.
1072 uint32_t MaxVertsPerDraw(
1074 uint32_t totalVerts
,
1075 PRIMITIVE_TOPOLOGY topology
)
1077 API_STATE
& state
= pDC
->pState
->state
;
// Default: no splitting, the whole draw goes out as one piece.
1079 uint32_t vertsPerDraw
= totalVerts
;
// NOTE(review): the body of this stream-out check and the switch header
// (original lines 1082-1087) are absent from this listing - confirm what
// happens when stream-out is enabled before relying on this function.
1081 if (state
.soState
.soEnable
)
// Point and triangle lists are capped at KNOB_MAX_PRIMS_PER_DRAW.
1088 case TOP_POINT_LIST
:
1089 case TOP_TRIANGLE_LIST
:
1090 vertsPerDraw
= KNOB_MAX_PRIMS_PER_DRAW
;
// Patch lists: all 32 patch sizes share the handling at the end of this run.
1093 case TOP_PATCHLIST_1
:
1094 case TOP_PATCHLIST_2
:
1095 case TOP_PATCHLIST_3
:
1096 case TOP_PATCHLIST_4
:
1097 case TOP_PATCHLIST_5
:
1098 case TOP_PATCHLIST_6
:
1099 case TOP_PATCHLIST_7
:
1100 case TOP_PATCHLIST_8
:
1101 case TOP_PATCHLIST_9
:
1102 case TOP_PATCHLIST_10
:
1103 case TOP_PATCHLIST_11
:
1104 case TOP_PATCHLIST_12
:
1105 case TOP_PATCHLIST_13
:
1106 case TOP_PATCHLIST_14
:
1107 case TOP_PATCHLIST_15
:
1108 case TOP_PATCHLIST_16
:
1109 case TOP_PATCHLIST_17
:
1110 case TOP_PATCHLIST_18
:
1111 case TOP_PATCHLIST_19
:
1112 case TOP_PATCHLIST_20
:
1113 case TOP_PATCHLIST_21
:
1114 case TOP_PATCHLIST_22
:
1115 case TOP_PATCHLIST_23
:
1116 case TOP_PATCHLIST_24
:
1117 case TOP_PATCHLIST_25
:
1118 case TOP_PATCHLIST_26
:
1119 case TOP_PATCHLIST_27
:
1120 case TOP_PATCHLIST_28
:
1121 case TOP_PATCHLIST_29
:
1122 case TOP_PATCHLIST_30
:
1123 case TOP_PATCHLIST_31
:
1124 case TOP_PATCHLIST_32
:
// Patch draws are only split when tessellation is enabled.
1125 if (pDC
->pState
->state
.tsState
.tsEnable
)
// The offset from TOP_PATCHLIST_BASE is used as the vertex count of one patch,
// so the budget below is a whole number of patches.
1127 uint32_t vertsPerPrim
= topology
- TOP_PATCHLIST_BASE
;
1128 vertsPerDraw
= vertsPerPrim
* KNOB_MAX_TESS_PRIMS_PER_DRAW
;
// NOTE(review): the case label and statements that belong to the next comment
// (original lines 1133-1137) are absent from this listing.
1132 // The Primitive Assembly code can only handle 1 RECT at a time.
1138 // We are not splitting up draws for other topologies.
1142 return vertsPerDraw
;
1146 //////////////////////////////////////////////////////////////////////////
1147 /// @brief DrawInstanced
///        Shared implementation behind the non-indexed draw entry points.
///        Submits the draw in pieces of at most MaxVertsPerDraw() vertices,
///        queuing one draw context per piece.
1148 /// @param hContext - Handle passed back from SwrCreateContext
1149 /// @param topology - Specifies topology for draw.
1150 /// @param numVertices - How many vertices to read sequentially from vertex data (per instance).
1151 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1152 /// @param numInstances - How many instances to render.
1153 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
// NOTE(review): the function header line, the hContext parameter and original
// lines 1161-1166 at the top of the body are absent from this listing.
1156 PRIMITIVE_TOPOLOGY topology
,
1157 uint32_t numVertices
,
1158 uint32_t startVertex
,
1159 uint32_t numInstances
= 1,
1160 uint32_t startInstance
= 0)
1167 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1168 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1170 RDTSC_BEGIN(APIDraw
, pDC
->drawId
);
1171 AR_API_EVENT(DrawInstancedEvent(pDC
->drawId
, topology
, numVertices
, startVertex
, numInstances
, startInstance
));
// Split parameters: vertex budget per piece and the primitive count that
// budget corresponds to for this topology.
1173 uint32_t maxVertsPerDraw
= MaxVertsPerDraw(pDC
, numVertices
, topology
);
1174 uint32_t primsPerDraw
= GetNumPrims(topology
, maxVertsPerDraw
);
1175 uint32_t remainingVerts
= numVertices
;
1177 API_STATE
*pState
= &pDC
->pState
->state
;
1178 pState
->topology
= topology
;
1179 pState
->forceFront
= false;
1181 // disable culling for points/rects (points are additionally forced front-facing)
// The previous cull mode is saved here and written back after the loop.
1182 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1183 if (topology
== TOP_POINT_LIST
)
1185 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1186 pState
->forceFront
= true;
1188 else if (topology
== TOP_RECT_LIST
)
1190 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
// NOTE(review): the declaration of the piece counter `draw` (original line
// 1193) and its increment (original line 1226) are absent from this listing.
1194 while (remainingVerts
)
// Size of this piece: the vertex budget, or whatever is left for the last one.
1196 uint32_t numVertsForDraw
= (remainingVerts
< maxVertsPerDraw
) ?
1197 remainingVerts
: maxVertsPerDraw
;
// Every piece after the first is a split draw.
1199 bool isSplitDraw
= (draw
> 0) ? true : false;
// NOTE(review): this declaration shadows the outer pDC; DrawIndexedInstance
// assigns to the outer variable at the same point instead.
1200 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
, isSplitDraw
);
1201 InitDraw(pDC
, isSplitDraw
);
1203 pDC
->FeWork
.type
= DRAW
;
// NOTE(review): original line 1205, directly after the opening parenthesis,
// is absent from this listing - an argument may be missing here.
1204 pDC
->FeWork
.pfnWork
= GetProcessDrawFunc(
1206 false, // bEnableCutIndex
1207 pState
->tsState
.tsEnable
,
1208 pState
->gsState
.gsEnable
,
1209 pState
->soState
.soEnable
,
1210 pDC
->pState
->pfnProcessPrims
!= nullptr);
1211 pDC
->FeWork
.desc
.draw
.numVerts
= numVertsForDraw
;
// startVertex is the same for every piece; the per-piece offset is carried by
// startPrimID / startVertexID below.
1212 pDC
->FeWork
.desc
.draw
.startVertex
= startVertex
;
1213 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1214 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
1215 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
1216 pDC
->FeWork
.desc
.draw
.startVertexID
= draw
* maxVertsPerDraw
;
// cleanupState is set only on the final piece of the draw.
1218 pDC
->cleanupState
= (remainingVerts
== numVertsForDraw
);
1221 QueueDraw(pContext
);
1223 AR_API_EVENT(DrawInstancedSplitEvent(pDC
->drawId
));
1225 remainingVerts
-= numVertsForDraw
;
1229 // restore culling state
// The draw context is fetched again and the saved cull mode is written to its state.
1230 pDC
= GetDrawContext(pContext
);
1231 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1233 RDTSC_END(APIDraw
, numVertices
* numInstances
);
1236 //////////////////////////////////////////////////////////////////////////
1238 /// @param hContext - Handle passed back from SwrCreateContext
1239 /// @param topology - Specifies topology for draw.
1240 /// @param startVertex - Specifies start vertex in vertex buffer for draw.
1241 /// @param primCount - Number of vertices.
1244 PRIMITIVE_TOPOLOGY topology
,
1245 uint32_t startVertex
,
1246 uint32_t numVertices
)
1248 DrawInstanced(hContext
, topology
, numVertices
, startVertex
);
1251 //////////////////////////////////////////////////////////////////////////
1252 /// @brief SwrDrawInstanced
1253 /// @param hContext - Handle passed back from SwrCreateContext
1254 /// @param topology - Specifies topology for draw.
1255 /// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
1256 /// @param numInstances - How many instances to render.
1257 /// @param startVertex - Specifies start vertex for draw. (vertex data)
1258 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
1259 void SwrDrawInstanced(
1261 PRIMITIVE_TOPOLOGY topology
,
1262 uint32_t numVertsPerInstance
,
1263 uint32_t numInstances
,
1264 uint32_t startVertex
,
1265 uint32_t startInstance
1268 DrawInstanced(hContext
, topology
, numVertsPerInstance
, startVertex
, numInstances
, startInstance
);
1271 //////////////////////////////////////////////////////////////////////////
1272 /// @brief DrawIndexedInstance
///        Shared implementation behind the indexed draw entry points.
///        Submits the draw in pieces of at most MaxVertsPerDraw() indices,
///        queuing one draw context per piece.
1273 /// @param hContext - Handle passed back from SwrCreateContext
1274 /// @param topology - Specifies topology for draw.
1275 /// @param numIndices - Number of indices to read sequentially from index buffer.
1276 /// @param indexOffset - Starting index into index buffer.
1277 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1278 /// @param numInstances - Number of instances to render.
1279 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
// NOTE(review): the hContext and baseVertex parameters and original lines
// 1288-1293 at the top of the body are absent from this listing.
1280 void DrawIndexedInstance(
1282 PRIMITIVE_TOPOLOGY topology
,
1283 uint32_t numIndices
,
1284 uint32_t indexOffset
,
1286 uint32_t numInstances
= 1,
1287 uint32_t startInstance
= 0)
1294 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1295 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1296 API_STATE
* pState
= &pDC
->pState
->state
;
1298 RDTSC_BEGIN(APIDrawIndexed
, pDC
->drawId
);
1299 AR_API_EVENT(DrawIndexedInstancedEvent(pDC
->drawId
, topology
, numIndices
, indexOffset
, baseVertex
, numInstances
, startInstance
));
// Split parameters: index budget per piece and the primitive count that
// budget corresponds to for this topology.
1301 uint32_t maxIndicesPerDraw
= MaxVertsPerDraw(pDC
, numIndices
, topology
);
1302 uint32_t primsPerDraw
= GetNumPrims(topology
, maxIndicesPerDraw
);
1303 uint32_t remainingIndices
= numIndices
;
// Size in bytes of one index, derived from the bound index buffer format.
// It stays 0 if none of the three formats below matches.
1305 uint32_t indexSize
= 0;
1306 switch (pState
->indexBuffer
.format
)
1308 case R32_UINT
: indexSize
= sizeof(uint32_t); break;
1309 case R16_UINT
: indexSize
= sizeof(uint16_t); break;
1310 case R8_UINT
: indexSize
= sizeof(uint8_t); break;
// NOTE(review): the label this call belongs to (original line 1311) is absent
// from this listing; presumably the default case - confirm.
1312 SWR_INVALID("Invalid index buffer format: %d", pState
->indexBuffer
.format
);
// Byte pointer to the first index of the draw; the offset is computed in 64 bits.
1316 uint8_t *pIB
= (uint8_t*)pState
->indexBuffer
.pIndices
;
1317 pIB
+= (uint64_t)indexOffset
* (uint64_t)indexSize
;
1319 pState
->topology
= topology
;
1320 pState
->forceFront
= false;
1322 // disable culling for points/rects (points are additionally forced front-facing)
// The previous cull mode is saved here and written back after the loop.
1323 uint32_t oldCullMode
= pState
->rastState
.cullMode
;
1324 if (topology
== TOP_POINT_LIST
)
1326 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
1327 pState
->forceFront
= true;
1329 else if (topology
== TOP_RECT_LIST
)
1331 pState
->rastState
.cullMode
= SWR_CULLMODE_NONE
;
// NOTE(review): the declaration of the piece counter `draw` (original line
// 1333) and its increment (original line 1372) are absent from this listing.
1334 while (remainingIndices
)
// Size of this piece: the index budget, or whatever is left for the last one.
1336 uint32_t numIndicesForDraw
= (remainingIndices
< maxIndicesPerDraw
) ?
1337 remainingIndices
: maxIndicesPerDraw
;
1339 // When breaking up draw, we need to obtain new draw context for each iteration.
1340 bool isSplitDraw
= (draw
> 0) ? true : false;
1342 pDC
= GetDrawContext(pContext
, isSplitDraw
);
1343 InitDraw(pDC
, isSplitDraw
);
1345 pDC
->FeWork
.type
= DRAW
;
// NOTE(review): original line 1347, directly after the opening parenthesis,
// is absent from this listing - an argument may be missing here.
1346 pDC
->FeWork
.pfnWork
= GetProcessDrawFunc(
1348 pState
->frontendState
.bEnableCutIndex
,
1349 pState
->tsState
.tsEnable
,
1350 pState
->gsState
.gsEnable
,
1351 pState
->soState
.soEnable
,
1352 pDC
->pState
->pfnProcessPrims
!= nullptr);
1353 pDC
->FeWork
.desc
.draw
.pDC
= pDC
;
1354 pDC
->FeWork
.desc
.draw
.numIndices
= numIndicesForDraw
;
// The descriptor stores the index pointer as int* for every index format;
// the actual format travels separately in desc.draw.type.
1355 pDC
->FeWork
.desc
.draw
.pIB
= (int*)pIB
;
1356 pDC
->FeWork
.desc
.draw
.type
= pDC
->pState
->state
.indexBuffer
.format
;
1358 pDC
->FeWork
.desc
.draw
.numInstances
= numInstances
;
1359 pDC
->FeWork
.desc
.draw
.startInstance
= startInstance
;
1360 pDC
->FeWork
.desc
.draw
.baseVertex
= baseVertex
;
1361 pDC
->FeWork
.desc
.draw
.startPrimID
= draw
* primsPerDraw
;
// cleanupState is set only on the final piece of the draw.
1363 pDC
->cleanupState
= (remainingIndices
== numIndicesForDraw
);
1366 QueueDraw(pContext
);
1368 AR_API_EVENT(DrawIndexedInstancedSplitEvent(pDC
->drawId
));
// Advance to the indices of the next piece.
// NOTE(review): unlike the initial offset above, this product is computed in
// 32 bits (both operands are uint32_t).
1370 pIB
+= maxIndicesPerDraw
* indexSize
;
1371 remainingIndices
-= numIndicesForDraw
;
1375 // Restore culling state
// The draw context is fetched again and the saved cull mode is written to its state.
1376 pDC
= GetDrawContext(pContext
);
1377 pDC
->pState
->state
.rastState
.cullMode
= oldCullMode
;
1379 RDTSC_END(APIDrawIndexed
, numIndices
* numInstances
);
1383 //////////////////////////////////////////////////////////////////////////
1384 /// @brief DrawIndexed
1385 /// @param hContext - Handle passed back from SwrCreateContext
1386 /// @param topology - Specifies topology for draw.
1387 /// @param numIndices - Number of indices to read sequentially from index buffer.
1388 /// @param indexOffset - Starting index into index buffer.
1389 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1390 void SwrDrawIndexed(
1392 PRIMITIVE_TOPOLOGY topology
,
1393 uint32_t numIndices
,
1394 uint32_t indexOffset
,
1398 DrawIndexedInstance(hContext
, topology
, numIndices
, indexOffset
, baseVertex
);
1401 //////////////////////////////////////////////////////////////////////////
1402 /// @brief SwrDrawIndexedInstanced
1403 /// @param hContext - Handle passed back from SwrCreateContext
1404 /// @param topology - Specifies topology for draw.
1405 /// @param numIndices - Number of indices to read sequentially from index buffer.
1406 /// @param numInstances - Number of instances to render.
1407 /// @param indexOffset - Starting index into index buffer.
1408 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
1409 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
1410 void SwrDrawIndexedInstanced(
1412 PRIMITIVE_TOPOLOGY topology
,
1413 uint32_t numIndices
,
1414 uint32_t numInstances
,
1415 uint32_t indexOffset
,
1417 uint32_t startInstance
)
1419 DrawIndexedInstance(hContext
, topology
, numIndices
, indexOffset
, baseVertex
, numInstances
, startInstance
);
1422 //////////////////////////////////////////////////////////////////////////
1423 /// @brief SwrInvalidateTiles
1424 /// @param hContext - Handle passed back from SwrCreateContext
1425 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
1426 /// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
1427 /// be hottile size-aligned.
1428 void SWR_API
SwrInvalidateTiles(
1430 uint32_t attachmentMask
,
1431 const SWR_RECT
& invalidateRect
)
1438 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1439 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1441 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1442 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1443 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
1444 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
= invalidateRect
;
1445 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
&= g_MaxScissorRect
;
1446 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_INVALID
;
1447 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= false;
1448 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= false;
1451 QueueDraw(pContext
);
1453 AR_API_EVENT(SwrInvalidateTilesEvent(pDC
->drawId
));
1456 //////////////////////////////////////////////////////////////////////////
1457 /// @brief SwrDiscardRect
1458 /// @param hContext - Handle passed back from SwrCreateContext
1459 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
1460 /// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
1462 void SWR_API
SwrDiscardRect(
1464 uint32_t attachmentMask
,
1465 const SWR_RECT
& rect
)
1472 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1473 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1475 // Queue a load to the hottile
1476 pDC
->FeWork
.type
= DISCARDINVALIDATETILES
;
1477 pDC
->FeWork
.pfnWork
= ProcessDiscardInvalidateTiles
;
1478 pDC
->FeWork
.desc
.discardInvalidateTiles
.attachmentMask
= attachmentMask
;
1479 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
= rect
;
1480 pDC
->FeWork
.desc
.discardInvalidateTiles
.rect
&= g_MaxScissorRect
;
1481 pDC
->FeWork
.desc
.discardInvalidateTiles
.newTileState
= SWR_TILE_RESOLVED
;
1482 pDC
->FeWork
.desc
.discardInvalidateTiles
.createNewTiles
= true;
1483 pDC
->FeWork
.desc
.discardInvalidateTiles
.fullTilesOnly
= true;
1486 QueueDraw(pContext
);
1488 AR_API_EVENT(SwrDiscardRectEvent(pDC
->drawId
));
1491 //////////////////////////////////////////////////////////////////////////
1492 /// @brief SwrDispatch
1493 /// @param hContext - Handle passed back from SwrCreateContext
1494 /// @param threadGroupCountX - Number of thread groups dispatched in X direction
1495 /// @param threadGroupCountY - Number of thread groups dispatched in Y direction
1496 /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
1499 uint32_t threadGroupCountX
,
1500 uint32_t threadGroupCountY
,
1501 uint32_t threadGroupCountZ
)
1508 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1509 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1511 RDTSC_BEGIN(APIDispatch
, pDC
->drawId
);
1512 AR_API_EVENT(DispatchEvent(pDC
->drawId
, threadGroupCountX
, threadGroupCountY
, threadGroupCountZ
));
1513 pDC
->isCompute
= true; // This is a compute context.
1515 COMPUTE_DESC
* pTaskData
= (COMPUTE_DESC
*)pDC
->pArena
->AllocAligned(sizeof(COMPUTE_DESC
), 64);
1517 pTaskData
->threadGroupCountX
= threadGroupCountX
;
1518 pTaskData
->threadGroupCountY
= threadGroupCountY
;
1519 pTaskData
->threadGroupCountZ
= threadGroupCountZ
;
1521 uint32_t totalThreadGroups
= threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
;
1522 uint32_t dcIndex
= pDC
->drawId
% pContext
->MAX_DRAWS_IN_FLIGHT
;
1523 pDC
->pDispatch
= &pContext
->pDispatchQueueArray
[dcIndex
];
1524 pDC
->pDispatch
->initialize(totalThreadGroups
, pTaskData
, &ProcessComputeBE
);
1526 QueueDispatch(pContext
);
1527 RDTSC_END(APIDispatch
, threadGroupCountX
* threadGroupCountY
* threadGroupCountZ
);
1530 // Deswizzles, converts and stores current contents of the hot tiles to surface
1531 // described by pState
1532 void SWR_API
SwrStoreTiles(
1534 uint32_t attachmentMask
,
1535 SWR_TILE_STATE postStoreTileState
,
1536 const SWR_RECT
& storeRect
)
1543 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1544 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1546 RDTSC_BEGIN(APIStoreTiles
, pDC
->drawId
);
1548 pDC
->FeWork
.type
= STORETILES
;
1549 pDC
->FeWork
.pfnWork
= ProcessStoreTiles
;
1550 pDC
->FeWork
.desc
.storeTiles
.attachmentMask
= attachmentMask
;
1551 pDC
->FeWork
.desc
.storeTiles
.postStoreTileState
= postStoreTileState
;
1552 pDC
->FeWork
.desc
.storeTiles
.rect
= storeRect
;
1553 pDC
->FeWork
.desc
.storeTiles
.rect
&= g_MaxScissorRect
;
1556 QueueDraw(pContext
);
1558 AR_API_EVENT(SwrStoreTilesEvent(pDC
->drawId
));
1560 RDTSC_END(APIStoreTiles
, 1);
1563 //////////////////////////////////////////////////////////////////////////
1564 /// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
///        Fills in a CLEAR work item on the current draw context and queues it.
1565 /// @param hContext - Handle passed back from SwrCreateContext
1566 /// @param attachmentMask - combination of SWR_ATTACHMENT_*_BIT attachments to clear
1567 /// @param renderTargetArrayIndex - the RT array index to clear
1568 /// @param clearColor - color use for clearing render targets
1569 /// @param z - depth value use for clearing depth buffer
1570 /// @param stencil - stencil value used for clearing stencil buffer
1571 /// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
// NOTE(review): the hContext, z and stencil parameters and original lines
// 1580-1585 at the top of the body are absent from this listing.
1572 void SWR_API
SwrClearRenderTarget(
1574 uint32_t attachmentMask
,
1575 uint32_t renderTargetArrayIndex
,
1576 const float clearColor
[4],
1579 const SWR_RECT
& clearRect
)
1586 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1587 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1589 RDTSC_BEGIN(APIClearRenderTarget
, pDC
->drawId
);
1591 pDC
->FeWork
.type
= CLEAR
;
1592 pDC
->FeWork
.pfnWork
= ProcessClear
;
// The clear rectangle is combined with the global maximum scissor rectangle.
1593 pDC
->FeWork
.desc
.clear
.rect
= clearRect
;
1594 pDC
->FeWork
.desc
.clear
.rect
&= g_MaxScissorRect
;
1595 pDC
->FeWork
.desc
.clear
.attachmentMask
= attachmentMask
;
1596 pDC
->FeWork
.desc
.clear
.renderTargetArrayIndex
= renderTargetArrayIndex
;
1597 pDC
->FeWork
.desc
.clear
.clearDepth
= z
;
// The four colour components are copied into the work item one by one.
1598 pDC
->FeWork
.desc
.clear
.clearRTColor
[0] = clearColor
[0];
1599 pDC
->FeWork
.desc
.clear
.clearRTColor
[1] = clearColor
[1];
1600 pDC
->FeWork
.desc
.clear
.clearRTColor
[2] = clearColor
[2];
1601 pDC
->FeWork
.desc
.clear
.clearRTColor
[3] = clearColor
[3];
1602 pDC
->FeWork
.desc
.clear
.clearStencil
= stencil
;
// Hand the work item to the queue.
1605 QueueDraw(pContext
);
1607 RDTSC_END(APIClearRenderTarget
, 1);
1610 //////////////////////////////////////////////////////////////////////////
1611 /// @brief Returns a pointer to the private context state for the current
1612 /// draw operation. This is used for external componets such as the
1614 /// SWR is responsible for the allocation of the private context state.
1615 /// @param hContext - Handle passed back from SwrCreateContext
1616 VOID
* SwrGetPrivateContextState(
1619 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1620 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1621 DRAW_STATE
* pState
= pDC
->pState
;
1623 if (pState
->pPrivateState
== nullptr)
1625 pState
->pPrivateState
= pState
->pArena
->AllocAligned(pContext
->privateStateSize
, KNOB_SIMD_WIDTH
*sizeof(float));
1628 return pState
->pPrivateState
;
1631 //////////////////////////////////////////////////////////////////////////
1632 /// @brief Clients can use this to allocate memory for draw/dispatch
1633 /// operations. The memory will automatically be freed once operation
1634 /// has completed. Client can use this to allocate binding tables,
1635 /// etc. needed for shader execution.
1636 /// @param hContext - Handle passed back from SwrCreateContext
1637 /// @param size - Size of allocation
1638 /// @param align - Alignment needed for allocation.
/// @return Whatever AllocAligned(size, align) returns for the arena of the
///         current draw state.
// NOTE(review): the parameter list (original lines 1640-1642) is absent from
// this listing, so the types of size and align cannot be confirmed here.
1639 VOID
* SwrAllocDrawContextMemory(
1644 SWR_CONTEXT
* pContext
= GetContext(hContext
);
1645 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
// The allocation comes from the draw state's arena (pDC->pState->pArena).
1647 return pDC
->pState
->pArena
->AllocAligned(size
, align
);
1650 //////////////////////////////////////////////////////////////////////////
1651 /// @brief Enables stats counting
1652 /// @param hContext - Handle passed back from SwrCreateContext
1653 /// @param enable - If true then counts are incremented.
1654 void SwrEnableStatsFE(
1658 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1659 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1661 pDC
->pState
->state
.enableStatsFE
= enable
;
1664 //////////////////////////////////////////////////////////////////////////
1665 /// @brief Enables stats counting
1666 /// @param hContext - Handle passed back from SwrCreateContext
1667 /// @param enable - If true then counts are incremented.
1668 void SwrEnableStatsBE(
1672 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1673 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1675 pDC
->pState
->state
.enableStatsBE
= enable
;
1678 //////////////////////////////////////////////////////////////////////////
1679 /// @brief Mark end of frame - used for performance profiling
1680 /// @param hContext - Handle passed back from SwrCreateContext
1681 void SWR_API
SwrEndFrame(
1684 SWR_CONTEXT
*pContext
= GetContext(hContext
);
1685 DRAW_CONTEXT
* pDC
= GetDrawContext(pContext
);
1686 (void)pDC
; // var used
1689 AR_API_EVENT(FrameEndEvent(pContext
->frameCount
, pDC
->drawId
));
1691 pContext
->frameCount
++;
1694 void InitSimLoadTilesTable();
1695 void InitSimStoreTilesTable();
1696 void InitSimClearTilesTable();
1698 void InitClearTilesTable();
1699 void InitBackendFuncTables();
1701 //////////////////////////////////////////////////////////////////////////
1702 /// @brief Initialize swr backend and memory internal tables
1705 InitSimLoadTilesTable();
1706 InitSimStoreTilesTable();
1707 InitSimClearTilesTable();
1709 InitClearTilesTable();
1710 InitBackendFuncTables();
1711 InitRasterizerFunctions();
1714 void SwrGetInterface(SWR_INTERFACE
&out_funcs
)
1716 out_funcs
.pfnSwrCreateContext
= SwrCreateContext
;
1717 out_funcs
.pfnSwrDestroyContext
= SwrDestroyContext
;
1718 out_funcs
.pfnSwrBindApiThread
= SwrBindApiThread
;
1719 out_funcs
.pfnSwrSaveState
= SwrSaveState
;
1720 out_funcs
.pfnSwrRestoreState
= SwrRestoreState
;
1721 out_funcs
.pfnSwrSync
= SwrSync
;
1722 out_funcs
.pfnSwrStallBE
= SwrStallBE
;
1723 out_funcs
.pfnSwrWaitForIdle
= SwrWaitForIdle
;
1724 out_funcs
.pfnSwrWaitForIdleFE
= SwrWaitForIdleFE
;
1725 out_funcs
.pfnSwrSetVertexBuffers
= SwrSetVertexBuffers
;
1726 out_funcs
.pfnSwrSetIndexBuffer
= SwrSetIndexBuffer
;
1727 out_funcs
.pfnSwrSetFetchFunc
= SwrSetFetchFunc
;
1728 out_funcs
.pfnSwrSetSoFunc
= SwrSetSoFunc
;
1729 out_funcs
.pfnSwrSetSoState
= SwrSetSoState
;
1730 out_funcs
.pfnSwrSetSoBuffers
= SwrSetSoBuffers
;
1731 out_funcs
.pfnSwrSetVertexFunc
= SwrSetVertexFunc
;
1732 out_funcs
.pfnSwrSetFrontendState
= SwrSetFrontendState
;
1733 out_funcs
.pfnSwrSetGsState
= SwrSetGsState
;
1734 out_funcs
.pfnSwrSetGsFunc
= SwrSetGsFunc
;
1735 out_funcs
.pfnSwrSetCsFunc
= SwrSetCsFunc
;
1736 out_funcs
.pfnSwrSetTsState
= SwrSetTsState
;
1737 out_funcs
.pfnSwrSetHsFunc
= SwrSetHsFunc
;
1738 out_funcs
.pfnSwrSetDsFunc
= SwrSetDsFunc
;
1739 out_funcs
.pfnSwrSetDepthStencilState
= SwrSetDepthStencilState
;
1740 out_funcs
.pfnSwrSetBackendState
= SwrSetBackendState
;
1741 out_funcs
.pfnSwrSetDepthBoundsState
= SwrSetDepthBoundsState
;
1742 out_funcs
.pfnSwrSetPixelShaderState
= SwrSetPixelShaderState
;
1743 out_funcs
.pfnSwrSetBlendState
= SwrSetBlendState
;
1744 out_funcs
.pfnSwrSetBlendFunc
= SwrSetBlendFunc
;
1745 out_funcs
.pfnSwrDraw
= SwrDraw
;
1746 out_funcs
.pfnSwrDrawInstanced
= SwrDrawInstanced
;
1747 out_funcs
.pfnSwrDrawIndexed
= SwrDrawIndexed
;
1748 out_funcs
.pfnSwrDrawIndexedInstanced
= SwrDrawIndexedInstanced
;
1749 out_funcs
.pfnSwrInvalidateTiles
= SwrInvalidateTiles
;
1750 out_funcs
.pfnSwrDiscardRect
= SwrDiscardRect
;
1751 out_funcs
.pfnSwrDispatch
= SwrDispatch
;
1752 out_funcs
.pfnSwrStoreTiles
= SwrStoreTiles
;
1753 out_funcs
.pfnSwrClearRenderTarget
= SwrClearRenderTarget
;
1754 out_funcs
.pfnSwrSetRastState
= SwrSetRastState
;
1755 out_funcs
.pfnSwrSetViewports
= SwrSetViewports
;
1756 out_funcs
.pfnSwrSetScissorRects
= SwrSetScissorRects
;
1757 out_funcs
.pfnSwrGetPrivateContextState
= SwrGetPrivateContextState
;
1758 out_funcs
.pfnSwrAllocDrawContextMemory
= SwrAllocDrawContextMemory
;
1759 out_funcs
.pfnSwrEnableStatsFE
= SwrEnableStatsFE
;
1760 out_funcs
.pfnSwrEnableStatsBE
= SwrEnableStatsBE
;
1761 out_funcs
.pfnSwrEndFrame
= SwrEndFrame
;
1762 out_funcs
.pfnSwrInit
= SwrInit
;
1763 out_funcs
.pfnSwrLoadHotTile
= SwrLoadHotTile
;
1764 out_funcs
.pfnSwrStoreHotTileToSurface
= SwrStoreHotTileToSurface
;
1765 out_funcs
.pfnSwrStoreHotTileClear
= SwrStoreHotTileClear
;