1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
26 * The SWR_CONTEXT is our global context and contains the DC ring,
29 * The DRAW_CONTEXT contains all state associated with a draw operation.
31 ******************************************************************************/
34 #include <condition_variable>
38 #include "core/utils.h"
39 #include "core/arena.h"
40 #include "core/fifo.hpp"
41 #include "core/knobs.h"
42 #include "common/simdintrin.h"
43 #include "core/threads.h"
// x.8 fixed point precision values.
// The scale is derived from the shift so the two can never drift apart.
#define FIXED_POINT_SHIFT 8
#define FIXED_POINT_SCALE (1 << FIXED_POINT_SHIFT) // 256

// x.16 fixed point precision values.
#define FIXED_POINT16_SHIFT 16
#define FIXED_POINT16_SCALE (1 << FIXED_POINT16_SHIFT) // 65536
58 uint32_t frontFacing
: 1;
60 uint32_t coverageMask
: (SIMD_TILE_X_DIM
* SIMD_TILE_Y_DIM
);
61 uint32_t reserved
: 32 - 1 - 1 - (SIMD_TILE_X_DIM
* SIMD_TILE_Y_DIM
);
64 uint32_t renderTargetArrayIndex
;
67 //////////////////////////////////////////////////////////////////////////
69 /////////////////////////////////////////////////////////////////////////
70 struct SWR_TRIANGLE_DESC
82 float *pUserClipBuffer
;
84 uint64_t coverageMask
[SWR_MAX_NUM_MULTISAMPLES
];
89 struct TRIANGLE_WORK_DESC
93 float *pUserClipBuffer
;
110 float clearRTColor
[4]; // RGBA_32F
111 float clearDepth
; // [0..1]
115 struct INVALIDATE_TILES_DESC
117 uint32_t attachmentMask
;
122 PFN_CALLBACK_FUNC pfnCallbackFunc
;
133 struct STORE_TILES_DESC
135 SWR_RENDERTARGET_ATTACHMENT attachment
;
136 SWR_TILE_STATE postStoreTileState
;
141 uint32_t threadGroupCountX
;
142 uint32_t threadGroupCountY
;
143 uint32_t threadGroupCountZ
;
146 typedef void(*PFN_WORK_FUNC
)(DRAW_CONTEXT
* pDC
, uint32_t workerId
, uint32_t macroTile
, void* pDesc
);
161 PFN_WORK_FUNC pfnWork
;
165 TRIANGLE_WORK_DESC tri
;
167 INVALIDATE_TILES_DESC invalidateTiles
;
168 STORE_TILES_DESC storeTiles
;
169 QUERY_DESC queryStats
;
178 uint32_t numIndices
; // DrawIndexed: Number of indices for draw.
179 uint32_t numVerts
; // Draw: Number of verts (triangles, lines, etc)
183 const int32_t* pIB
; // DrawIndexed: App supplied indices
184 uint32_t startVertex
; // Draw: Starting vertex in VB to render from.
187 uint32_t numInstances
; // Number of instances
188 uint32_t startInstance
; // Instance offset
189 uint32_t startPrimID
; // starting primitiveID for this draw batch
190 uint32_t startVertexID
; // starting VertexID for this draw batch (only needed for non-indexed draws)
191 SWR_FORMAT type
; // index buffer type
194 typedef void(*PFN_FE_WORK_FUNC
)(SWR_CONTEXT
* pContext
, DRAW_CONTEXT
* pDC
, uint32_t workerId
, void* pDesc
);
198 PFN_FE_WORK_FUNC pfnWork
;
204 INVALIDATE_TILES_DESC invalidateTiles
;
205 STORE_TILES_DESC storeTiles
;
206 QUERY_DESC queryStats
;
212 float left
, right
, top
, bottom
;
217 // function signature for pipeline stages that execute after primitive assembly
218 typedef void(*PFN_PROCESS_PRIMS
)(DRAW_CONTEXT
*pDC
, PA_STATE
& pa
, uint32_t workerId
, simdvector prims
[],
219 uint32_t primMask
, simdscalari primID
);
221 OSALIGNLINE(struct) API_STATE
224 SWR_VERTEX_BUFFER_STATE vertexBuffers
[KNOB_NUM_STREAMS
];
227 SWR_INDEX_BUFFER_STATE indexBuffer
;
229 // FS - Fetch Shader State
230 PFN_FETCH_FUNC pfnFetchFunc
;
232 // VS - Vertex Shader State
233 PFN_VERTEX_FUNC pfnVertexFunc
;
235 // GS - Geometry Shader State
236 PFN_GS_FUNC pfnGsFunc
;
237 SWR_GS_STATE gsState
;
239 // CS - Compute Shader
240 PFN_CS_FUNC pfnCsFunc
;
241 uint32_t totalThreadsInGroup
;
243 // FE - Frontend State
244 SWR_FRONTEND_STATE frontendState
;
246 // SOS - Streamout Shader State
247 PFN_SO_FUNC pfnSoFunc
[MAX_SO_STREAMS
];
250 SWR_STREAMOUT_STATE soState
;
251 mutable SWR_STREAMOUT_BUFFER soBuffer
[MAX_SO_STREAMS
];
253 // Tessellation State
254 PFN_HS_FUNC pfnHsFunc
;
255 PFN_DS_FUNC pfnDsFunc
;
256 SWR_TS_STATE tsState
;
258 // Specifies which VS outputs are sent to PS.
259 // Does not include position
260 uint32_t linkageMask
;
261 uint32_t linkageCount
;
262 uint8_t linkageMap
[MAX_ATTRIBUTES
];
264 // attrib mask, specifies the total set of attributes used
265 // by the frontend (vs, so, gs)
266 uint32_t feAttribMask
;
268 PRIMITIVE_TOPOLOGY topology
;
271 // RS - Rasterizer State
272 SWR_RASTSTATE rastState
;
273 // floating point multisample offsets
274 float samplePos
[SWR_MAX_NUM_MULTISAMPLES
* 2];
278 SWR_VIEWPORT vp
[KNOB_NUM_VIEWPORTS_SCISSORS
];
279 SWR_VIEWPORT_MATRIX vpMatrix
[KNOB_NUM_VIEWPORTS_SCISSORS
];
281 BBOX scissorRects
[KNOB_NUM_VIEWPORTS_SCISSORS
];
282 BBOX scissorInFixedPoint
;
285 SWR_BACKEND_STATE backendState
;
287 // PS - Pixel shader state
288 SWR_PS_STATE psState
;
290 SWR_DEPTH_STENCIL_STATE depthStencilState
;
292 // OM - Output Merger State
293 SWR_BLEND_STATE blendState
;
294 PFN_BLEND_JIT_FUNC pfnBlendFunc
[SWR_NUM_RENDERTARGETS
];
296 // Stats are incremented when this is true.
301 uint32_t colorHottileEnable
: 8;
302 uint32_t depthHottileEnable
: 1;
303 uint32_t stencilHottileEnable
: 1;
310 struct RenderOutputBuffers
312 uint8_t* pColor
[SWR_NUM_RENDERTARGETS
];
317 // Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
318 struct BarycentricCoeffs
332 simdscalar vRecipDet
;
334 simdscalar vAOneOverW
;
335 simdscalar vBOneOverW
;
336 simdscalar vCOneOverW
;
339 // pipeline function pointer types
340 typedef void(*PFN_BACKEND_FUNC
)(DRAW_CONTEXT
*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC
&, RenderOutputBuffers
&);
341 typedef void(*PFN_OUTPUT_MERGER
)(SWR_PS_CONTEXT
&, uint8_t* (&)[SWR_NUM_RENDERTARGETS
], uint32_t, const SWR_BLEND_STATE
*,
342 const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS
], simdscalar
&, simdscalar
);
343 typedef void(*PFN_CALC_PIXEL_BARYCENTRICS
)(const BarycentricCoeffs
&, SWR_PS_CONTEXT
&);
344 typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS
)(const BarycentricCoeffs
&, SWR_PS_CONTEXT
&);
345 typedef void(*PFN_CALC_CENTROID_BARYCENTRICS
)(const BarycentricCoeffs
&, SWR_PS_CONTEXT
&, const uint64_t *const, const uint32_t,
346 const simdscalar
, const simdscalar
);
350 PFN_BACKEND_FUNC pfnBackend
;
351 PFN_CALC_PIXEL_BARYCENTRICS pfnCalcPixelBarycentrics
;
352 PFN_CALC_SAMPLE_BARYCENTRICS pfnCalcSampleBarycentrics
;
353 PFN_CALC_CENTROID_BARYCENTRICS pfnCalcCentroidBarycentrics
;
354 PFN_OUTPUT_MERGER pfnOutputMerger
;
362 void* pPrivateState
; // Its required the driver sets this up for each draw.
364 // pipeline function pointers, filled in by API thread when setting up the draw
365 BACKEND_FUNCS backendFuncs
;
366 PFN_PROCESS_PRIMS pfnProcessPrims
;
368 Arena
* pArena
; // This should only be used by API thread.
372 // The api thread sets up a draw context that exists for the life of the draw.
373 // This draw context maintains all of the state needed for the draw operation.
376 SWR_CONTEXT
*pContext
;
380 bool isCompute
; // Is this DC a compute context?
383 volatile OSALIGNLINE(uint32_t) FeLock
;
384 volatile OSALIGNLINE(bool) inUse
;
385 volatile OSALIGNLINE(bool) doneFE
; // Is FE work done for this draw?
387 // Have all worker threads moved past draw in DC ring?
388 volatile OSALIGNLINE(uint32_t) threadsDoneFE
;
389 volatile OSALIGNLINE(uint32_t) threadsDoneBE
;
393 MacroTileMgr
* pTileMgr
;
395 // The following fields are valid if isCompute is true.
396 volatile OSALIGNLINE(bool) doneCompute
; // Is this dispatch done? (isCompute)
397 DispatchQueue
* pDispatch
; // Queue for thread groups. (isCompute)
402 uint8_t* pSpillFill
[KNOB_MAX_NUM_THREADS
]; // Scratch space used for spill fills.
405 INLINE
const API_STATE
& GetApiState(const DRAW_CONTEXT
* pDC
)
407 SWR_ASSERT(pDC
!= nullptr);
408 SWR_ASSERT(pDC
->pState
!= nullptr);
410 return pDC
->pState
->state
;
413 INLINE
void* GetPrivateState(const DRAW_CONTEXT
* pDC
)
415 SWR_ASSERT(pDC
!= nullptr);
416 SWR_ASSERT(pDC
->pState
!= nullptr);
418 return pDC
->pState
->pPrivateState
;
426 // Each draw needs its own state in order to support multiple draws in flight across multiple threads.
427 // We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
428 // of draws that can be in flight at any given time.
431 // 1. State - When an application first sets state we'll request a new draw context to use.
432 // a. If there are no available draw contexts then we'll have to wait until one becomes free.
433 // b. If one is available then set pCurDrawContext to point to it and mark it in use.
434 // c. All state calls set state on pCurDrawContext.
435 // 2. Draw - Creates and submits a work item that is associated with the current draw context.
436 // a. Set pPrevDrawContext = pCurDrawContext
437 // b. Set pCurDrawContext to NULL.
438 // 3. State - When an application sets state after a draw
439 // a. Same as step 1.
440 // b. State is copied from prev draw context to current.
441 DRAW_CONTEXT
* dcRing
;
443 DRAW_CONTEXT
*pCurDrawContext
; // This points to DC entry in ring for an unsubmitted draw.
444 DRAW_CONTEXT
*pPrevDrawContext
; // This points to DC entry for the previous context submitted that we can copy state from.
447 // When draws are very large (lots of primitives) then the API thread will break these up.
448 // These split draws all have identical state. So instead of storing the state directly
449 // in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
450 // to reference a single entry in the DS ring.
453 uint32_t curStateId
; // Current index to the next available entry in the DS ring.
455 DRAW_STATE
* subCtxSave
; // Save area for inactive contexts.
456 uint32_t curSubCtxId
; // Current index for active state subcontext.
457 uint32_t numSubContexts
; // Number of available subcontexts
459 uint32_t NumWorkerThreads
;
461 THREAD_POOL threadPool
; // Thread pool associated with this context
463 std::condition_variable FifosNotEmpty
;
466 // Draw Contexts will get a unique drawId generated from this
469 // most recent draw id enqueued by the API thread
470 // written by api thread, read by multiple workers
471 OSALIGNLINE(volatile uint64_t) DrawEnqueued
;
473 DRIVER_TYPE driverType
;
475 uint32_t privateStateSize
;
477 HotTileMgr
*pHotTileMgr
;
479 // tile load/store functions, passed in at create context time
480 PFN_LOAD_TILE pfnLoadTile
;
481 PFN_STORE_TILE pfnStoreTile
;
482 PFN_CLEAR_TILE pfnClearTile
;
485 SWR_STATS stats
[KNOB_MAX_NUM_THREADS
];
487 // Scratch space for workers.
488 uint8_t* pScratch
[KNOB_MAX_NUM_THREADS
];
491 void WaitForDependencies(SWR_CONTEXT
*pContext
, uint64_t drawId
);
492 void WakeAllThreads(SWR_CONTEXT
*pContext
);
// Per-worker statistics helpers.
//   UPDATE_STAT(name, count) adds `count` to pContext->stats[workerId].name.
//   SET_STAT(name, count)    overwrites that counter with `count`.
// Both do nothing unless GetApiState(pDC).enableStats is set, and both expect
// `pDC`, `pContext` and `workerId` to be in scope where they are expanded.
// Each expands to a bare `if` statement (kept that way so existing call sites
// are unaffected); put braces around the call when it is the body of an
// if/else so that a following `else` cannot attach to the macro's `if`.
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += (count); }
#define SET_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name = (count); }