swr: [rasterizer core] only use Viewport/Scissors during SwrDraw* operations
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / context.h
1 /****************************************************************************
2 * Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file context.h
24 *
25 * @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
26 * The SWR_CONTEXT is our global context and contains the DC ring,
27 * thread state, etc.
28 *
29 * The DRAW_CONTEXT contains all state associated with a draw operation.
30 *
31 ******************************************************************************/
32 #pragma once
33
34 #include <condition_variable>
35 #include <algorithm>
36
37 #include "core/api.h"
38 #include "core/utils.h"
39 #include "core/arena.h"
40 #include "core/fifo.hpp"
41 #include "core/knobs.h"
42 #include "common/simdintrin.h"
43 #include "core/threads.h"
44 #include "ringbuffer.h"
45
// Fixed-point constants. In each pair the SCALE value equals 1 << SHIFT
// (256 == 1 << 8, 65536 == 1 << 16), so the two must be changed together.
46 // x.8 fixed point precision values (8 fractional bits)
47 #define FIXED_POINT_SHIFT 8
48 #define FIXED_POINT_SCALE 256
49
50 // x.16 fixed point precision values (16 fractional bits)
51 #define FIXED_POINT16_SHIFT 16
52 #define FIXED_POINT16_SCALE 65536
53
// Forward declarations: the work-function typedefs below take pointers to these
// types before the full definitions appear later in this header.
54 struct SWR_CONTEXT;
55 struct DRAW_CONTEXT;
56
// Flags and per-primitive values that travel with a primitive. Embedded by value
// in both SWR_TRIANGLE_DESC and TRIANGLE_WORK_DESC.
57 struct TRI_FLAGS
58 {
// The four bitfields below are sized so that they add up to exactly 32 bits.
59 uint32_t frontFacing : 1;
60 uint32_t yMajor : 1;
61 uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); // one bit per position of a SIMD tile
62 uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); // padding: the bits left over from the 32
63 float pointSize;
64 uint32_t primID;
65 uint32_t renderTargetArrayIndex;
66 };
67
68 //////////////////////////////////////////////////////////////////////////
69 /// SWR_TRIANGLE_DESC
/// Per-triangle data handed by reference to a backend function
/// (see the PFN_BACKEND_FUNC signature in this header).
70 /////////////////////////////////////////////////////////////////////////
71 struct SWR_TRIANGLE_DESC
72 {
// NOTE(review): the names below mirror the vI*/vJ*/vZ*/v*OneOverW/vRecipDet members
// of BarycentricCoeffs, so these are presumably the three plane-equation
// coefficients per quantity - confirm against the code that fills them in.
73 float I[3];
74 float J[3];
75 float Z[3];
76 float OneOverW[3];
77 float recipDet;
78
// Buffers referenced (not owned by anything visible in this header).
79 float *pRecipW;
80 float *pAttribs;
81 float *pPerspAttribs;
82 float *pSamplePos;
83 float *pUserClipBuffer;
84
85 uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES]; // one 64-bit mask per multisample slot
86 uint64_t innerCoverageMask; // Conservative rasterization inner coverage: marked covered if entire pixel is covered
87 uint64_t anyCoveredSamples;
88
89 TRI_FLAGS triFlags;
90 };
91
// Triangle work payload; this is the type of the `tri` member of the
// descriptor union inside BE_WORK.
92 struct TRIANGLE_WORK_DESC
93 {
94 float *pTriBuffer;
95 float *pAttribs;
96 float *pUserClipBuffer;
97 uint32_t numAttribs; // NOTE(review): presumably the attribute count behind pAttribs - confirm
98 TRI_FLAGS triFlags;
99 };
100
// Clear flags, accessible either as a 3-bit `mask` bitfield or as the whole
// 32-bit word through `bits` (both views share the same storage).
101 union CLEAR_FLAGS
102 {
103 struct
104 {
105 uint32_t mask : 3;
106 };
107 uint32_t bits;
108 };
109
// Clear payload; type of the `clear` member in the descriptor unions of both
// FE_WORK and BE_WORK. Holds the rectangle, the flags and the clear values.
110 struct CLEAR_DESC
111 {
112 SWR_RECT rect;
113 CLEAR_FLAGS flags;
114 float clearRTColor[4]; // RGBA_32F
115 float clearDepth; // [0..1]
116 uint8_t clearStencil;
117 };
118
// Discard/invalidate-tiles payload; type of the `discardInvalidateTiles` member
// in the descriptor unions of both FE_WORK and BE_WORK.
119 struct DISCARD_INVALIDATE_TILES_DESC
120 {
121 uint32_t attachmentMask;
122 SWR_RECT rect;
123 SWR_TILE_STATE newTileState;
124 bool createNewTiles;
125 bool fullTilesOnly;
126 };
127
// A callback plus three 64-bit user values. Used as the `sync` member of the
// FE_WORK/BE_WORK descriptor unions and as DRAW_CONTEXT::retireCallback.
128 struct SYNC_DESC
129 {
130 PFN_CALLBACK_FUNC pfnCallbackFunc;
131 uint64_t userData;
132 uint64_t userData2;
133 uint64_t userData3;
134 };
135
// Store-tiles payload; type of the `storeTiles` member in the descriptor unions
// of both FE_WORK and BE_WORK.
136 struct STORE_TILES_DESC
137 {
138 SWR_RENDERTARGET_ATTACHMENT attachment;
139 SWR_TILE_STATE postStoreTileState;
140 SWR_RECT rect;
141 };
142
// Thread-group counts along X, Y and Z. Not a member of the FE_WORK/BE_WORK
// unions in this header; its users live elsewhere.
143 struct COMPUTE_DESC
144 {
145 uint32_t threadGroupCountX;
146 uint32_t threadGroupCountY;
147 uint32_t threadGroupCountZ;
148 };
149
// Signature of the function stored in BE_WORK::pfnWork. pDesc is untyped;
// presumably it points at the matching BE_WORK::desc member - confirm at call sites.
150 typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc);
151
// Tag stored in FE_WORK::type and BE_WORK::type. Enumerators take the default
// values 0..4. NOTE(review): presumably selects which member of the `desc`
// union is valid - confirm at the use sites.
152 enum WORK_TYPE
153 {
154 SYNC,
155 DRAW,
156 CLEAR,
157 DISCARDINVALIDATETILES,
158 STORETILES,
159 };
160
// A work item of PFN_WORK_FUNC kind: a type tag, the function to run and a
// union holding the payload. Only one union member is meaningful at a time.
161 struct BE_WORK
162 {
163 WORK_TYPE type;
164 PFN_WORK_FUNC pfnWork;
165 union
166 {
167 SYNC_DESC sync;
168 TRIANGLE_WORK_DESC tri;
169 CLEAR_DESC clear;
170 DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
171 STORE_TILES_DESC storeTiles;
172 } desc;
173 };
174
// Draw payload; type of the `draw` member of FE_WORK::desc. The two anonymous
// unions overlay the indexed and non-indexed variants of the same slot, so only
// the field matching the kind of draw may be read.
175 struct DRAW_WORK
176 {
177 DRAW_CONTEXT* pDC;
178 union
179 {
180 uint32_t numIndices; // DrawIndexed: Number of indices for draw.
181 uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
182 };
// Note: this union overlays a pointer with a 32-bit integer.
183 union
184 {
185 const int32_t* pIB; // DrawIndexed: App supplied indices
186 uint32_t startVertex; // Draw: Starting vertex in VB to render from.
187 };
188 int32_t baseVertex;
189 uint32_t numInstances; // Number of instances
190 uint32_t startInstance; // Instance offset
191 uint32_t startPrimID; // starting primitiveID for this draw batch
192 uint32_t startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
193 SWR_FORMAT type; // index buffer type
194 };
195
// Signature of the function stored in FE_WORK::pfnWork.
196 typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc);
// A work item of PFN_FE_WORK_FUNC kind, stored in DRAW_CONTEXT::FeWork. Same
// shape as BE_WORK, but the union carries DRAW_WORK instead of TRIANGLE_WORK_DESC.
197 struct FE_WORK
198 {
199 WORK_TYPE type;
200 PFN_FE_WORK_FUNC pfnWork;
201 union
202 {
203 SYNC_DESC sync;
204 DRAW_WORK draw;
205 CLEAR_DESC clear;
206 DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
207 STORE_TILES_DESC storeTiles;
208 } desc;
209 };
210
// Four float extents (left, right, top, bottom); stored in API_STATE::gbState.
211 struct GUARDBAND
212 {
213 float left, right, top, bottom;
214 };
215
// Forward declaration; only used by reference in the typedef below.
216 struct PA_STATE;
217
218 // function signature for pipeline stages that execute after primitive assembly
// (the selected function is stored in DRAW_STATE::pfnProcessPrims)
219 typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
220 uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
221
// Aggregate of the pipeline state, grouped by stage. Held by value in
// DRAW_STATE::state and exposed read-only through GetApiState().
// NOTE(review): OSALIGNLINE is defined elsewhere; presumably it requests
// cache-line alignment - confirm in the OS abstraction header.
222 OSALIGNLINE(struct) API_STATE
223 {
224 // Vertex Buffers
225 SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
226
227 // Index Buffer
228 SWR_INDEX_BUFFER_STATE indexBuffer;
229
230 // FS - Fetch Shader State
231 PFN_FETCH_FUNC pfnFetchFunc;
232
233 // VS - Vertex Shader State
234 PFN_VERTEX_FUNC pfnVertexFunc;
235
236 // GS - Geometry Shader State
237 PFN_GS_FUNC pfnGsFunc;
238 SWR_GS_STATE gsState;
239
240 // CS - Compute Shader
241 PFN_CS_FUNC pfnCsFunc;
242 uint32_t totalThreadsInGroup;
243 uint32_t totalSpillFillSize;
244
245 // FE - Frontend State
246 SWR_FRONTEND_STATE frontendState;
247
248 // SOS - Streamout Shader State
249 PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
250
251 // Streamout state
252 SWR_STREAMOUT_STATE soState;
// `mutable`: may be modified even through a const API_STATE, e.g. the const
// reference returned by GetApiState().
253 mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
254
255 // Tessellation State
256 PFN_HS_FUNC pfnHsFunc;
257 PFN_DS_FUNC pfnDsFunc;
258 SWR_TS_STATE tsState;
259
260 // Number of attributes used by the frontend (vs, so, gs)
261 uint32_t feNumAttributes;
262
263 PRIMITIVE_TOPOLOGY topology;
264 bool forceFront;
265
266 // RS - Rasterizer State
267 SWR_RASTSTATE rastState;
268 // floating point multisample offsets
// (two floats per multisample slot)
269 float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
270
271 GUARDBAND gbState;
272
// Viewports and scissor rectangles: one entry each per viewport/scissor slot.
273 SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
274 SWR_VIEWPORT_MATRICES vpMatrices;
275
276 SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
277 SWR_RECT scissorInFixedPoint;
278
279 // Backend state
280 SWR_BACKEND_STATE backendState;
281
282 // PS - Pixel shader state
283 SWR_PS_STATE psState;
284
285 SWR_DEPTH_STENCIL_STATE depthStencilState;
286
287 // OM - Output Merger State
288 SWR_BLEND_STATE blendState;
289 PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
290
291 // Stats are incremented when this is true.
// (this is the flag tested by the UPDATE_STAT / UPDATE_STAT_FE macros)
292 bool enableStats;
293
// Hot tile enable bits: 8 bits for color, one bit each for depth and stencil.
294 struct
295 {
296 uint32_t colorHottileEnable : 8;
297 uint32_t depthHottileEnable: 1;
298 uint32_t stencilHottileEnable : 1;
299 };
300
301 PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
302 };
303
// Forward declarations; this header only holds pointers to these classes
// (see DRAW_CONTEXT and SWR_CONTEXT).
304 class MacroTileMgr;
305 class DispatchQueue;
306
// Byte pointers to the output surfaces: one color pointer per render target,
// plus depth and stencil. Passed by reference to PFN_BACKEND_FUNC.
307 struct RenderOutputBuffers
308 {
309 uint8_t* pColor[SWR_NUM_RENDERTARGETS];
310 uint8_t* pDepth;
311 uint8_t* pStencil;
312 };
313
314 // Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
// Every member is a simdscalar. Passed by const reference to the
// PFN_CALC_*_BARYCENTRICS function types declared in this header.
315 struct BarycentricCoeffs
316 {
// I: a/b/c coefficients
317 simdscalar vIa;
318 simdscalar vIb;
319 simdscalar vIc;
320
// J: a/b/c coefficients
321 simdscalar vJa;
322 simdscalar vJb;
323 simdscalar vJc;
324
// Z: a/b/c coefficients
325 simdscalar vZa;
326 simdscalar vZb;
327 simdscalar vZc;
328
329 simdscalar vRecipDet;
330
// 1/W terms, one per A/B/C
331 simdscalar vAOneOverW;
332 simdscalar vBOneOverW;
333 simdscalar vCOneOverW;
334 };
335
336 // pipeline function pointer types
// Parameters are unnamed in these typedefs; consult the implementations for the
// meaning of the positional uint32_t/simdscalar arguments.
// PFN_BACKEND_FUNC is the type stored in BACKEND_FUNCS::pfnBackend.
337 typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
338 typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
339 const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar);
340 typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
341 typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
342 typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
343 const simdscalar, const simdscalar);
344
// Wrapper around the backend entry point selected for a draw; stored in
// DRAW_STATE::backendFuncs.
345 struct BACKEND_FUNCS
346 {
347 PFN_BACKEND_FUNC pfnBackend;
348 };
349
350 // Draw State
// Entry type of the draw state ring (SWR_CONTEXT::dsRing). Referenced from
// DRAW_CONTEXT::pState, so several draw contexts can share one entry.
351 struct DRAW_STATE
352 {
353 API_STATE state;
354
355 void* pPrivateState; // It is required that the driver sets this up for each draw.
356
357 // pipeline function pointers, filled in by API thread when setting up the draw
358 BACKEND_FUNCS backendFuncs;
359 PFN_PROCESS_PRIMS pfnProcessPrims;
360
361 CachingArena* pArena; // This should only be used by API thread.
362 };
363
// Per-draw state that is written while the draw executes; stored by value in
// DRAW_CONTEXT::dynState and updated by the UPDATE_STAT / UPDATE_STAT_FE macros.
364 struct DRAW_DYNAMIC_STATE
365 {
366 ///@todo Currently assumes only a single FE can do stream output for a draw.
// NOTE(review): the 4 is hard-coded here, while API_STATE sizes its stream-out
// arrays with MAX_SO_STREAMS - confirm the two are meant to match.
367 uint32_t SoWriteOffset[4];
368 bool SoWriteOffsetDirty[4];
369
370 SWR_STATS_FE statsFE; // Only one FE thread per DC.
371 SWR_STATS stats[KNOB_MAX_NUM_THREADS]; // one slot per worker, indexed by workerId in UPDATE_STAT
372 };
373
374 // Draw Context
375 // The api thread sets up a draw context that exists for the life of the draw.
376 // This draw context maintains all of the state needed for the draw operation.
// Layout note: the static_assert after this struct requires sizeof(DRAW_CONTEXT)
// to be a multiple of 64 bytes, so adding or resizing members can break the build.
377 struct DRAW_CONTEXT
378 {
379 SWR_CONTEXT* pContext;
// Which member is valid depends on isCompute (see the pDispatch comment).
380 union
381 {
382 MacroTileMgr* pTileMgr;
383 DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
384 };
385 DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread.
386 DRAW_DYNAMIC_STATE dynState;
387
388 CachingArena* pArena;
389
390 uint32_t drawId;
391 bool dependent;
392 bool isCompute; // Is this DC a compute context?
393 bool cleanupState; // True if this is the last draw using an entry in the state ring.
394 volatile bool doneFE; // Is FE work done for this draw?
395
396 FE_WORK FeWork;
397
398 volatile OSALIGNLINE(uint32_t) FeLock;
399 volatile int32_t threadsDone;
400
401 SYNC_DESC retireCallback; // Call this func when this DC is retired.
402 };
403
// Compile-time guard: DRAW_CONTEXT must occupy a whole number of 64-byte units.
404 static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
405
406 INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC)
407 {
408 SWR_ASSERT(pDC != nullptr);
409 SWR_ASSERT(pDC->pState != nullptr);
410
411 return pDC->pState->state;
412 }
413
414 INLINE void* GetPrivateState(const DRAW_CONTEXT* pDC)
415 {
416 SWR_ASSERT(pDC != nullptr);
417 SWR_ASSERT(pDC->pState != nullptr);
418
419 return pDC->pState->pPrivateState;
420 }
421
// Forward declaration; SWR_CONTEXT only holds a pointer to it.
422 class HotTileMgr;
423
// Global context: owns the draw-context and draw-state rings, the thread pool
// and its bookkeeping, the driver callbacks, and per-thread stats/scratch arrays.
424 struct SWR_CONTEXT
425 {
426 // Draw Context Ring
427 // Each draw needs its own state in order to support multiple draws in flight across multiple threads.
428 // We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
429 // of draws that can be in flight at any given time.
430 //
431 // Description:
432 // 1. State - When an application first sets state we'll request a new draw context to use.
433 // a. If there are no available draw contexts then we'll have to wait until one becomes free.
434 // b. If one is available then set pCurDrawContext to point to it and mark it in use.
435 // c. All state calls set state on pCurDrawContext.
436 // 2. Draw - Creates and submits a work item that is associated with current draw context.
437 // a. Set pPrevDrawContext = pCurDrawContext
438 // b. Set pCurDrawContext to NULL.
439 // 3. State - When an application sets state after draw
440 // a. Same as step 1.
441 // b. State is copied from prev draw context to current.
442 RingBuffer<DRAW_CONTEXT> dcRing;
443
444 DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
445 DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
446
447 MacroTileMgr* pMacroTileManagerArray;
448 DispatchQueue* pDispatchQueueArray;
449
450 // Draw State Ring
451 // When draws are very large (lots of primitives) then the API thread will break these up.
452 // These split draws all have identical state. So instead of storing the state directly
453 // in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
454 // to reference a single entry in the DS ring.
455 RingBuffer<DRAW_STATE> dsRing;
456
457 uint32_t curStateId; // Current index to the next available entry in the DS ring.
458
459 uint32_t NumWorkerThreads;
460 uint32_t NumFEThreads;
461 uint32_t NumBEThreads;
462
463 THREAD_POOL threadPool; // Thread pool associated with this context
464 SWR_THREADING_INFO threadInfo;
465
// NOTE(review): presumably WaitLock is the mutex paired with FifosNotEmpty -
// confirm in the threading code.
466 std::condition_variable FifosNotEmpty;
467 std::mutex WaitLock;
468
469 DRIVER_TYPE driverType;
470
471 uint32_t privateStateSize;
472
473 HotTileMgr *pHotTileMgr;
474
475 // Callback functions, passed in at create context time
476 PFN_LOAD_TILE pfnLoadTile;
477 PFN_STORE_TILE pfnStoreTile;
478 PFN_CLEAR_TILE pfnClearTile;
479 PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
480 PFN_UPDATE_STATS pfnUpdateStats;
481 PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
482
483 // Global Stats
// (one slot per possible thread)
484 SWR_STATS stats[KNOB_MAX_NUM_THREADS];
485
486 // Scratch space for workers.
// (one pointer per possible thread)
487 uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
488
489 volatile int32_t drawsOutstandingFE;
490
491 CachingAllocator cachingArenaAllocator;
492 uint32_t frameCount;
493
494 uint32_t lastFrameChecked;
495 uint64_t lastDrawChecked;
496 TileSet singleThreadLockedTiles;
497 };
498
// Statistics helpers. Each adds `count` to the statistic called `name`, but only
// when GetApiState(pDC).enableStats is true.
//
// Both macros expect a DRAW_CONTEXT pointer named `pDC` to be in scope at the
// point of use; UPDATE_STAT additionally expects `workerId`, which selects the
// per-worker slot in pDC->dynState.stats. UPDATE_STAT_FE writes the single
// front-end slot pDC->dynState.statsFE.
//
// The bodies are wrapped in do { } while (0) so that an invocation followed by
// ';' is exactly one statement and can be used as the unbraced body of an
// if/else. `count` is parenthesized so that any expression can be passed.
#define UPDATE_STAT(name, count)                            \
    do                                                      \
    {                                                       \
        if (GetApiState(pDC).enableStats)                   \
        {                                                   \
            pDC->dynState.stats[workerId].name += (count);  \
        }                                                   \
    } while (0)

#define UPDATE_STAT_FE(name, count)                         \
    do                                                      \
    {                                                       \
        if (GetApiState(pDC).enableStats)                   \
        {                                                   \
            pDC->dynState.statsFE.name += (count);          \
        }                                                   \
    } while (0)