swr: [rasterizer core] routing of viewport indexes through frontend
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / context.h
1 /****************************************************************************
2 * Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file context.h
24 *
25 * @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
26 * The SWR_CONTEXT is our global context and contains the DC ring,
27 * thread state, etc.
28 *
29 * The DRAW_CONTEXT contains all state associated with a draw operation.
30 *
31 ******************************************************************************/
32 #pragma once
33
34 #include <condition_variable>
35 #include <algorithm>
36
37 #include "core/api.h"
38 #include "core/utils.h"
39 #include "core/arena.h"
40 #include "core/fifo.hpp"
41 #include "core/knobs.h"
42 #include "common/simdintrin.h"
43 #include "core/threads.h"
44 #include "ringbuffer.h"
45
46 // x.8 fixed point precision values (8 fractional bits)
47 #define FIXED_POINT_SHIFT 8
48 #define FIXED_POINT_SCALE 256 // equals 1 << FIXED_POINT_SHIFT
49
50 // x.16 fixed point precision values (16 fractional bits)
51 #define FIXED_POINT16_SHIFT 16
52 #define FIXED_POINT16_SCALE 65536 // equals 1 << FIXED_POINT16_SHIFT
53
54 struct SWR_CONTEXT; // forward declaration; the full definition appears later in this header
55 struct DRAW_CONTEXT; // forward declaration; the full definition appears later in this header
56
57 struct TRI_FLAGS // Packed flag bits plus per-primitive values; embedded in SWR_TRIANGLE_DESC and TRIANGLE_WORK_DESC
58 {
59 uint32_t frontFacing : 1;
60 uint32_t yMajor : 1;
61 uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); // SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM bits wide; presumably one bit per SIMD-tile pixel - confirm
62 uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); // pads the bitfields above to a total of 32 bits
63 float pointSize;
64 uint32_t primID;
65 uint32_t renderTargetArrayIndex;
66 };
67
68 //////////////////////////////////////////////////////////////////////////
69 /// SWR_TRIANGLE_DESC - per-triangle data handed by reference to PFN_BACKEND_FUNC
70 /////////////////////////////////////////////////////////////////////////
71 struct SWR_TRIANGLE_DESC
72 {
73 float I[3]; // NOTE(review): I/J/Z/OneOverW look like 3-coefficient plane equations (cf. BarycentricCoeffs) - confirm
74 float J[3];
75 float Z[3];
76 float OneOverW[3];
77 float recipDet;
78
79 float *pRecipW;
80 float *pAttribs;
81 float *pPerspAttribs;
82 float *pSamplePos;
83 float *pUserClipBuffer;
84
85 uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES]; // SWR_MAX_NUM_MULTISAMPLES entries, one 64-bit mask each
86 uint64_t innerCoverageMask; // Conservative rasterization inner coverage: marked covered if entire pixel is covered
87 uint64_t anyCoveredSamples;
88
89 TRI_FLAGS triFlags;
90 };
91
92 struct TRIANGLE_WORK_DESC // Triangle payload; the `tri` member of the BE_WORK descriptor union
93 {
94 float *pTriBuffer;
95 float *pAttribs;
96 float *pUserClipBuffer;
97 uint32_t numAttribs; // presumably the attribute count behind pAttribs - confirm against the producers
98 TRI_FLAGS triFlags;
99 };
100
101 union CLEAR_FLAGS // 32-bit flag word, accessible either as a named bitfield or as raw bits
102 {
103 struct
104 {
105 uint32_t mask : 3; // 3-bit mask; the meaning of each bit is not defined in this header
106 };
107 uint32_t bits; // the whole word, overlapping `mask`
108 };
109
110 struct CLEAR_DESC // Clear parameters; the `clear` member of the BE_WORK and FE_WORK descriptor unions
111 {
112 CLEAR_FLAGS flags;
113 float clearRTColor[4]; // RGBA_32F
114 float clearDepth; // [0..1]
115 uint8_t clearStencil;
116 };
117
118 struct DISCARD_INVALIDATE_TILES_DESC // The `discardInvalidateTiles` member of the BE_WORK and FE_WORK descriptor unions
119 {
120 uint32_t attachmentMask;
121 SWR_RECT rect;
122 SWR_TILE_STATE newTileState;
123 bool createNewTiles;
124 bool fullTilesOnly;
125 };
126
127 struct SYNC_DESC // Callback plus three 64-bit user values; the `sync` member of the work unions and the type of DRAW_CONTEXT::retireCallback
128 {
129 PFN_CALLBACK_FUNC pfnCallbackFunc;
130 uint64_t userData; // NOTE(review): presumably forwarded to pfnCallbackFunc along with userData2/userData3 - confirm at the call site
131 uint64_t userData2;
132 uint64_t userData3;
133 };
134
135 struct STORE_TILES_DESC // The `storeTiles` member of the BE_WORK and FE_WORK descriptor unions
136 {
137 SWR_RENDERTARGET_ATTACHMENT attachment;
138 SWR_TILE_STATE postStoreTileState;
139 };
140
141 struct COMPUTE_DESC // Thread-group counts along X, Y and Z
142 {
143 uint32_t threadGroupCountX;
144 uint32_t threadGroupCountY;
145 uint32_t threadGroupCountZ;
146 };
147
148 typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc); // signature of BE_WORK::pfnWork
149
150 enum WORK_TYPE // Tag stored in BE_WORK::type and FE_WORK::type
151 {
152 SYNC,
153 DRAW,
154 CLEAR,
155 DISCARDINVALIDATETILES,
156 STORETILES,
157 };
158
159 struct BE_WORK // Work item: type tag, handler function pointer and a type-specific descriptor
160 {
161 WORK_TYPE type;
162 PFN_WORK_FUNC pfnWork;
163 union // members share storage; presumably `type` tells which one is valid - confirm
164 {
165 SYNC_DESC sync;
166 TRIANGLE_WORK_DESC tri;
167 CLEAR_DESC clear;
168 DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
169 STORE_TILES_DESC storeTiles;
170 } desc;
171 };
172
173 struct DRAW_WORK // Draw parameters; the `draw` member of the FE_WORK descriptor union
174 {
175 DRAW_CONTEXT* pDC;
176 union // both members share storage; which one applies depends on indexed vs. non-indexed draw
177 {
178 uint32_t numIndices; // DrawIndexed: Number of indices for draw.
179 uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
180 };
181 union // both members share storage; which one applies depends on indexed vs. non-indexed draw
182 {
183 const int32_t* pIB; // DrawIndexed: App supplied indices
184 uint32_t startVertex; // Draw: Starting vertex in VB to render from.
185 };
186 int32_t baseVertex;
187 uint32_t numInstances; // Number of instances
188 uint32_t startInstance; // Instance offset
189 uint32_t startPrimID; // starting primitiveID for this draw batch
190 uint32_t startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
191 SWR_FORMAT type; // index buffer type
192 };
193
194 typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc); // signature of FE_WORK::pfnWork
195 struct FE_WORK // Work item held by value in DRAW_CONTEXT::FeWork: type tag, handler and descriptor
196 {
197 WORK_TYPE type;
198 PFN_FE_WORK_FUNC pfnWork;
199 union // members share storage; presumably `type` tells which one is valid - confirm
200 {
201 SYNC_DESC sync;
202 DRAW_WORK draw;
203 CLEAR_DESC clear;
204 DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
205 STORE_TILES_DESC storeTiles;
206 } desc;
207 };
208
209 struct GUARDBAND // Four float extents; held by value as API_STATE::gbState
210 {
211 float left, right, top, bottom;
212 };
213
214 struct PA_STATE; // forward declaration; this header only uses it by reference
215
216 // function signature for pipeline stages that execute after primitive assembly
217 typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
218 uint32_t primMask, simdscalari primID, simdscalari viewportIdx); // stored per draw in DRAW_STATE::pfnProcessPrims
219
220 OSALIGNLINE(struct) API_STATE // Pipeline state block; held by value as DRAW_STATE::state and read through GetApiState()
221 {
222 // Vertex Buffers
223 SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
224
225 // Index Buffer
226 SWR_INDEX_BUFFER_STATE indexBuffer;
227
228 // FS - Fetch Shader State
229 PFN_FETCH_FUNC pfnFetchFunc;
230
231 // VS - Vertex Shader State
232 PFN_VERTEX_FUNC pfnVertexFunc;
233
234 // GS - Geometry Shader State
235 PFN_GS_FUNC pfnGsFunc;
236 SWR_GS_STATE gsState;
237
238 // CS - Compute Shader
239 PFN_CS_FUNC pfnCsFunc;
240 uint32_t totalThreadsInGroup;
241 uint32_t totalSpillFillSize;
242
243 // FE - Frontend State
244 SWR_FRONTEND_STATE frontendState;
245
246 // SOS - Streamout Shader State
247 PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
248
249 // Streamout state
250 SWR_STREAMOUT_STATE soState;
251 mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS]; // `mutable`: stays writable through the const API_STATE& that GetApiState() returns
252
253 // Tessellation State
254 PFN_HS_FUNC pfnHsFunc;
255 PFN_DS_FUNC pfnDsFunc;
256 SWR_TS_STATE tsState;
257
258 // Number of attributes used by the frontend (vs, so, gs)
259 uint32_t feNumAttributes;
260
261 PRIMITIVE_TOPOLOGY topology;
262 bool forceFront;
263
264 // RS - Rasterizer State
265 SWR_RASTSTATE rastState;
266 // floating point multisample offsets
267 float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2]; // two floats per sample; presumably an x/y pair - confirm
268
269 GUARDBAND gbState;
270
271 SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS]; // one entry per viewport slot
272 SWR_VIEWPORT_MATRICES vpMatrices;
273
274 BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS]; // one entry per scissor slot, same count as vp[]
275 BBOX scissorInFixedPoint;
276
277 // Backend state
278 SWR_BACKEND_STATE backendState;
279
280 // PS - Pixel shader state
281 SWR_PS_STATE psState;
282
283 SWR_DEPTH_STENCIL_STATE depthStencilState;
284
285 // OM - Output Merger State
286 SWR_BLEND_STATE blendState;
287 PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
288
289 // Stats are incremented when this is true. (Tested by the UPDATE_STAT / UPDATE_STAT_FE macros.)
290 bool enableStats;
291
292 struct // anonymous: the hot-tile enable bits below are accessed directly as API_STATE members
293 {
294 uint32_t colorHottileEnable : 8; // 8 bits; presumably one per color render target - confirm
295 uint32_t depthHottileEnable: 1;
296 uint32_t stencilHottileEnable : 1;
297 };
298
299 PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
300 };
301
302 class MacroTileMgr; // forward declaration; this header only holds pointers to it
303 class DispatchQueue; // forward declaration; this header only holds pointers to it
304
305 struct RenderOutputBuffers // Byte pointers to the output surfaces; passed by reference to PFN_BACKEND_FUNC
306 {
307 uint8_t* pColor[SWR_NUM_RENDERTARGETS]; // one pointer per render target slot
308 uint8_t* pDepth;
309 uint8_t* pStencil;
310 };
311
312 // Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
313 struct BarycentricCoeffs // passed by const reference to the PFN_CALC_*_BARYCENTRICS function types
314 {
315 simdscalar vIa; // a/b/c coefficients for I
316 simdscalar vIb;
317 simdscalar vIc;
318
319 simdscalar vJa; // a/b/c coefficients for J
320 simdscalar vJb;
321 simdscalar vJc;
322
323 simdscalar vZa; // a/b/c coefficients for Z
324 simdscalar vZb;
325 simdscalar vZc;
326
327 simdscalar vRecipDet;
328
329 simdscalar vAOneOverW; // A/B/C values for 1/W
330 simdscalar vBOneOverW;
331 simdscalar vCOneOverW;
332 };
333
334 // pipeline function pointer types (PFN_BACKEND_FUNC is the type of BACKEND_FUNCS::pfnBackend)
335 typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
336 typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
337 const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar);
338 typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
339 typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
340 typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
341 const simdscalar, const simdscalar);
342
343 struct BACKEND_FUNCS // Backend entry point selected for a draw; held by value as DRAW_STATE::backendFuncs
344 {
345 PFN_BACKEND_FUNC pfnBackend;
346 };
347
348 // Draw State: API state plus the per-draw function pointers; entries live in SWR_CONTEXT::dsRing
349 struct DRAW_STATE
350 {
351 API_STATE state;
352
353 void* pPrivateState; // The driver is required to set this up for each draw. Returned by GetPrivateState().
354
355 // pipeline function pointers, filled in by API thread when setting up the draw
356 BACKEND_FUNCS backendFuncs;
357 PFN_PROCESS_PRIMS pfnProcessPrims;
358
359 CachingArena* pArena; // This should only be used by API thread.
360 };
361
362 struct DRAW_DYNAMIC_STATE // Mutable per-draw data; held by value as DRAW_CONTEXT::dynState
363 {
364 ///@todo Currently assumes only a single FE can do stream output for a draw.
365 uint32_t SoWriteOffset[4]; // NOTE(review): literal 4 here while API_STATE sizes its stream-out arrays with MAX_SO_STREAMS - confirm they agree
366 bool SoWriteOffsetDirty[4];
367
368 SWR_STATS_FE statsFE; // Only one FE thread per DC. (Written by UPDATE_STAT_FE.)
369 SWR_STATS stats[KNOB_MAX_NUM_THREADS]; // indexed by workerId in UPDATE_STAT
370 };
371
372 // Draw Context
373 // The api thread sets up a draw context that exists for the life of the draw.
374 // This draw context maintains all of the state needed for the draw operation.
375 struct DRAW_CONTEXT
376 {
377 SWR_CONTEXT* pContext;
378 union // the two pointers share storage; pDispatch is the one used for compute contexts (see isCompute)
379 {
380 MacroTileMgr* pTileMgr;
381 DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
382 };
383 DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread.
384 DRAW_DYNAMIC_STATE dynState;
385
386 CachingArena* pArena;
387
388 uint32_t drawId;
389 bool dependent;
390 bool isCompute; // Is this DC a compute context?
391 bool cleanupState; // True if this is the last draw using an entry in the state ring.
392 volatile bool doneFE; // Is FE work done for this draw?
393
394 FE_WORK FeWork;
395
396 volatile OSALIGNLINE(uint32_t) FeLock;
397 volatile int32_t threadsDone;
398
399 SYNC_DESC retireCallback; // Call this func when this DC is retired.
400 };
401
402 static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT"); // compile-time check: the size must be a multiple of 64 bytes
403
404 INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC) // Returns the API_STATE stored in the DRAW_STATE that pDC points at.
405 {
406 SWR_ASSERT(pDC != nullptr);
407 const DRAW_STATE* const pDrawState = pDC->pState; // load once; asserted and dereferenced below
408 SWR_ASSERT(pDrawState != nullptr);
409 return pDrawState->state;
410 }
411
412 INLINE void* GetPrivateState(const DRAW_CONTEXT* pDC) // Returns the pPrivateState pointer stored in the DRAW_STATE that pDC points at.
413 {
414 SWR_ASSERT(pDC != nullptr);
415 const DRAW_STATE* const pDrawState = pDC->pState; // load once; asserted and dereferenced below
416 SWR_ASSERT(pDrawState != nullptr);
417 return pDrawState->pPrivateState;
418 }
419
420 class HotTileMgr; // forward declaration; SWR_CONTEXT only holds a pointer to it
421
422 struct SWR_CONTEXT
423 {
424 // Draw Context Ring
425 // Each draw needs its own state in order to support multiple draws in flight across multiple threads.
426 // We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
427 // of draws that can be in flight at any given time.
428 //
429 // Description:
430 // 1. State - When an application first sets state we'll request a new draw context to use.
431 // a. If there are no available draw contexts then we'll have to wait until one becomes free.
432 // b. If one is available then set pCurDrawContext to point to it and mark it in use.
433 // c. All state calls set state on pCurDrawContext.
434 // 2. Draw - Creates and submits a work item that is associated with the current draw context.
435 // a. Set pPrevDrawContext = pCurDrawContext
436 // b. Set pCurDrawContext to NULL.
437 // 3. State - When an application sets state after a draw
438 // a. Same as step 1.
439 // b. State is copied from prev draw context to current.
440 RingBuffer<DRAW_CONTEXT> dcRing;
441
442 DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
443 DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
444
445 MacroTileMgr* pMacroTileManagerArray;
446 DispatchQueue* pDispatchQueueArray;
447
448 // Draw State Ring
449 // When draws are very large (lots of primitives) then the API thread will break these up.
450 // These split draws all have identical state. So instead of storing the state directly
451 // in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
452 // to reference a single entry in the DS ring.
453 RingBuffer<DRAW_STATE> dsRing;
454
455 uint32_t curStateId; // Current index to the next available entry in the DS ring.
456
457 uint32_t NumWorkerThreads;
458 uint32_t NumFEThreads;
459 uint32_t NumBEThreads;
460
461 THREAD_POOL threadPool; // Thread pool associated with this context
462 SWR_THREADING_INFO threadInfo;
463
464 std::condition_variable FifosNotEmpty;
465 std::mutex WaitLock; // NOTE(review): presumably the mutex paired with FifosNotEmpty - confirm in the threading code
466
467 DRIVER_TYPE driverType;
468
469 uint32_t privateStateSize;
470
471 HotTileMgr *pHotTileMgr;
472
473 // Callback functions, passed in at create context time
474 PFN_LOAD_TILE pfnLoadTile;
475 PFN_STORE_TILE pfnStoreTile;
476 PFN_CLEAR_TILE pfnClearTile;
477 PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
478 PFN_UPDATE_STATS pfnUpdateStats;
479 PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
480
481 // Global Stats
482 SWR_STATS stats[KNOB_MAX_NUM_THREADS]; // one slot per possible thread, same count as DRAW_DYNAMIC_STATE::stats
483
484 // Scratch space for workers.
485 uint8_t* pScratch[KNOB_MAX_NUM_THREADS]; // one pointer per possible thread
486
487 volatile int32_t drawsOutstandingFE;
488
489 CachingAllocator cachingArenaAllocator;
490 uint32_t frameCount;
491 };
492
493 void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId); // declaration only; defined outside this header. NOTE(review): drawId is 64-bit here but DRAW_CONTEXT::drawId is uint32_t - confirm intended
494 void WakeAllThreads(SWR_CONTEXT *pContext); // declaration only; defined outside this header
495
496 #define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
497 #define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }