39f23372a18e04e52795913d83370ff4f42a304b
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / context.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file context.h
24 *
25 * @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
26 * The SWR_CONTEXT is our global context and contains the DC ring,
27 * thread state, etc.
28 *
29 * The DRAW_CONTEXT contains all state associated with a draw operation.
30 *
31 ******************************************************************************/
32 #pragma once
33
34 #include <condition_variable>
35 #include <algorithm>
36
37 #include "core/api.h"
38 #include "core/utils.h"
39 #include "core/arena.h"
40 #include "core/fifo.hpp"
41 #include "core/knobs.h"
42 #include "common/simdintrin.h"
43 #include "core/threads.h"
44 #include "ringbuffer.h"
45
// x.8 fixed point: 8 fractional bits, so the scale factor is 2^8 (256).
#define FIXED_POINT_SHIFT 8
#define FIXED_POINT_SCALE (1 << FIXED_POINT_SHIFT)

// x.16 fixed point: 16 fractional bits, so the scale factor is 2^16 (65536).
#define FIXED_POINT16_SHIFT 16
#define FIXED_POINT16_SCALE (1 << FIXED_POINT16_SHIFT)
53
// Forward declarations so the work descriptors and callback typedefs below
// can refer to these types by pointer before their full definitions appear
// later in this header.
54 struct SWR_CONTEXT;
55 struct DRAW_CONTEXT;
56
// Per-primitive flags and IDs. Embedded in both SWR_TRIANGLE_DESC and
// TRIANGLE_WORK_DESC. The four leading members are bitfields whose widths
// add up to exactly 32 bits.
57 struct TRI_FLAGS
58 {
59 uint32_t frontFacing : 1;
60 uint32_t yMajor : 1;
// One bit per element of a SIMD tile (X_DIM * Y_DIM bits).
61 uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
// Pads the bitfields out to 32 bits. The width expression is only valid
// while SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM is below 30.
62 uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
63 float pointSize;
64 uint32_t primID;
65 uint32_t renderTargetArrayIndex;
66 };
67
68 //////////////////////////////////////////////////////////////////////////
69 /// SWR_TRIANGLE_DESC
// Triangle description that is passed by reference to the backend function
// (see the PFN_BACKEND_FUNC signature in this header).
70 /////////////////////////////////////////////////////////////////////////
71 struct SWR_TRIANGLE_DESC
72 {
// Three floats each for I, J, Z and 1/W, plus a reciprocal determinant.
// NOTE(review): presumably the scalar counterparts of the a/b/c plane
// coefficients in BarycentricCoeffs - confirm against the code that fills them.
73 float I[3];
74 float J[3];
75 float Z[3];
76 float OneOverW[3];
77 float recipDet;
78
// Non-owning-looking raw float buffers; sizes and ownership are not visible
// in this header - TODO confirm with the producer of this struct.
79 float *pRecipW;
80 float *pAttribs;
81 float *pPerspAttribs;
82 float *pSamplePos;
83 float *pUserClipBuffer;
84
// One 64-bit mask per multisample slot (SWR_MAX_NUM_MULTISAMPLES entries).
85 uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
86 uint64_t anyCoveredSamples;
87
88 TRI_FLAGS triFlags;
89 };
90
// Triangle payload of a backend work item (BE_WORK::desc.tri).
// Holds raw pointers to the triangle, attribute and user-clip data together
// with the attribute count and the per-primitive flags.
91 struct TRIANGLE_WORK_DESC
92 {
93 float *pTriBuffer;
94 float *pAttribs;
95 float *pUserClipBuffer;
96 uint32_t numAttribs;
97 TRI_FLAGS triFlags;
98 };
99
// Clear flags: a 3-bit mask overlaid (via the union) on the raw 32-bit word
// `bits`, so the flags can be read or written either way.
// NOTE(review): the meaning of the individual mask bits is not defined in
// this header - confirm against the clear API.
100 union CLEAR_FLAGS
101 {
102 struct
103 {
104 uint32_t mask : 3;
105 };
106 uint32_t bits;
107 };
108
// Clear payload carried in the desc union of both BE_WORK and FE_WORK:
// the clear flags plus the color, depth and stencil clear values.
109 struct CLEAR_DESC
110 {
111 CLEAR_FLAGS flags;
112 float clearRTColor[4]; // RGBA_32F
113 float clearDepth; // [0..1]
114 uint8_t clearStencil;
115 };
116
// Discard/invalidate payload carried in the desc union of both BE_WORK and
// FE_WORK: an attachment mask, a rectangle, the tile state to apply and two
// boolean options.
// NOTE(review): exact semantics of createNewTiles / fullTilesOnly are
// implemented elsewhere - confirm before relying on the names.
117 struct DISCARD_INVALIDATE_TILES_DESC
118 {
119 uint32_t attachmentMask;
120 SWR_RECT rect;
121 SWR_TILE_STATE newTileState;
122 bool createNewTiles;
123 bool fullTilesOnly;
124 };
125
// Sync payload carried in the desc union of both BE_WORK and FE_WORK:
// a callback pointer and three opaque 64-bit user values.
// NOTE(review): presumably the user values are handed back to the callback -
// confirm against PFN_CALLBACK_FUNC's signature.
126 struct SYNC_DESC
127 {
128 PFN_CALLBACK_FUNC pfnCallbackFunc;
129 uint64_t userData;
130 uint64_t userData2;
131 uint64_t userData3;
132 };
133
// Stats-query payload carried in the desc union of both BE_WORK and FE_WORK.
// Holds only a pointer to an SWR_STATS object; ownership is not expressed here.
134 struct QUERY_DESC
135 {
136 SWR_STATS* pStats;
137 };
138
// Store-tiles payload carried in the desc union of both BE_WORK and FE_WORK:
// which render target attachment is involved and the tile state to use
// after the store.
139 struct STORE_TILES_DESC
140 {
141 SWR_RENDERTARGET_ATTACHMENT attachment;
142 SWR_TILE_STATE postStoreTileState;
143 };
144
// Compute dispatch dimensions: thread group counts along X, Y and Z.
// NOTE(review): not a member of the BE_WORK / FE_WORK unions in this header;
// where it is consumed is not visible here.
145 struct COMPUTE_DESC
146 {
147 uint32_t threadGroupCountX;
148 uint32_t threadGroupCountY;
149 uint32_t threadGroupCountZ;
150 };
151
// Signature of the function stored in BE_WORK::pfnWork. Receives the draw
// context, a worker id, a macrotile id and an untyped descriptor pointer.
// NOTE(review): pDesc presumably points at the BE_WORK::desc member matching
// the work type - confirm at the call site.
152 typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc);
153
// Tag stored in BE_WORK::type and FE_WORK::type identifying the kind of work.
// NOTE(review): presumably selects which member of the accompanying desc
// union is active - confirm. There is no enumerator for compute even though
// COMPUTE_DESC exists; DRAW_CONTEXT::isCompute appears to cover that path.
154 enum WORK_TYPE
155 {
156 SYNC,
157 DRAW,
158 CLEAR,
159 DISCARDINVALIDATETILES,
160 STORETILES,
161 QUERYSTATS,
162 };
163
// Backend work item: a type tag, the function to run (PFN_WORK_FUNC) and a
// union holding the payload for that kind of work.
164 struct BE_WORK
165 {
166 WORK_TYPE type;
167 PFN_WORK_FUNC pfnWork;
// Only one member is meaningful at a time; the members share storage.
168 union
169 {
170 SYNC_DESC sync;
171 TRIANGLE_WORK_DESC tri;
172 CLEAR_DESC clear;
173 DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
174 STORE_TILES_DESC storeTiles;
175 QUERY_DESC queryStats;
176 } desc;
177 };
178
// Draw payload of a frontend work item (FE_WORK::desc.draw). The two
// anonymous unions let the same storage serve indexed and non-indexed draws.
179 struct DRAW_WORK
180 {
181 DRAW_CONTEXT* pDC;
// Element count: indices for an indexed draw, vertices otherwise.
182 union
183 {
184 uint32_t numIndices; // DrawIndexed: Number of indices for draw.
185 uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
186 };
// Source of vertices: index buffer pointer for an indexed draw, starting
// vertex otherwise. Note the two members differ in size on 64-bit targets.
187 union
188 {
189 const int32_t* pIB; // DrawIndexed: App supplied indices
190 uint32_t startVertex; // Draw: Starting vertex in VB to render from.
191 };
192 int32_t baseVertex;
193 uint32_t numInstances; // Number of instances
194 uint32_t startInstance; // Instance offset
195 uint32_t startPrimID; // starting primitiveID for this draw batch
196 uint32_t startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
197 SWR_FORMAT type; // index buffer type
198 };
199
// Signature of the function stored in FE_WORK::pfnWork. Unlike PFN_WORK_FUNC
// it also receives the SWR_CONTEXT and takes no macrotile id.
200 typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc);
// Frontend work item: a type tag, the function to run and a union holding the
// payload. Same layout idea as BE_WORK, but the draw payload is a DRAW_WORK
// rather than a TRIANGLE_WORK_DESC.
201 struct FE_WORK
202 {
203 WORK_TYPE type;
204 PFN_FE_WORK_FUNC pfnWork;
// Only one member is meaningful at a time; the members share storage.
205 union
206 {
207 SYNC_DESC sync;
208 DRAW_WORK draw;
209 CLEAR_DESC clear;
210 DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
211 STORE_TILES_DESC storeTiles;
212 QUERY_DESC queryStats;
213 } desc;
214 };
215
// Guardband extents as four floats; stored in API_STATE::gbState.
// NOTE(review): units/coordinate space are not stated in this header.
216 struct GUARDBAND
217 {
218 float left, right, top, bottom;
219 };
220
// Forward declaration; PA_STATE is only used by reference below.
221 struct PA_STATE;
222
223 // function signature for pipeline stages that execute after primitive assembly
// Stored per draw in DRAW_STATE::pfnProcessPrims. Takes the draw context, the
// primitive assembly state, the worker id, an array of SIMD vectors holding
// the primitives, a primitive mask and a SIMD register of primitive IDs.
224 typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
225 uint32_t primMask, simdscalari primID);
226
// Complete pipeline state for a draw. Stored as DRAW_STATE::state and read
// back through GetApiState(). The members are grouped by pipeline stage.
// NOTE(review): OSALIGNLINE presumably applies cache-line alignment - confirm
// against the macro's definition.
227 OSALIGNLINE(struct) API_STATE
228 {
229 // Vertex Buffers
230 SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
231
232 // Index Buffer
233 SWR_INDEX_BUFFER_STATE indexBuffer;
234
235 // FS - Fetch Shader State
236 PFN_FETCH_FUNC pfnFetchFunc;
237
238 // VS - Vertex Shader State
239 PFN_VERTEX_FUNC pfnVertexFunc;
240
241 // GS - Geometry Shader State
242 PFN_GS_FUNC pfnGsFunc;
243 SWR_GS_STATE gsState;
244
245 // CS - Compute Shader
246 PFN_CS_FUNC pfnCsFunc;
247 uint32_t totalThreadsInGroup;
248
249 // FE - Frontend State
250 SWR_FRONTEND_STATE frontendState;
251
252 // SOS - Streamout Shader State
253 PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
254
255 // Streamout state
256 SWR_STREAMOUT_STATE soState;
// mutable: may be modified even through a const API_STATE reference, such as
// the one GetApiState() returns.
257 mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
258
259 // Tessellation State
260 PFN_HS_FUNC pfnHsFunc;
261 PFN_DS_FUNC pfnDsFunc;
262 SWR_TS_STATE tsState;
263
264 // Specifies which VS outputs are sent to PS.
265 // Does not include position
266 uint32_t linkageMask;
267 uint32_t linkageCount;
268 uint8_t linkageMap[MAX_ATTRIBUTES];
269
270 // attrib mask, specifies the total set of attributes used
271 // by the frontend (vs, so, gs)
272 uint32_t feAttribMask;
273
274 PRIMITIVE_TOPOLOGY topology;
275 bool forceFront;
276
277 // RS - Rasterizer State
278 SWR_RASTSTATE rastState;
279 // floating point multisample offsets
// Two floats per multisample slot.
280 float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
281
282 GUARDBAND gbState;
283
284 SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
285 SWR_VIEWPORT_MATRIX vpMatrix[KNOB_NUM_VIEWPORTS_SCISSORS];
286
287 BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
288 BBOX scissorInFixedPoint;
289
290 // Backend state
291 SWR_BACKEND_STATE backendState;
292
293 // PS - Pixel shader state
294 SWR_PS_STATE psState;
295
296 SWR_DEPTH_STENCIL_STATE depthStencilState;
297
298 // OM - Output Merger State
299 SWR_BLEND_STATE blendState;
300 PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
301
302 // Stats are incremented when this is true.
// Checked by the UPDATE_STAT / SET_STAT macros at the end of this header.
303 bool enableStats;
304
// Hot tile enables packed as bitfields: 8 bits for color, 1 bit each for
// depth and stencil.
// NOTE(review): presumably one color bit per render target - confirm that
// SWR_NUM_RENDERTARGETS does not exceed 8.
305 struct
306 {
307 uint32_t colorHottileEnable : 8;
308 uint32_t depthHottileEnable: 1;
309 uint32_t stencilHottileEnable : 1;
310 };
311 };
312
// Forward declarations; DRAW_CONTEXT below only holds pointers to these.
313 class MacroTileMgr;
314 class DispatchQueue;
315
// Raw byte pointers to the output surfaces handed to the backend function
// (see PFN_BACKEND_FUNC): one color pointer per render target, plus depth
// and stencil.
316 struct RenderOutputBuffers
317 {
318 uint8_t* pColor[SWR_NUM_RENDERTARGETS];
319 uint8_t* pDepth;
320 uint8_t* pStencil;
321 };
322
323 // Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
// Every member is a SIMD register (simdscalar). Passed by const reference to
// the PFN_CALC_*_BARYCENTRICS functions declared later in this header.
324 struct BarycentricCoeffs
325 {
// a/b/c coefficients for I.
326 simdscalar vIa;
327 simdscalar vIb;
328 simdscalar vIc;
329
// a/b/c coefficients for J.
330 simdscalar vJa;
331 simdscalar vJb;
332 simdscalar vJc;
333
// a/b/c coefficients for Z.
334 simdscalar vZa;
335 simdscalar vZb;
336 simdscalar vZc;
337
338 simdscalar vRecipDet;
339
// NOTE(review): presumably 1/W for each of the three vertices - confirm.
340 simdscalar vAOneOverW;
341 simdscalar vBOneOverW;
342 simdscalar vCOneOverW;
343 };
344
345 // pipeline function pointer types
// These are the types of the members of BACKEND_FUNCS. Parameters are unnamed
// in the typedefs; only their types are fixed here.
// Backend entry point: draw context, three 32-bit values, the triangle
// description and the output buffers.
// NOTE(review): the three uint32_t parameters are unnamed - confirm their
// meaning (likely worker id and tile coordinates) at an implementation.
346 typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
// Output merger: takes the pixel shader context, a reference to an array of
// per-render-target byte pointers, the blend state and a reference to the
// array of per-render-target blend functions.
347 typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
348 const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar);
// Barycentric calculators: each reads BarycentricCoeffs and writes into the
// pixel shader context. The centroid variant takes additional inputs.
349 typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
350 typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
351 typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
352 const simdscalar, const simdscalar);
353
// Bundle of backend pipeline function pointers chosen for a draw; stored in
// DRAW_STATE::backendFuncs.
354 struct BACKEND_FUNCS
355 {
356 PFN_BACKEND_FUNC pfnBackend;
357 PFN_CALC_PIXEL_BARYCENTRICS pfnCalcPixelBarycentrics;
358 PFN_CALC_SAMPLE_BARYCENTRICS pfnCalcSampleBarycentrics;
359 PFN_CALC_CENTROID_BARYCENTRICS pfnCalcCentroidBarycentrics;
360 PFN_OUTPUT_MERGER pfnOutputMerger;
361 };
362
363
364 // Draw State
// One entry of the draw state ring (SWR_CONTEXT::dsRing). A DRAW_CONTEXT
// refers to its state through DRAW_CONTEXT::pState, so several draw contexts
// can share a single DRAW_STATE.
365 struct DRAW_STATE
366 {
367 API_STATE state;
368
369 void* pPrivateState; // The driver is required to set this up for each draw.
370
371 // pipeline function pointers, filled in by API thread when setting up the draw
372 BACKEND_FUNCS backendFuncs;
373 PFN_PROCESS_PRIMS pfnProcessPrims;
374
375 CachingArena* pArena; // This should only be used by API thread.
376 };
377
378 // Draw Context
379 // The api thread sets up a draw context that exists for the life of the draw.
380 // This draw context maintains all of the state needed for the draw operation.
// One entry of the draw context ring (SWR_CONTEXT::dcRing).
381 struct DRAW_CONTEXT
382 {
383 SWR_CONTEXT *pContext;
384
385 uint64_t drawId;
386
387 bool isCompute; // Is this DC a compute context?
388
389 FE_WORK FeWork;
// The next three fields are declared volatile and are each wrapped in
// OSALIGNLINE individually.
// NOTE(review): volatile alone gives neither atomicity nor ordering between
// threads - confirm that every cross-thread access goes through
// interlocked/atomic operations.
390 volatile OSALIGNLINE(uint32_t) FeLock;
391 volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
392 volatile OSALIGNLINE(int64_t) threadsDone;
393
// NOTE(review): presumably the drawId this draw must wait on (compare
// WaitForDependencies) - confirm.
394 uint64_t dependency;
395
396 MacroTileMgr* pTileMgr;
397
398 // The following fields are valid if isCompute is true.
399 DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
400
// Shared draw state; GetApiState() and GetPrivateState() read through this
// pointer and assert that it is non-null.
401 DRAW_STATE* pState;
402 CachingArena* pArena;
403
// One pointer per possible worker thread.
404 uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
405
406 bool cleanupState; // True if this is the last draw using an entry in the state ring.
407 };
408
409 INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC)
410 {
411 SWR_ASSERT(pDC != nullptr);
412 SWR_ASSERT(pDC->pState != nullptr);
413
414 return pDC->pState->state;
415 }
416
417 INLINE void* GetPrivateState(const DRAW_CONTEXT* pDC)
418 {
419 SWR_ASSERT(pDC != nullptr);
420 SWR_ASSERT(pDC->pState != nullptr);
421
422 return pDC->pState->pPrivateState;
423 }
424
// Forward declaration; SWR_CONTEXT only holds a pointer to the hot tile manager.
425 class HotTileMgr;
426
// Global rasterizer context: owns the draw context ring, the draw state ring,
// the worker thread pool and its wake-up primitives, the tile load/store/clear
// callbacks, and per-thread stats and scratch pointers.
427 struct SWR_CONTEXT
428 {
429 // Draw Context Ring
430 // Each draw needs its own state in order to support multiple draws in flight across multiple threads.
431 // We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
432 // of draws that can be in flight at any given time.
433 //
434 // Description:
435 // 1. State - When an application first sets state we'll request a new draw context to use.
436 // a. If there are no available draw contexts then we'll have to wait until one becomes free.
437 // b. If one is available then set pCurDrawContext to point to it and mark it in use.
438 // c. All state calls set state on pCurDrawContext.
439 // 2. Draw - Creates and submits a work item that is associated with current draw context.
440 // a. Set pPrevDrawContext = pCurDrawContext
441 // b. Set pCurDrawContext to NULL.
442 // 3. State - When an application sets state after draw
443 // a. Same as step 1.
444 // b. State is copied from prev draw context to current.
445 RingBuffer<DRAW_CONTEXT> dcRing;
446
447 DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
448 DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
449
450 // Draw State Ring
451 // When draws are very large (lots of primitives) then the API thread will break these up.
452 // These split draws all have identical state. So instead of storing the state directly
453 // in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
454 // to reference a single entry in the DS ring.
455 RingBuffer<DRAW_STATE> dsRing;
456
457 uint32_t curStateId; // Current index to the next available entry in the DS ring.
458
459 uint32_t NumWorkerThreads;
460
461 THREAD_POOL threadPool; // Thread pool associated with this context
462
// Condition variable and mutex pair.
// NOTE(review): presumably used to park workers until work is queued and to
// wake them (compare WakeAllThreads) - confirm in the threading code.
463 std::condition_variable FifosNotEmpty;
464 std::mutex WaitLock;
465
466 DRIVER_TYPE driverType;
467
468 uint32_t privateStateSize;
469
470 HotTileMgr *pHotTileMgr;
471
472 // tile load/store functions, passed in at create context time
473 PFN_LOAD_TILE pfnLoadTile;
474 PFN_STORE_TILE pfnStoreTile;
475 PFN_CLEAR_TILE pfnClearTile;
476
477 // Global Stats
// One SWR_STATS per possible worker thread; the UPDATE_STAT / SET_STAT macros
// index this array by workerId.
478 SWR_STATS stats[KNOB_MAX_NUM_THREADS];
479
480 // Scratch space for workers.
481 uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
482
483 CachingAllocator cachingArenaAllocator;
484 };
485
// Declared here, defined elsewhere.
// NOTE(review): judging by the names, WaitForDependencies blocks until the
// draw identified by drawId may proceed and WakeAllThreads wakes the context's
// worker threads - confirm against the definitions.
486 void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
487 void WakeAllThreads(SWR_CONTEXT *pContext);
488
// Statistics helpers. Both macros expand in a scope that provides `pDC`,
// `pContext` and `workerId`, and touch pContext->stats[workerId].<name> only
// when the current draw's API state has enableStats set.
//   UPDATE_STAT(name, count) - adds count to the named counter.
//   SET_STAT(name, count)    - overwrites the named counter with count.
//
// Each expansion is wrapped in do { ... } while (0) so it behaves as a single
// statement (safe as the body of an unbraced if/else; the caller supplies the
// trailing semicolon), and `count` is parenthesized so any expression can be
// passed without precedence surprises.
#define UPDATE_STAT(name, count) do { if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += (count); } } while (0)
#define SET_STAT(name, count) do { if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name = (count); } } while (0)