gallium/swr: add OpenSWR rasterizer
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / context.h
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file context.h
*
* @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
*        The SWR_CONTEXT is our global context and contains the DC ring,
*        thread state, etc.
*
*        The DRAW_CONTEXT contains all state associated with a draw operation.
*
******************************************************************************/
#pragma once

#include <algorithm>
#include <condition_variable>
#include <mutex>

#include "core/api.h"
#include "core/utils.h"
#include "core/arena.h"
#include "core/fifo.hpp"
#include "core/knobs.h"
#include "common/simdintrin.h"
#include "core/threads.h"
44
// x.8 fixed point precision values.
// Each scale is derived from its shift so the pair can never disagree.
#define FIXED_POINT_SHIFT 8
#define FIXED_POINT_SCALE (1 << FIXED_POINT_SHIFT)

// x.16 fixed point precision values.
#define FIXED_POINT16_SHIFT 16
#define FIXED_POINT16_SCALE (1 << FIXED_POINT16_SHIFT)
52
// Forward declarations; both structs are defined later in this header.
struct SWR_CONTEXT;
struct DRAW_CONTEXT;
55
56 struct TRI_FLAGS
57 {
58 uint32_t frontFacing : 1;
59 uint32_t yMajor : 1;
60 uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
61 uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
62 float pointSize;
63 uint32_t primID;
64 uint32_t renderTargetArrayIndex;
65 };
66
67 //////////////////////////////////////////////////////////////////////////
68 /// SWR_TRIANGLE_DESC
69 /////////////////////////////////////////////////////////////////////////
70 struct SWR_TRIANGLE_DESC
71 {
72 float I[3];
73 float J[3];
74 float Z[3];
75 float OneOverW[3];
76 float recipDet;
77
78 float *pRecipW;
79 float *pAttribs;
80 float *pPerspAttribs;
81 float *pSamplePos;
82 float *pUserClipBuffer;
83
84 uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
85
86 TRI_FLAGS triFlags;
87 };
88
89 struct TRIANGLE_WORK_DESC
90 {
91 float *pTriBuffer;
92 float *pAttribs;
93 float *pUserClipBuffer;
94 uint32_t numAttribs;
95 TRI_FLAGS triFlags;
96 };
97
// Clear flags, accessible either as a 3-bit `mask` field or as the raw
// 32-bit `bits` word that overlays it.
union CLEAR_FLAGS
{
    struct
    {
        uint32_t mask : 3;
    };
    uint32_t bits;
};
106
107 struct CLEAR_DESC
108 {
109 CLEAR_FLAGS flags;
110 float clearRTColor[4]; // RGBA_32F
111 float clearDepth; // [0..1]
112 BYTE clearStencil;
113 };
114
// Invalidate-tiles work item payload.
struct INVALIDATE_TILES_DESC
{
    uint32_t attachmentMask;   // bit mask selecting the attachments to invalidate
};
119
120 struct SYNC_DESC
121 {
122 PFN_CALLBACK_FUNC pfnCallbackFunc;
123 uint64_t userData;
124 uint64_t userData2;
125 uint64_t userData3;
126 };
127
128 struct QUERY_DESC
129 {
130 SWR_STATS* pStats;
131 };
132
133 struct STORE_TILES_DESC
134 {
135 SWR_RENDERTARGET_ATTACHMENT attachment;
136 SWR_TILE_STATE postStoreTileState;
137 };
138
// Compute dispatch description: thread group counts for the X, Y and Z
// dimensions.
struct COMPUTE_DESC
{
    uint32_t threadGroupCountX;
    uint32_t threadGroupCountY;
    uint32_t threadGroupCountZ;
};
145
146 typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc);
147
// Tag identifying the kind of a work item (see BE_WORK::type and
// FE_WORK::type). Enumerators keep their implicit values, starting at 0.
enum WORK_TYPE
{
    SYNC,
    DRAW,
    CLEAR,
    INVALIDATETILES,
    STORETILES,
    QUERYSTATS,
};
157
158 struct BE_WORK
159 {
160 WORK_TYPE type;
161 PFN_WORK_FUNC pfnWork;
162 union
163 {
164 SYNC_DESC sync;
165 TRIANGLE_WORK_DESC tri;
166 CLEAR_DESC clear;
167 INVALIDATE_TILES_DESC invalidateTiles;
168 STORE_TILES_DESC storeTiles;
169 QUERY_DESC queryStats;
170 } desc;
171 };
172
173 struct DRAW_WORK
174 {
175 DRAW_CONTEXT* pDC;
176 union
177 {
178 uint32_t numIndices; // DrawIndexed: Number of indices for draw.
179 uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
180 };
181 union
182 {
183 const int32_t* pIB; // DrawIndexed: App supplied indices
184 uint32_t startVertex; // Draw: Starting vertex in VB to render from.
185 };
186 int32_t baseVertex;
187 uint32_t numInstances; // Number of instances
188 uint32_t startInstance; // Instance offset
189 uint32_t startPrimID; // starting primitiveID for this draw batch
190 uint32_t startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
191 SWR_FORMAT type; // index buffer type
192 };
193
194 typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc);
195 struct FE_WORK
196 {
197 WORK_TYPE type;
198 PFN_FE_WORK_FUNC pfnWork;
199 union
200 {
201 SYNC_DESC sync;
202 DRAW_WORK draw;
203 CLEAR_DESC clear;
204 INVALIDATE_TILES_DESC invalidateTiles;
205 STORE_TILES_DESC storeTiles;
206 QUERY_DESC queryStats;
207 } desc;
208 };
209
// Guardband extents, stored as four floats in the order
// left, right, top, bottom.
struct GUARDBAND
{
    float left;
    float right;
    float top;
    float bottom;
};
214
215 struct PA_STATE;
216
217 // function signature for pipeline stages that execute after primitive assembly
218 typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
219 uint32_t primMask, simdscalari primID);
220
221 OSALIGNLINE(struct) API_STATE
222 {
223 // Vertex Buffers
224 SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
225
226 // Index Buffer
227 SWR_INDEX_BUFFER_STATE indexBuffer;
228
229 // FS - Fetch Shader State
230 PFN_FETCH_FUNC pfnFetchFunc;
231
232 // VS - Vertex Shader State
233 PFN_VERTEX_FUNC pfnVertexFunc;
234
235 // GS - Geometry Shader State
236 PFN_GS_FUNC pfnGsFunc;
237 SWR_GS_STATE gsState;
238
239 // CS - Compute Shader
240 PFN_CS_FUNC pfnCsFunc;
241 uint32_t totalThreadsInGroup;
242
243 // FE - Frontend State
244 SWR_FRONTEND_STATE frontendState;
245
246 // SOS - Streamout Shader State
247 PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
248
249 // Streamout state
250 SWR_STREAMOUT_STATE soState;
251 mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
252
253 // Tessellation State
254 PFN_HS_FUNC pfnHsFunc;
255 PFN_DS_FUNC pfnDsFunc;
256 SWR_TS_STATE tsState;
257
258 // Specifies which VS outputs are sent to PS.
259 // Does not include position
260 uint32_t linkageMask;
261 uint32_t linkageCount;
262 uint8_t linkageMap[MAX_ATTRIBUTES];
263
264 // attrib mask, specifies the total set of attributes used
265 // by the frontend (vs, so, gs)
266 uint32_t feAttribMask;
267
268 PRIMITIVE_TOPOLOGY topology;
269 bool forceFront;
270
271 // RS - Rasterizer State
272 SWR_RASTSTATE rastState;
273 // floating point multisample offsets
274 float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
275
276 GUARDBAND gbState;
277
278 SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
279 SWR_VIEWPORT_MATRIX vpMatrix[KNOB_NUM_VIEWPORTS_SCISSORS];
280
281 BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
282 BBOX scissorInFixedPoint;
283
284 // Backend state
285 SWR_BACKEND_STATE backendState;
286
287 // PS - Pixel shader state
288 SWR_PS_STATE psState;
289
290 SWR_DEPTH_STENCIL_STATE depthStencilState;
291
292 // OM - Output Merger State
293 SWR_BLEND_STATE blendState;
294 PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
295
296 // Stats are incremented when this is true.
297 bool enableStats;
298
299 struct
300 {
301 uint32_t colorHottileEnable : 8;
302 uint32_t depthHottileEnable: 1;
303 uint32_t stencilHottileEnable : 1;
304 };
305 };
306
// Forward declarations; DRAW_CONTEXT only stores pointers to these classes.
class MacroTileMgr;
class DispatchQueue;
309
310 struct RenderOutputBuffers
311 {
312 uint8_t* pColor[SWR_NUM_RENDERTARGETS];
313 uint8_t* pDepth;
314 uint8_t* pStencil;
315 };
316
317 // Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
318 struct BarycentricCoeffs
319 {
320 simdscalar vIa;
321 simdscalar vIb;
322 simdscalar vIc;
323
324 simdscalar vJa;
325 simdscalar vJb;
326 simdscalar vJc;
327
328 simdscalar vZa;
329 simdscalar vZb;
330 simdscalar vZc;
331
332 simdscalar vRecipDet;
333
334 simdscalar vAOneOverW;
335 simdscalar vBOneOverW;
336 simdscalar vCOneOverW;
337 };
338
339 // pipeline function pointer types
340 typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
341 typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
342 const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar);
343 typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
344 typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
345 typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
346 const simdscalar, const simdscalar);
347
348 struct BACKEND_FUNCS
349 {
350 PFN_BACKEND_FUNC pfnBackend;
351 PFN_CALC_PIXEL_BARYCENTRICS pfnCalcPixelBarycentrics;
352 PFN_CALC_SAMPLE_BARYCENTRICS pfnCalcSampleBarycentrics;
353 PFN_CALC_CENTROID_BARYCENTRICS pfnCalcCentroidBarycentrics;
354 PFN_OUTPUT_MERGER pfnOutputMerger;
355 };
356
357 // Draw State
358 struct DRAW_STATE
359 {
360 API_STATE state;
361
362 void* pPrivateState; // Its required the driver sets this up for each draw.
363
364 // pipeline function pointers, filled in by API thread when setting up the draw
365 BACKEND_FUNCS backendFuncs;
366 PFN_PROCESS_PRIMS pfnProcessPrims;
367
368 Arena* pArena; // This should only be used by API thread.
369 };
370
371 // Draw Context
372 // The api thread sets up a draw context that exists for the life of the draw.
373 // This draw context maintains all of the state needed for the draw operation.
374 struct DRAW_CONTEXT
375 {
376 SWR_CONTEXT *pContext;
377
378 uint64_t drawId;
379
380 bool isCompute; // Is this DC a compute context?
381
382 FE_WORK FeWork;
383 volatile OSALIGNLINE(uint32_t) FeLock;
384 volatile OSALIGNLINE(bool) inUse;
385 volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
386
387 // Have all worker threads moved past draw in DC ring?
388 volatile OSALIGNLINE(uint32_t) threadsDoneFE;
389 volatile OSALIGNLINE(uint32_t) threadsDoneBE;
390
391 uint64_t dependency;
392
393 MacroTileMgr* pTileMgr;
394
395 // The following fields are valid if isCompute is true.
396 volatile OSALIGNLINE(bool) doneCompute; // Is this dispatch done? (isCompute)
397 DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
398
399 DRAW_STATE* pState;
400 Arena* pArena;
401
402 uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
403 };
404
405 INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC)
406 {
407 SWR_ASSERT(pDC != nullptr);
408 SWR_ASSERT(pDC->pState != nullptr);
409
410 return pDC->pState->state;
411 }
412
413 INLINE void* GetPrivateState(const DRAW_CONTEXT* pDC)
414 {
415 SWR_ASSERT(pDC != nullptr);
416 SWR_ASSERT(pDC->pState != nullptr);
417
418 return pDC->pState->pPrivateState;
419 }
420
421 class HotTileMgr;
422
423 struct SWR_CONTEXT
424 {
425 // Draw Context Ring
426 // Each draw needs its own state in order to support mulitple draws in flight across multiple threads.
427 // We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
428 // of draws that can be in flight at any given time.
429 //
430 // Description:
431 // 1. State - When an application first sets state we'll request a new draw context to use.
432 // a. If there are no available draw contexts then we'll have to wait until one becomes free.
433 // b. If one is available then set pCurDrawContext to point to it and mark it in use.
434 // c. All state calls set state on pCurDrawContext.
435 // 2. Draw - Creates submits a work item that is associated with current draw context.
436 // a. Set pPrevDrawContext = pCurDrawContext
437 // b. Set pCurDrawContext to NULL.
438 // 3. State - When an applications sets state after draw
439 // a. Same as step 1.
440 // b. State is copied from prev draw context to current.
441 DRAW_CONTEXT* dcRing;
442
443 DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
444 DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
445
446 // Draw State Ring
447 // When draw are very large (lots of primitives) then the API thread will break these up.
448 // These split draws all have identical state. So instead of storing the state directly
449 // in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
450 // to reference a single entry in the DS ring.
451 DRAW_STATE* dsRing;
452
453 uint32_t curStateId; // Current index to the next available entry in the DS ring.
454
455 DRAW_STATE* subCtxSave; // Save area for inactive contexts.
456 uint32_t curSubCtxId; // Current index for active state subcontext.
457 uint32_t numSubContexts; // Number of available subcontexts
458
459 uint32_t NumWorkerThreads;
460
461 THREAD_POOL threadPool; // Thread pool associated with this context
462
463 std::condition_variable FifosNotEmpty;
464 std::mutex WaitLock;
465
466 // Draw Contexts will get a unique drawId generated from this
467 uint64_t nextDrawId;
468
469 // most recent draw id enqueued by the API thread
470 // written by api thread, read by multiple workers
471 OSALIGNLINE(volatile uint64_t) DrawEnqueued;
472
473 DRIVER_TYPE driverType;
474
475 uint32_t privateStateSize;
476
477 HotTileMgr *pHotTileMgr;
478
479 // tile load/store functions, passed in at create context time
480 PFN_LOAD_TILE pfnLoadTile;
481 PFN_STORE_TILE pfnStoreTile;
482 PFN_CLEAR_TILE pfnClearTile;
483
484 // Global Stats
485 SWR_STATS stats[KNOB_MAX_NUM_THREADS];
486
487 // Scratch space for workers.
488 uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
489 };
490
491 void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
492 void WakeAllThreads(SWR_CONTEXT *pContext);
493
// Per-worker statistics helpers. Both macros expect `pDC`, `pContext` and
// `workerId` to be in scope at the expansion site and do nothing unless
// GetApiState(pDC).enableStats is set.
//
// The bodies are wrapped in do { } while (0) so each expansion is a single
// statement that composes safely with an enclosing if/else; `count` is
// parenthesized so it is evaluated as one operand.
#define UPDATE_STAT(name, count) \
    do { if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += (count); } } while (0)
#define SET_STAT(name, count) \
    do { if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name = (count); } } while (0)