swr/rast: Refactor memory API between rasterizer core and swr
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / state.h
1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 // Skipping clang-format due to parsing by simplistic python scripts
29 // clang-format off
30 #pragma once
31
32 #include "common/formats.h"
33 #include "common/intrin.h"
34 #include <functional>
35 #include <algorithm>
36
37 using gfxptr_t = unsigned long long; // integer type of the address-like fields declared below (xpData, xpIndices, pBuffer, ...)
38
39 //////////////////////////////////////////////////////////////////////////
40 /// PRIMITIVE_TOPOLOGY - primitive topology IDs. An N-vertex patch list is encoded as TOP_PATCHLIST_BASE + N (N = 1..32).
41 //////////////////////////////////////////////////////////////////////////
42 enum PRIMITIVE_TOPOLOGY
43 {
44 TOP_UNKNOWN = 0x0,
45 TOP_POINT_LIST = 0x1,
46 TOP_LINE_LIST = 0x2,
47 TOP_LINE_STRIP = 0x3,
48 TOP_TRIANGLE_LIST = 0x4,
49 TOP_TRIANGLE_STRIP = 0x5,
50 TOP_TRIANGLE_FAN = 0x6,
51 TOP_QUAD_LIST = 0x7,
52 TOP_QUAD_STRIP = 0x8,
53 TOP_LINE_LIST_ADJ = 0x9,
54 TOP_LISTSTRIP_ADJ = 0xA,
55 TOP_TRI_LIST_ADJ = 0xB,
56 TOP_TRI_STRIP_ADJ = 0xC,
57 TOP_TRI_STRIP_REVERSE = 0xD,
58 TOP_POLYGON = 0xE,
59 TOP_RECT_LIST = 0xF,
60 TOP_LINE_LOOP = 0x10,
61 TOP_POINT_LIST_BF = 0x11,
62 TOP_LINE_STRIP_CONT = 0x12,
63 TOP_LINE_STRIP_BF = 0x13,
64 TOP_LINE_STRIP_CONT_BF = 0x14,
65 TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16, // note: 0x15 is not assigned
66 TOP_TRIANGLE_DISC = 0x17, /// @todo What is this??
67
68 TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
69 TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches
70 TOP_PATCHLIST_2 = 0x21,
71 TOP_PATCHLIST_3 = 0x22,
72 TOP_PATCHLIST_4 = 0x23,
73 TOP_PATCHLIST_5 = 0x24,
74 TOP_PATCHLIST_6 = 0x25,
75 TOP_PATCHLIST_7 = 0x26,
76 TOP_PATCHLIST_8 = 0x27,
77 TOP_PATCHLIST_9 = 0x28,
78 TOP_PATCHLIST_10 = 0x29,
79 TOP_PATCHLIST_11 = 0x2A,
80 TOP_PATCHLIST_12 = 0x2B,
81 TOP_PATCHLIST_13 = 0x2C,
82 TOP_PATCHLIST_14 = 0x2D,
83 TOP_PATCHLIST_15 = 0x2E,
84 TOP_PATCHLIST_16 = 0x2F,
85 TOP_PATCHLIST_17 = 0x30,
86 TOP_PATCHLIST_18 = 0x31,
87 TOP_PATCHLIST_19 = 0x32,
88 TOP_PATCHLIST_20 = 0x33,
89 TOP_PATCHLIST_21 = 0x34,
90 TOP_PATCHLIST_22 = 0x35,
91 TOP_PATCHLIST_23 = 0x36,
92 TOP_PATCHLIST_24 = 0x37,
93 TOP_PATCHLIST_25 = 0x38,
94 TOP_PATCHLIST_26 = 0x39,
95 TOP_PATCHLIST_27 = 0x3A,
96 TOP_PATCHLIST_28 = 0x3B,
97 TOP_PATCHLIST_29 = 0x3C,
98 TOP_PATCHLIST_30 = 0x3D,
99 TOP_PATCHLIST_31 = 0x3E,
100 TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches
101 };
102
103 //////////////////////////////////////////////////////////////////////////
104 /// SWR_SHADER_TYPE - one entry per shader stage; values are implicit and sequential starting at 0.
105 //////////////////////////////////////////////////////////////////////////
106 enum SWR_SHADER_TYPE
107 {
108 SHADER_VERTEX,
109 SHADER_GEOMETRY,
110 SHADER_DOMAIN,
111 SHADER_HULL,
112 SHADER_PIXEL,
113 SHADER_COMPUTE,
114
115 NUM_SHADER_TYPES, // count of the entries above (6); keep last
116 };
117
118 //////////////////////////////////////////////////////////////////////////
119 /// SWR_RENDERTARGET_ATTACHMENT
120 /// @todo It's not clear what an "attachment" means. It's not a common term.
121 //////////////////////////////////////////////////////////////////////////
122 enum SWR_RENDERTARGET_ATTACHMENT
123 {
124 SWR_ATTACHMENT_COLOR0,
125 SWR_ATTACHMENT_COLOR1,
126 SWR_ATTACHMENT_COLOR2,
127 SWR_ATTACHMENT_COLOR3,
128 SWR_ATTACHMENT_COLOR4,
129 SWR_ATTACHMENT_COLOR5,
130 SWR_ATTACHMENT_COLOR6,
131 SWR_ATTACHMENT_COLOR7,
132 SWR_ATTACHMENT_DEPTH, // = 8
133 SWR_ATTACHMENT_STENCIL, // = 9
134
135 SWR_NUM_ATTACHMENTS // count of the entries above (8 color + depth + stencil = 10); keep last
136 };
137
138 #define SWR_NUM_RENDERTARGETS 8 // equals the number of SWR_ATTACHMENT_COLORn enum entries
139
140 #define SWR_ATTACHMENT_COLOR0_BIT 0x001 // each *_BIT equals (1 << matching SWR_RENDERTARGET_ATTACHMENT value)
141 #define SWR_ATTACHMENT_COLOR1_BIT 0x002
142 #define SWR_ATTACHMENT_COLOR2_BIT 0x004
143 #define SWR_ATTACHMENT_COLOR3_BIT 0x008
144 #define SWR_ATTACHMENT_COLOR4_BIT 0x010
145 #define SWR_ATTACHMENT_COLOR5_BIT 0x020
146 #define SWR_ATTACHMENT_COLOR6_BIT 0x040
147 #define SWR_ATTACHMENT_COLOR7_BIT 0x080
148 #define SWR_ATTACHMENT_DEPTH_BIT 0x100
149 #define SWR_ATTACHMENT_STENCIL_BIT 0x200
150 #define SWR_ATTACHMENT_MASK_ALL 0x3ff // OR of all ten *_BIT values above
151 #define SWR_ATTACHMENT_MASK_COLOR 0x0ff // OR of the eight COLORn_BIT values
152
153
154 //////////////////////////////////////////////////////////////////////////
155 /// @brief SWR Inner Tessellation factor ID
156 /// See GetTessFactorOutputPosition code for documentation (NOTE(review): not defined above in this header - confirm its location)
157 enum SWR_INNER_TESSFACTOR_ID
158 {
159 SWR_QUAD_U_TRI_INSIDE,
160 SWR_QUAD_V_INSIDE,
161
162 SWR_NUM_INNER_TESS_FACTORS, // count of the entries above (2); sizes SWR_TESSELLATION_FACTORS::InnerTessFactors
163 };
164
165 //////////////////////////////////////////////////////////////////////////
166 /// @brief SWR Outer Tessellation factor ID
167 /// See GetTessFactorOutputPosition code for documentation (NOTE(review): not defined above in this header - confirm its location)
168 enum SWR_OUTER_TESSFACTOR_ID
169 {
170 SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL,
171 SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY,
172 SWR_QUAD_U_EQ1_TRI_W,
173 SWR_QUAD_V_EQ1,
174
175 SWR_NUM_OUTER_TESS_FACTORS, // count of the entries above (4); sizes SWR_TESSELLATION_FACTORS::OuterTessFactors
176 };
177
178 /////////////////////////////////////////////////////////////////////////
179 /// simdvertex / SWR_VTX_SLOTS
180 /// @brief Defines the slot layout of a vertex element that holds all the data for SIMD vertices.
181 /// Contains space for SGV, position, clip/cull distances, and 32 generic attributes (36 slots total)
182 /////////////////////////////////////////////////////////////////////////
183 enum SWR_VTX_SLOTS
184 {
185 VERTEX_SGV_SLOT = 0,
186 VERTEX_SGV_RTAI_COMP = 0, // *_COMP values: presumably component indices within the SGV slot - TODO confirm
187 VERTEX_SGV_VAI_COMP = 1,
188 VERTEX_SGV_POINT_SIZE_COMP = 2,
189 VERTEX_POSITION_SLOT = 1,
190 VERTEX_POSITION_END_SLOT = 1,
191 VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
192 VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
193 VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT), // = 4, first generic attribute slot
194 VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT), // = 35, last generic attribute slot (inclusive)
195 SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT) // = 36, array size used by the vertex structs
196 };
197
198 // SoAoSoA: a vertex is an array of simdvector, one per SWR_VTX_SLOTS slot
199 struct simdvertex
200 {
201 simdvector attrib[SWR_VTX_NUM_SLOTS]; // indexed by slot (see SWR_VTX_SLOTS)
202 };
203
204 struct simd16vertex
205 {
206 simd16vector attrib[SWR_VTX_NUM_SLOTS]; // same slot array as simdvertex, with simd16vector elements
207 };
208
209 template <typename SIMD_T>
210 struct SIMDVERTEX_T
211 {
212 typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS]; // generic form of the vertex structs: element type is SIMD_T::Vec4
213 };
214
215 //////////////////////////////////////////////////////////////////////////
216 /// SWR_SHADER_STATS
217 /// @brief Structure passed to shader for stats collection (twelve 32-bit counters; embedded as `stats` in each shader context).
218 /////////////////////////////////////////////////////////////////////////
219 struct SWR_SHADER_STATS
220 {
221 uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86.
222 uint32_t numSampleExecuted;
223 uint32_t numSampleLExecuted;
224 uint32_t numSampleBExecuted;
225 uint32_t numSampleCExecuted;
226 uint32_t numSampleCLZExecuted;
227 uint32_t numSampleCDExecuted;
228 uint32_t numGather4Executed;
229 uint32_t numGather4CExecuted;
230 uint32_t numGather4CPOExecuted;
231 uint32_t numGather4CPOCExecuted;
232 uint32_t numLodExecuted;
233 };
234
235 //////////////////////////////////////////////////////////////////////////
236 /// SWR_VS_CONTEXT
237 /// @brief Context passed to the vertex shader (see PFN_VERTEX_FUNC); each field is tagged IN or OUT.
238 /////////////////////////////////////////////////////////////////////////
239 struct SWR_VS_CONTEXT
240 {
241 simdvertex* pVin; // IN: SIMD input vertex data store
242 simdvertex* pVout; // OUT: SIMD output vertex data store
243
244 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
245 simdscalari VertexID; // IN: Vertex ID
246 simdscalari mask; // IN: Active mask for shader
247
248 // SIMD16 Frontend fields.
249 uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in
250 // simd16vertex output
251 simd16scalari mask16; // IN: Active mask for shader (16-wide)
252 simd16scalari VertexID16; // IN: Vertex ID (16-wide)
253
254 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
255 };
256
257 /////////////////////////////////////////////////////////////////////////
258 /// ScalarAttrib / ScalarCPoint
259 /// @brief ScalarCPoint defines a control point element as passed from the output
260 /// of the hull shader to the input of the domain shader; ScalarAttrib is one of its 4-float attributes
261 /////////////////////////////////////////////////////////////////////////
262 struct ScalarAttrib
263 {
264 float x;
265 float y;
266 float z;
267 float w;
268 };
269
270 struct ScalarCPoint
271 {
272 ScalarAttrib attrib[SWR_VTX_NUM_SLOTS]; // one scalar (x,y,z,w) attribute per vertex slot
273 };
274
275 //////////////////////////////////////////////////////////////////////////
276 /// SWR_TESSELLATION_FACTORS
277 /// @brief Tessellation factors structure (non-vector): plain float arrays sized by the tess-factor ID enums
278 /////////////////////////////////////////////////////////////////////////
279 struct SWR_TESSELLATION_FACTORS
280 {
281 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS]; // 4 entries (see SWR_OUTER_TESSFACTOR_ID)
282 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS]; // 2 entries (see SWR_INNER_TESSFACTOR_ID)
283 };
284
285 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
286 struct ScalarPatch
287 {
288 SWR_TESSELLATION_FACTORS tessFactors; // scalar tess factors for this patch
289 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM]; // control points; capacity is MAX_NUM_VERTS_PER_PRIM
290 ScalarCPoint patchData; // NOTE(review): presumably per-patch (not per-control-point) data - confirm
291 };
292
293 //////////////////////////////////////////////////////////////////////////
294 /// SWR_HS_CONTEXT
295 /// @brief Context passed to the hull shader (see PFN_HS_FUNC); each field is tagged IN or OUT.
296 /////////////////////////////////////////////////////////////////////////
297 struct SWR_HS_CONTEXT
298 {
299 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
300 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
301 simdscalari mask; // IN: Active mask for shader
302 ScalarPatch* pCPout; // OUT: Output control point patch SIMD-sized-array of SCALAR patches
303 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
304 };
305
306 //////////////////////////////////////////////////////////////////////////
307 /// SWR_DS_CONTEXT
308 /// @brief Context passed to the domain shader (see PFN_DS_FUNC); fields are tagged IN/OUT and SCALAR/SIMD.
309 /////////////////////////////////////////////////////////////////////////
310 struct SWR_DS_CONTEXT
311 {
312 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
313 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
314 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
315 uint32_t outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
316 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
317 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
318 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
319 simdscalari mask; // IN: Active mask for shader
320 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
321 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
322 };
323
324 //////////////////////////////////////////////////////////////////////////
325 /// SWR_GS_CONTEXT
326 /// @brief Context passed to the geometry shader (see PFN_GS_FUNC); each field is tagged IN or OUT.
327 /////////////////////////////////////////////////////////////////////////
328 struct SWR_GS_CONTEXT
329 {
330 simdvector* pVerts; // IN: input primitive data for SIMD prims
331 uint32_t inputVertStride; // IN: input vertex stride, in attributes
332 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
333 uint32_t InstanceID; // IN: input instance ID
334 simdscalari mask; // IN: Active mask for shader
335 uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams); one pointer per SIMD lane
336 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
337 };
338
339 struct PixelPositions
340 {
341 simdscalar UL; // NOTE(review): presumably the upper-left pixel position - confirm
342 simdscalar center; // value at pixel center
343 simdscalar sample; // value at sample position
344 simdscalar centroid; // value at centroid
345 };
346
347 #define SWR_MAX_NUM_MULTISAMPLES 16 // equals GetNumSamples(SWR_MULTISAMPLE_16X), i.e. 1 << 4
348
349 //////////////////////////////////////////////////////////////////////////
350 /// SWR_PS_CONTEXT
351 /// @brief Context passed to the pixel shader (see PFN_PIXEL_KERNEL); each field is tagged IN, OUT or INOUT.
352 /////////////////////////////////////////////////////////////////////////
353 struct SWR_PS_CONTEXT
354 {
355 PixelPositions vX; // IN: x location(s) of pixels
356 PixelPositions vY; // IN: y location(s) of pixels
357 simdscalar vZ; // INOUT: z location of pixels
358 simdscalari activeMask; // OUT: mask for kill
359 simdscalar inputMask; // IN: input coverage mask for all samples
360 simdscalari oMask; // OUT: mask for output coverage
361
362 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
363 PixelPositions vJ; // second barycentric coordinate, same PixelPositions layout as vI
364 PixelPositions vOneOverW; // IN: 1/w
365
366 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
367 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
368 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
369 const float* I; // IN: Barycentric A, B, and C coefs used to compute I
370 const float* J; // IN: Barycentric A, B, and C coefs used to compute J
371 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
372 const float* pSamplePosX; // IN: array of sample positions (X)
373 const float* pSamplePosY; // IN: array of sample positions (Y)
374 simdvector shaded[SWR_NUM_RENDERTARGETS]; // OUT: result color per rendertarget
375
376 uint32_t frontFace; // IN: front- 1, back- 0
377 uint32_t sampleIndex; // IN: sampleIndex
378 uint32_t renderTargetArrayIndex; // IN: render target array index from GS
379 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
380
381 uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
382
383 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
384 };
385
386 //////////////////////////////////////////////////////////////////////////
387 /// SWR_CS_CONTEXT
388 /// @brief Context passed to the compute shader (see PFN_CS_FUNC).
389 /////////////////////////////////////////////////////////////////////////
390 struct SWR_CS_CONTEXT
391 {
392 // The ThreadGroupId is the current thread group index relative
393 // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
394 // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
395
396 // Compute shader accepts the following system values.
397 // o ThreadId - Current thread id relative to all other threads in dispatch.
398 // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
399 // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
400 // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
401 //
402 // All of these system values can be computed in the shader. They will be
403 // derived from the current tile counter. The tile counter is an atomic counter that
404 // resides in the draw context and is initialized to the product of the dispatch dims.
405 //
406 // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
407 //
408 // Each CPU worker thread will atomically decrement this counter and pass the current
409 // count into the shader. When the count reaches 0 then all thread groups in the
410 // dispatch call have been completed.
411
412 uint32_t tileCounter; // The tile counter value for this thread group.
413
414 // Dispatch dimensions used by shader to compute system values from the tile counter.
415 uint32_t dispatchDims[3]; // order x, y, z per the formula above
416
417 uint8_t* pTGSM; // Thread Group Shared Memory pointer.
418 uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
419 uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is
420 // responsible for subdividing scratch space per instance/simd
421 uint32_t scratchSpacePerWarp; // Scratch space per work item x SIMD_WIDTH
422
423 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
424 };
425
426 // enums - SWR_TILE_MODE: surface tiling layouts
427 enum SWR_TILE_MODE
428 {
429 SWR_TILE_NONE = 0x0, // Linear mode (no tiling)
430 SWR_TILE_MODE_WMAJOR, // W major tiling
431 SWR_TILE_MODE_XMAJOR, // X major tiling
432 SWR_TILE_MODE_YMAJOR, // Y major tiling
433 SWR_TILE_SWRZ, // SWR-Z tiling
434
435
436 SWR_TILE_MODE_COUNT // count of the entries above (5); keep last
437 };
438
439 enum SWR_SURFACE_TYPE
440 {
441 SURFACE_1D = 0,
442 SURFACE_2D = 1,
443 SURFACE_3D = 2,
444 SURFACE_CUBE = 3,
445 SURFACE_BUFFER = 4,
446 SURFACE_STRUCTURED_BUFFER = 5,
447 SURFACE_NULL = 7 // note: value 6 is not assigned
448 };
449
450 enum SWR_ZFUNCTION
451 {
452 ZFUNC_ALWAYS, // = 0; values are implicit and sequential
453 ZFUNC_NEVER,
454 ZFUNC_LT,
455 ZFUNC_EQ,
456 ZFUNC_LE,
457 ZFUNC_GT,
458 ZFUNC_NE,
459 ZFUNC_GE,
460 NUM_ZFUNC // count of the entries above (8); keep last
461 };
462
463 enum SWR_STENCILOP
464 {
465 STENCILOP_KEEP, // = 0; values are implicit and sequential
466 STENCILOP_ZERO,
467 STENCILOP_REPLACE,
468 STENCILOP_INCRSAT,
469 STENCILOP_DECRSAT,
470 STENCILOP_INCR,
471 STENCILOP_DECR,
472 STENCILOP_INVERT // = 7
473 };
474
475 enum SWR_BLEND_FACTOR
476 {
477 BLENDFACTOR_ONE, // = 0; values are implicit and sequential
478 BLENDFACTOR_SRC_COLOR,
479 BLENDFACTOR_SRC_ALPHA,
480 BLENDFACTOR_DST_ALPHA,
481 BLENDFACTOR_DST_COLOR,
482 BLENDFACTOR_SRC_ALPHA_SATURATE, // no INV_ counterpart below
483 BLENDFACTOR_CONST_COLOR,
484 BLENDFACTOR_CONST_ALPHA,
485 BLENDFACTOR_SRC1_COLOR,
486 BLENDFACTOR_SRC1_ALPHA,
487 BLENDFACTOR_ZERO, // = 10
488 BLENDFACTOR_INV_SRC_COLOR, // = 11; INV_* entries follow the same order as the non-inverted ones above
489 BLENDFACTOR_INV_SRC_ALPHA,
490 BLENDFACTOR_INV_DST_ALPHA,
491 BLENDFACTOR_INV_DST_COLOR,
492 BLENDFACTOR_INV_CONST_COLOR,
493 BLENDFACTOR_INV_CONST_ALPHA,
494 BLENDFACTOR_INV_SRC1_COLOR,
495 BLENDFACTOR_INV_SRC1_ALPHA // = 18
496 };
497
498 enum SWR_BLEND_OP
499 {
500 BLENDOP_ADD, // = 0; values are implicit and sequential
501 BLENDOP_SUBTRACT,
502 BLENDOP_REVSUBTRACT,
503 BLENDOP_MIN,
504 BLENDOP_MAX, // = 4
505 };
506
507 enum SWR_LOGIC_OP
508 {
509 LOGICOP_CLEAR, // = 0; sixteen ops with implicit sequential values 0..15
510 LOGICOP_NOR,
511 LOGICOP_AND_INVERTED,
512 LOGICOP_COPY_INVERTED,
513 LOGICOP_AND_REVERSE,
514 LOGICOP_INVERT,
515 LOGICOP_XOR,
516 LOGICOP_NAND,
517 LOGICOP_AND,
518 LOGICOP_EQUIV,
519 LOGICOP_NOOP,
520 LOGICOP_OR_INVERTED,
521 LOGICOP_COPY,
522 LOGICOP_OR_REVERSE,
523 LOGICOP_OR,
524 LOGICOP_SET, // = 15
525 };
526
527 //////////////////////////////////////////////////////////////////////////
528 /// SWR_AUX_MODE
529 /// @brief Specifies how the auxiliary buffer is used by the driver.
530 //////////////////////////////////////////////////////////////////////////
531 enum SWR_AUX_MODE
532 {
533 AUX_MODE_NONE, // = 0; values are implicit and sequential
534 AUX_MODE_COLOR,
535 AUX_MODE_UAV,
536 AUX_MODE_DEPTH,
537 };
538
539 // vertex fetch state
540 // WARNING - any changes to this struct need to be reflected
541 // in the fetch shader jit
542 struct SWR_VERTEX_BUFFER_STATE
543 {
544 gfxptr_t xpData; // NOTE(review): presumably the address of the vertex data - confirm
545 uint32_t index; // NOTE(review): presumably the vertex buffer slot index - confirm
546 uint32_t pitch; // per-vertex stride; divides `size` to give maxVertex (see below)
547 uint32_t size; // buffer size, in the same units as pitch
548 uint32_t minVertex; // min vertex (for bounds checking)
549 uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
550 uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for
551 // partially OOB vertices
552 };
553
554 struct SWR_INDEX_BUFFER_STATE
555 {
556 gfxptr_t xpIndices; // NOTE(review): presumably the address of the index data - confirm
557 // Format type for indices (e.g. UINT16, UINT32, etc.)
558 SWR_FORMAT format; // @llvm_enum
559 uint32_t size; // NOTE(review): units (bytes vs. indices) are not shown here - confirm
560 };
561
562 //////////////////////////////////////////////////////////////////////////
563 /// SWR_FETCH_CONTEXT
564 /// @brief Context passed to the fetch shader (see PFN_FETCH_FUNC); each field is tagged IN or OUT.
565 /// @note WARNING - Changes to this struct need to be reflected in the
566 /// fetch shader jit.
567 /////////////////////////////////////////////////////////////////////////
568 struct SWR_FETCH_CONTEXT
569 {
570 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
571 gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
572 gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
573 uint32_t CurInstance; // IN: current instance
574 uint32_t BaseVertex; // IN: base vertex
575 uint32_t StartVertex; // IN: start vertex
576 uint32_t StartInstance; // IN: start instance
577 simdscalari VertexID; // OUT: vector of vertex IDs
578 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
579 #if USE_SIMD16_SHADERS
580 // simd16scalari VertexID; // OUT: vector of vertex IDs
581 // simd16scalari CutMask; // OUT: vector mask of indices which have the
582 // cut index value
583 simdscalari VertexID2; // OUT: vector of vertex IDs (second simdscalari; present only with USE_SIMD16_SHADERS)
584 simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value (second simdscalari)
585 #endif
586 };
587
588 //////////////////////////////////////////////////////////////////////////
589 /// SWR_STATS
590 /// NOTE(review): OSALIGNLINE is defined outside this file; presumably it applies cache-line alignment - confirm.
591 /// @brief All statistics generated by SWR go here. These are public
592 /// to driver.
593 /////////////////////////////////////////////////////////////////////////
594 OSALIGNLINE(struct) SWR_STATS
595 {
596 // Occlusion Query
597 uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
598
599 // Pipeline Stats
600 uint64_t PsInvocations; // Number of Pixel Shader invocations
601 uint64_t CsInvocations; // Number of Compute Shader invocations
602
603 };
604
605 //////////////////////////////////////////////////////////////////////////
606 /// SWR_STATS_FE
607 ///
608 /// @brief All statistics generated by FE.
609 /////////////////////////////////////////////////////////////////////////
610 OSALIGNLINE(struct) SWR_STATS_FE
611 {
612 uint64_t IaVertices; // Number of Fetch Shader vertices
613 uint64_t IaPrimitives; // Number of PA primitives.
614 uint64_t VsInvocations; // Number of Vertex Shader invocations
615 uint64_t HsInvocations; // Number of Hull Shader invocations
616 uint64_t DsInvocations; // Number of Domain Shader invocations
617 uint64_t GsInvocations; // Number of Geometry Shader invocations
618 uint64_t GsPrimitives; // Number of prims GS outputs.
619 uint64_t CInvocations; // Number of clipper invocations
620 uint64_t CPrimitives; // Number of clipper primitives.
621
622 // Streamout Stats
623 uint64_t SoPrimStorageNeeded[4]; // NOTE(review): presumably one entry per SO stream (MAX_SO_STREAMS is also 4) - confirm
624 uint64_t SoNumPrimsWritten[4]; // NOTE(review): presumably one entry per SO stream - confirm
625 };
626
627 //////////////////////////////////////////////////////////////////////////
628 /// STREAMOUT_BUFFERS - stream-output (SO) limits and buffer description
629 /////////////////////////////////////////////////////////////////////////
630
631 #define MAX_SO_STREAMS 4 // sizes the per-stream arrays in SWR_STREAMOUT_STATE and SWR_STREAMOUT_CONTEXT
632 #define MAX_SO_BUFFERS 4 // not referenced in the visible portion of this header
633 #define MAX_ATTRIBUTES 32 // not referenced in the visible portion of this header
634
635 struct SWR_STREAMOUT_BUFFER
636 {
637 // Address of the streamout buffer.
638 gfxptr_t pBuffer;
639
640 // Offset to the SO write offset. If not null then we update offset here.
641 gfxptr_t pWriteOffset;
642
643 bool enable; // NOTE(review): presumably enables this buffer - confirm
644 bool soWriteEnable; // NOTE(review): presumably gates writes to the buffer - confirm
645
646 // Size of buffer in dwords.
647 uint32_t bufferSize;
648
649 // Vertex pitch of buffer in dwords.
650 uint32_t pitch;
651
652 // Offset into buffer in dwords. SOS will increment this offset.
653 uint32_t streamOffset;
654 };
655
656 //////////////////////////////////////////////////////////////////////////
657 /// STREAMOUT_STATE - per-draw stream output configuration; per-stream arrays have MAX_SO_STREAMS entries
658 /////////////////////////////////////////////////////////////////////////
659 struct SWR_STREAMOUT_STATE
660 {
661 // Stream output enable flag. NOTE(review): original comment read "This disables stream output"; presumably false disables - confirm.
662 bool soEnable;
663
664 // which streams are enabled for streamout
665 bool streamEnable[MAX_SO_STREAMS];
666
667 // If set then do not send any streams to the rasterizer.
668 bool rasterizerDisable;
669
670 // Specifies which stream to send to the rasterizer.
671 uint32_t streamToRasterizer;
672
673 // The stream masks specify which attributes are sent to which streams.
674 // These masks help the FE to setup the pPrimData buffer that is passed
675 // to the Stream Output Shader (SOS) function.
676 uint64_t streamMasks[MAX_SO_STREAMS];
677
678 // Number of attributes, including position, per vertex that are streamed out.
679 // This should match number of bits in stream mask.
680 uint32_t streamNumEntries[MAX_SO_STREAMS];
681
682 // Offset to the start of the attributes of the input vertices, in simdvector units
683 uint32_t vertexAttribOffset[MAX_SO_STREAMS];
684 };
685
686 //////////////////////////////////////////////////////////////////////////
687 /// STREAMOUT_CONTEXT - Passed to SOS (see PFN_SO_FUNC)
688 /////////////////////////////////////////////////////////////////////////
689 struct SWR_STREAMOUT_CONTEXT
690 {
691 uint32_t* pPrimData; // primitive data buffer set up by the FE (see streamMasks in SWR_STREAMOUT_STATE)
692 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS]; // note: sized by MAX_SO_STREAMS, not MAX_SO_BUFFERS (both are 4)
693
694 // Num prims written for this stream
695 uint32_t numPrimsWritten;
696
697 // Num prims that should have been written if there were no overflow.
698 uint32_t numPrimStorageNeeded;
699 };
700
701 //////////////////////////////////////////////////////////////////////////
702 /// SWR_GS_STATE - Geometry shader state (size is pinned to 64 bytes by the static_assert after the struct)
703 /////////////////////////////////////////////////////////////////////////
704 struct SWR_GS_STATE
705 {
706 bool gsEnable;
707
708 // If true, geometry shader emits a single stream, with separate cut buffer.
709 // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a
710 // separate StreamID buffer to map vertices to streams
711 bool isSingleStream;
712
713 // Number of input attributes per vertex. Used by the frontend to
714 // optimize assembling primitives for GS
715 uint32_t numInputAttribs;
716
717 // Stride of incoming verts in attributes
718 uint32_t inputVertStride;
719
720 // Output topology - can be point, tristrip, linestrip, or rectlist
721 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
722
723 // Maximum number of verts that can be emitted by a single instance of the GS
724 uint32_t maxNumVerts;
725
726 // Instance count
727 uint32_t instanceCount;
728
729 // When single stream is enabled, singleStreamID dictates which stream is being output.
730 // field ignored if isSingleStream is false
731 uint32_t singleStreamID;
732
733 // Total amount of memory to allocate for one instance of the shader output in bytes
734 uint32_t allocationSize;
735
736 // Offset to the start of the attributes of the input vertices, in simdvector units, as read by
737 // the GS
738 uint32_t vertexAttribOffset;
739
740 // Offset to the attributes as stored by the preceding shader stage.
741 uint32_t srcVertexAttribOffset;
742
743 // Size of the control data section which contains cut or streamID data, in simdscalar units.
744 // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are
745 // no cuts or streamID bits.
746 uint32_t controlDataSize;
747
748 // Offset to the control data section, in bytes
749 uint32_t controlDataOffset;
750
751 // Total size of an output vertex, in simdvector units
752 uint32_t outputVertexSize;
753
754 // Offset to the start of the vertex section, in bytes
755 uint32_t outputVertexOffset;
756
757 // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero,
758 // shader is expected to store the final vertex count in the first dword of the gs output
759 // stream.
760 uint32_t staticVertexCount;
761
762 uint32_t pad; // explicit padding; adjust when fields are added so the static_assert below still holds
763 };
764 static_assert(sizeof(SWR_GS_STATE) == 64, "Adjust padding to keep size (or remove this assert)");
765
766 //////////////////////////////////////////////////////////////////////////
767 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
768 /////////////////////////////////////////////////////////////////////////
769 enum SWR_TS_OUTPUT_TOPOLOGY
770 {
771 SWR_TS_OUTPUT_POINT, // = 0; values are implicit and sequential
772 SWR_TS_OUTPUT_LINE,
773 SWR_TS_OUTPUT_TRI_CW,
774 SWR_TS_OUTPUT_TRI_CCW,
775
776 SWR_TS_OUTPUT_TOPOLOGY_COUNT // count of the entries above (4); keep last
777 };
778
779 //////////////////////////////////////////////////////////////////////////
780 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
781 /////////////////////////////////////////////////////////////////////////
782 enum SWR_TS_PARTITIONING
783 {
784 SWR_TS_INTEGER, // = 0; values are implicit and sequential
785 SWR_TS_ODD_FRACTIONAL,
786 SWR_TS_EVEN_FRACTIONAL,
787
788 SWR_TS_PARTITIONING_COUNT // count of the entries above (3); keep last
789 };
790
791 //////////////////////////////////////////////////////////////////////////
792 /// SWR_TS_DOMAIN - Defines Tessellation Domain
793 /////////////////////////////////////////////////////////////////////////
794 enum SWR_TS_DOMAIN
795 {
796 SWR_TS_QUAD, // = 0; values are implicit and sequential
797 SWR_TS_TRI,
798 SWR_TS_ISOLINE,
799
800 SWR_TS_DOMAIN_COUNT // count of the entries above (3); keep last
801 };
802
803 //////////////////////////////////////////////////////////////////////////
804 /// SWR_TS_STATE - Tessellation state (enum-typed members carry @llvm_enum markers)
805 /////////////////////////////////////////////////////////////////////////
806 struct SWR_TS_STATE
807 {
808 bool tsEnable; // NOTE(review): presumably enables the tessellation stages - confirm
809
810 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
811 SWR_TS_PARTITIONING partitioning; // @llvm_enum
812 SWR_TS_DOMAIN domain; // @llvm_enum
813
814 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
815
816 uint32_t numHsInputAttribs;
817 uint32_t numHsOutputAttribs;
818 uint32_t numDsOutputAttribs;
819 uint32_t dsAllocationSize; // NOTE(review): units are not shown here - confirm
820 uint32_t dsOutVtxAttribOffset; // NOTE(review): presumably feeds SWR_DS_CONTEXT::outVertexAttribOffset - confirm
821
822 // Offset to the start of the attributes of the input vertices, in simdvector units
823 uint32_t vertexAttribOffset;
824 };
825
826 // output merger state: four 1-bit per-channel write-disable flags packed into one byte
827 struct SWR_RENDER_TARGET_BLEND_STATE
828 {
829 uint8_t writeDisableRed : 1;
830 uint8_t writeDisableGreen : 1;
831 uint8_t writeDisableBlue : 1;
832 uint8_t writeDisableAlpha : 1;
833 };
834 static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1,
835 "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
836
837 enum SWR_MULTISAMPLE_COUNT
838 {
839 SWR_MULTISAMPLE_1X = 0, // enum value n corresponds to (1 << n) samples, see GetNumSamples
840 SWR_MULTISAMPLE_2X,
841 SWR_MULTISAMPLE_4X,
842 SWR_MULTISAMPLE_8X,
843 SWR_MULTISAMPLE_16X, // = 4
844 SWR_MULTISAMPLE_TYPE_COUNT // count of the entries above (5); keep last
845 };
846 /// @brief Returns 1 << sampleCountEnum, e.g. SWR_MULTISAMPLE_4X (2) -> 4. The shift is undefined for negative values or values >= 32; callers must pass a valid enum value.
847 static INLINE uint32_t GetNumSamples(/* SWR_MULTISAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start
848 {
849 return uint32_t(1) << sampleCountEnum;
850 } // @llvm_func_end
851
852 struct SWR_BLEND_STATE
853 {
854 // constant blend factor color in RGBA float
855 float constantColor[4];
856
857 // alpha test reference value in unorm8 or float32
858 uint32_t alphaTestReference;
859 uint32_t sampleMask;
860 // all RT's have the same sample count
861 ///@todo move this to Output Merger state when we refactor
862 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
863
864 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS]; // one 1-byte entry per render target
865 };
866 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
867
868 struct SWR_BLEND_CONTEXT
869 {
870 const SWR_BLEND_STATE* pBlendState; // read-only pointer to the blend state
871 simdvector* src;
872 simdvector* src1;
873 simdvector* src0alpha;
874 uint32_t sampleNum;
875 simdvector* pDst;
876 simdvector* result;
877 simdscalari* oMask;
878 simdscalari* pMask;
879 uint32_t isAlphaTested; // NOTE(review): presumably a boolean flag stored as uint32_t - confirm
880 uint32_t isAlphaBlended; // NOTE(review): presumably a boolean flag stored as uint32_t - confirm
881 };
882
883 //////////////////////////////////////////////////////////////////////////
884 /// FUNCTION POINTERS FOR SHADERS
885 /// Stage entry points take (hPrivateData, hWorkerPrivateData, stage context); PFN_FETCH_FUNC adds an output vertex, PFN_SO_FUNC omits hWorkerPrivateData.
886 #if USE_SIMD16_SHADERS
887 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
888 #else
889 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
890 #endif
891 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext);
892 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext);
893 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
894 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
895 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
896 typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, SWR_STREAMOUT_CONTEXT& soContext);
897 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
898 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
899 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
900 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &); // takes and returns a simdscalar; no __cdecl and no private-data handles
901
902
903 //////////////////////////////////////////////////////////////////////////
904 /// FRONTEND_STATE
905 /////////////////////////////////////////////////////////////////////////
// Front-end state: transform bypass, cut-index enable, provoking vertex
// selection and the vertex size used by the vertex shader stage.
906 struct SWR_FRONTEND_STATE
907 {
908 // skip clip test, perspective divide, and viewport transform
909 // intended for verts in screen space
910 bool vpTransformDisable;
// NOTE(review): presumably enables primitive restart on a cut index value - confirm
911 bool bEnableCutIndex;
// Provoking vertex selection per topology family, packed into bit-fields
// of 2, 1 and 2 bits. The same storage can be read or written as one
// 32-bit value through the bits member.
912 union
913 {
914 struct
915 {
916 uint32_t triFan : 2;
917 uint32_t lineStripList : 1;
918 uint32_t triStripList : 2;
919 };
920 uint32_t bits;
921 } provokingVertex;
922 uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
923
924 // Size of a vertex in simdvector units. Should be sized to the
925 // maximum of the input/output of the vertex shader.
926 uint32_t vsVertexSize;
927 };
928
929 //////////////////////////////////////////////////////////////////////////
930 /// VIEWPORT_MATRIX
931 /////////////////////////////////////////////////////////////////////////
// Six float coefficients of a single viewport matrix. Only these six
// elements are stored, not a full matrix.
// NOTE(review): the names suggest m00/m11/m22 are the diagonal scale terms and
// m30/m31/m32 the translation row of a 4x4 transform - confirm with the code
// that fills this in.
932 struct SWR_VIEWPORT_MATRIX
933 {
934 float m00;
935 float m11;
936 float m22;
937 float m30;
938 float m31;
939 float m32;
940 };
941
942 //////////////////////////////////////////////////////////////////////////
943 /// VIEWPORT_MATRICES
944 /////////////////////////////////////////////////////////////////////////
// Same six coefficients as SWR_VIEWPORT_MATRIX, but each coefficient is stored
// as its own array with KNOB_NUM_VIEWPORTS_SCISSORS entries.
// NOTE(review): the array index is presumably the viewport index - confirm.
945 struct SWR_VIEWPORT_MATRICES
946 {
947 float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
948 float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
949 float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
950 float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
951 float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
952 float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
953 };
954
955 //////////////////////////////////////////////////////////////////////////
956 /// SWR_VIEWPORT
957 /////////////////////////////////////////////////////////////////////////
// Viewport description: an origin, an extent and a depth range, all stored
// as floats.
// NOTE(review): units and the corner that x/y refer to are not visible in this
// file - confirm before relying on them.
958 struct SWR_VIEWPORT
959 {
960 float x;
961 float y;
962 float width;
963 float height;
964 float minZ;
965 float maxZ;
966 };
967
968 //////////////////////////////////////////////////////////////////////////
969 /// SWR_CULLMODE
970 //////////////////////////////////////////////////////////////////////////
// Face culling selection. Enumerators take the implicit values 0 to 3, which
// is the full range of a 2-bit field such as SWR_RASTSTATE::cullMode, so a
// fifth enumerator would not fit there.
971 enum SWR_CULLMODE
972 {
973 SWR_CULLMODE_BOTH,
974 SWR_CULLMODE_NONE,
975 SWR_CULLMODE_FRONT,
976 SWR_CULLMODE_BACK
977 };
978
// Polygon fill selection: point, wireframe or solid. Enumerators take the
// implicit values 0 to 2 and fit the 2-bit SWR_RASTSTATE::fillMode field.
979 enum SWR_FILLMODE
980 {
981 SWR_FILLMODE_POINT,
982 SWR_FILLMODE_WIREFRAME,
983 SWR_FILLMODE_SOLID
984 };
985
// Winding order treated as front facing: clockwise is 0, counter-clockwise
// is 1. Both values fit the 1-bit SWR_RASTSTATE::frontWinding field.
986 enum SWR_FRONTWINDING
987 {
988 SWR_FRONTWINDING_CW,
989 SWR_FRONTWINDING_CCW
990 };
991
992
// Pixel location selection: CENTER is 0 and UL is 1.
// SWR_RASTSTATE::pixelLocation is commented as holding "UL or Center".
993 enum SWR_PIXEL_LOCATION
994 {
995 SWR_PIXEL_LOCATION_CENTER,
996 SWR_PIXEL_LOCATION_UL,
997 };
998
999 // fixed point screen space sample locations within a pixel
// Per-sample positions kept in several parallel forms: scalar integer, scalar
// float, and SIMD vector copies, each with SWR_MAX_NUM_MULTISAMPLES entries,
// plus two tile sample offset vectors. All storage is private and reached only
// through the inline accessors below.
1000 struct SWR_MULTISAMPLE_POS
1001 {
1002 public:
// Scalar setters and getters. sampleNum indexes the arrays directly and is
// not range checked, so callers must keep it below SWR_MAX_NUM_MULTISAMPLES.
1003 INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
1004 INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
1005 INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
1006 INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
1007 INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
1008 INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
1009 INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
1010 INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
// Whole-array accessors: return a const reference to the full float array
1011 typedef const float (&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
1012 INLINE sampleArrayT X() const { return _x; }; // @llvm_func
1013 INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
// Vector accessors: return const references into the precalculated arrays,
// again without range checking sampleNum
1014 INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
1015 INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
1016 INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
1017 INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
1018 INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
1019 INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
1020
// Declared here and defined elsewhere.
// NOTE(review): presumably fills the vectorized members from the scalar
// ones for the given sample count - confirm against the definition.
1021 INLINE void PrecalcSampleData(int numSamples); //@llvm_func
1022
1023 private:
// Private helpers, declared here and defined elsewhere
1024 template <typename MaskT>
1025 INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
1026 INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
1027
1028 // scalar sample values
1029 uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
1030 uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
1031 float _x[SWR_MAX_NUM_MULTISAMPLES];
1032 float _y[SWR_MAX_NUM_MULTISAMPLES];
1033
1034 // precalc'd / vectorized samples
1035 __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
1036 __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
1037 simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
1038 simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
1039 __m128i tileSampleOffsetsX;
1040 __m128i tileSampleOffsetsY;
1041 };
1042
1043 //////////////////////////////////////////////////////////////////////////
1044 /// SWR_RASTSTATE
1045 //////////////////////////////////////////////////////////////////////////
// Rasterizer state: a run of bit-field flags and modes, followed by point and
// line sizes, depth bias parameters, and the multisample configuration
// including an embedded SWR_MULTISAMPLE_POS.
1046 struct SWR_RASTSTATE
1047 {
// Bit-fields, 15 bits in total. cullMode, fillMode and frontWinding are
// sized to hold the SWR_CULLMODE, SWR_FILLMODE and SWR_FRONTWINDING values.
1048 uint32_t cullMode : 2;
1049 uint32_t fillMode : 2;
1050 uint32_t frontWinding : 1;
1051 uint32_t scissorEnable : 1;
1052 uint32_t depthClipEnable : 1;
1053 uint32_t clipHalfZ : 1;
1054 uint32_t pointParam : 1;
1055 uint32_t pointSpriteEnable : 1;
1056 uint32_t pointSpriteTopOrigin : 1;
1057 uint32_t forcedSampleCount : 1;
1058 uint32_t pixelOffset : 1;
1059 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
1060 uint32_t conservativeRast : 1;
1061
1062 float pointSize;
1063 float lineWidth;
1064
// Depth bias parameters. depthBiasPreAdjusted above states which units
// the constant bias is expressed in.
1065 float depthBias;
1066 float slopeScaledDepthBias;
1067 float depthBiasClamp;
1068 SWR_FORMAT depthFormat; // @llvm_enum
1069
1070 // sample count the rasterizer is running at
1071 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
1072 uint32_t pixelLocation; // UL or Center
1073 SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
// NOTE(review): presumably true when every sample position is at the pixel center - confirm
1074 bool bIsCenterPattern; // @llvm_enum
1075 };
1076
1077
// Constant sources selectable for attribute swizzling. Enumerators take the
// implicit values 0 to 3, the full range of the 2-bit
// SWR_ATTRIB_SWIZZLE::constantSource field.
// NOTE(review): the names suggest the constant vectors 0000, 0001 and 1111 in
// float form plus the primitive id - confirm with the code that applies them.
1078 enum SWR_CONSTANT_SOURCE
1079 {
1080 SWR_CONSTANT_SOURCE_CONST_0000,
1081 SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
1082 SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
1083 SWR_CONSTANT_SOURCE_PRIM_ID
1084 };
1085
// One attribute swizzle entry packed into uint16_t bit-fields, 11 bits used.
// SWR_BACKEND_STATE holds 32 of these in swizzleMap, and the 5-bit
// sourceAttrib field can address values 0 to 31.
1086 struct SWR_ATTRIB_SWIZZLE
1087 {
1088 uint16_t sourceAttrib : 5; // source attribute
1089 uint16_t constantSource : 2; // constant source to apply
1090 uint16_t componentOverrideMask : 4; // override component with constant source
1091 };
1092
1093 // backend state
// Backend attribute setup state. The layout is deliberate: the pad member
// places swizzleMap at byte offset 64 and the static_assert after the
// definition pins the total size at 128 bytes. When adding or resizing a
// member, adjust pad so both properties still hold.
// NOTE(review): with a 1-byte bool the members ahead of pad take 46 bytes, pad
// brings that to 56 and the two uint32_t offsets to 64 - recheck on any change.
1094 struct SWR_BACKEND_STATE
1095 {
1096 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant
1097 // interpolation
1098 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be
1099 // interpreted as tex coordinates
1100
1101 bool swizzleEnable; // when enabled, core will parse the swizzle map when
1102 // setting up attributes for the backend, otherwise
1103 // all attributes up to numAttributes will be sent
1104 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
1105 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some
1106 // calculations for unneeded components
1107
1108 bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the
1109 // backend
1110 bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
1111
1112 // User clip/cull distance enables
1113 uint8_t cullDistanceMask;
1114 uint8_t clipDistanceMask;
1115
1116 // padding to ensure swizzleMap starts 64B offset from start of the struct
1117 // and that the next fields are dword aligned.
1118 uint8_t pad[10];
1119
1120 // Offset to the start of the attributes of the input vertices, in simdvector units
1121 uint32_t vertexAttribOffset;
1122
1123 // Offset to clip/cull attrib section of the vertex, in simdvector units
1124 uint32_t vertexClipCullOffset;
1125
// one swizzle entry per attribute slot, consulted when swizzleEnable is set
1126 SWR_ATTRIB_SWIZZLE swizzleMap[32];
1127 };
1128 static_assert(sizeof(SWR_BACKEND_STATE) == 128,
1129 "Adjust padding to keep size (or remove this assert)");
1130
1131
// Depth/stencil state laid out as three dwords. The named members and the
// value array overlay the same storage, so the whole state can also be
// accessed as three 32-bit words.
1132 union SWR_DEPTH_STENCIL_STATE
1133 {
1134 struct
1135 {
1136 // dword 0
// five 1-bit enables followed by nine 3-bit function and op selectors,
// 32 bits in total
1137 uint32_t depthWriteEnable : 1;
1138 uint32_t depthTestEnable : 1;
1139 uint32_t stencilWriteEnable : 1;
1140 uint32_t stencilTestEnable : 1;
1141 uint32_t doubleSidedStencilTestEnable : 1;
1142
1143 uint32_t depthTestFunc : 3;
1144 uint32_t stencilTestFunc : 3;
1145
1146 uint32_t backfaceStencilPassDepthPassOp : 3;
1147 uint32_t backfaceStencilPassDepthFailOp : 3;
1148 uint32_t backfaceStencilFailOp : 3;
1149 uint32_t backfaceStencilTestFunc : 3;
1150 uint32_t stencilPassDepthPassOp : 3;
1151 uint32_t stencilPassDepthFailOp : 3;
1152 uint32_t stencilFailOp : 3;
1153
1154 // dword 1
// four 8-bit stencil masks, write and test for back and front faces
1155 uint8_t backfaceStencilWriteMask;
1156 uint8_t backfaceStencilTestMask;
1157 uint8_t stencilWriteMask;
1158 uint8_t stencilTestMask;
1159
1160 // dword 2
// two 8-bit reference values, the rest of this dword has no named member
1161 uint8_t backfaceStencilRefValue;
1162 uint8_t stencilRefValue;
1163 };
1164 uint32_t value[3];
1165 };
1166
// Pixel shader shading rate: per pixel is 0, per sample is 1. The trailing
// COUNT enumerator equals the number of real rates, so new rates go before it.
1167 enum SWR_SHADING_RATE
1168 {
1169 SWR_SHADING_RATE_PIXEL,
1170 SWR_SHADING_RATE_SAMPLE,
1171 SWR_SHADING_RATE_COUNT,
1172 };
1173
// Input coverage modes with implicit values 0 to 2, which fit the 2-bit
// SWR_PS_STATE::inputCoverage field. The trailing COUNT enumerator equals the
// number of real modes, so new modes go before it.
1174 enum SWR_INPUT_COVERAGE
1175 {
1176 SWR_INPUT_COVERAGE_NONE,
1177 SWR_INPUT_COVERAGE_NORMAL,
1178 SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
1179 SWR_INPUT_COVERAGE_COUNT,
1180 };
1181
// Kind of offset added to the pixel position, matching the none, sample and
// centroid choices described on SWR_PS_STATE::posOffset. Implicit values are
// 0 to 2, and the trailing COUNT enumerator equals the number of real choices.
1182 enum SWR_PS_POSITION_OFFSET
1183 {
1184 SWR_PS_POSITION_SAMPLE_NONE,
1185 SWR_PS_POSITION_SAMPLE_OFFSET,
1186 SWR_PS_POSITION_CENTROID_OFFSET,
1187 SWR_PS_POSITION_OFFSET_COUNT,
1188 };
1189
// Single-bit flags that can be OR-ed together. The three bits match the width
// of the 3-bit SWR_PS_STATE::barycentricsMask field.
1190 enum SWR_BARYCENTRICS_MASK
1191 {
1192 SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
1193 SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
1194 SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
1195 };
1196
1197 // pixel shader state
// Pixel shader state: the shader entry point, a dword of bit-field flags
// describing what the shader does, and a mask of the render targets written.
1198 struct SWR_PS_STATE
1199 {
1200 // dword 0-1
// entry point, called through the PFN_PIXEL_KERNEL signature
1201 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1202
1203 // dword 2
// 14 bits are used. inputCoverage, shadingRate, posOffset and
// barycentricsMask are sized for the SWR_INPUT_COVERAGE,
// SWR_SHADING_RATE, SWR_PS_POSITION_OFFSET and SWR_BARYCENTRICS_MASK
// values respectively.
1204 uint32_t killsPixel : 1; // pixel shader can kill pixels
1205 uint32_t inputCoverage : 2; // ps uses input coverage
1206 uint32_t writesODepth : 1; // pixel shader writes to depth
1207 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1208 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1209 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1210 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate
1211 // attributes with
1212 uint32_t usesUAV : 1; // pixel shader accesses UAV
1213 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1214
1215 uint8_t renderTargetMask; // Mask of render targets written
1216 };
1217
1218 // depth bounds state
// Depth bounds test configuration: an enable flag plus the minimum and
// maximum values of the tested range, stored as floats.
// NOTE(review): whether the bounds are inclusive is not visible here - confirm
// against the code that performs the test.
1219 struct SWR_DEPTH_BOUNDS_STATE
1220 {
1221 bool depthBoundsTestEnable;
1222 float depthBoundsTestMinValue;
1223 float depthBoundsTestMaxValue;
1224 };
1225 // clang-format on