swr/rast: Migrate memory pointers to gfxptr_t type
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / state.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29
30 #include "common/formats.h"
31 #include "common/intrin.h"
32 using gfxptr_t = unsigned long long;
33 #include <functional>
34 #include <algorithm>
35
36 //////////////////////////////////////////////////////////////////////////
37 /// PRIMITIVE_TOPOLOGY.
38 //////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                = 0x00,
    TOP_POINT_LIST             = 0x01,
    TOP_LINE_LIST              = 0x02,
    TOP_LINE_STRIP             = 0x03,
    TOP_TRIANGLE_LIST          = 0x04,
    TOP_TRIANGLE_STRIP         = 0x05,
    TOP_TRIANGLE_FAN           = 0x06,
    TOP_QUAD_LIST              = 0x07,
    TOP_QUAD_STRIP             = 0x08,
    TOP_LINE_LIST_ADJ          = 0x09,
    TOP_LISTSTRIP_ADJ          = 0x0A,
    TOP_TRI_LIST_ADJ           = 0x0B,
    TOP_TRI_STRIP_ADJ          = 0x0C,
    TOP_TRI_STRIP_REVERSE      = 0x0D,
    TOP_POLYGON                = 0x0E,
    TOP_RECT_LIST              = 0x0F,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    // 0x15 is not assigned to any topology in this enum.
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17, /// @todo What is this??

    // Patch lists. TOP_PATCHLIST_<n> is a list of n-vertex patches and is
    // always encoded as TOP_PATCHLIST_BASE + n (n = 1..32).
    TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1    = TOP_PATCHLIST_BASE + 1,  // 0x20
    TOP_PATCHLIST_2    = TOP_PATCHLIST_BASE + 2,  // 0x21
    TOP_PATCHLIST_3    = TOP_PATCHLIST_BASE + 3,  // 0x22
    TOP_PATCHLIST_4    = TOP_PATCHLIST_BASE + 4,  // 0x23
    TOP_PATCHLIST_5    = TOP_PATCHLIST_BASE + 5,  // 0x24
    TOP_PATCHLIST_6    = TOP_PATCHLIST_BASE + 6,  // 0x25
    TOP_PATCHLIST_7    = TOP_PATCHLIST_BASE + 7,  // 0x26
    TOP_PATCHLIST_8    = TOP_PATCHLIST_BASE + 8,  // 0x27
    TOP_PATCHLIST_9    = TOP_PATCHLIST_BASE + 9,  // 0x28
    TOP_PATCHLIST_10   = TOP_PATCHLIST_BASE + 10, // 0x29
    TOP_PATCHLIST_11   = TOP_PATCHLIST_BASE + 11, // 0x2A
    TOP_PATCHLIST_12   = TOP_PATCHLIST_BASE + 12, // 0x2B
    TOP_PATCHLIST_13   = TOP_PATCHLIST_BASE + 13, // 0x2C
    TOP_PATCHLIST_14   = TOP_PATCHLIST_BASE + 14, // 0x2D
    TOP_PATCHLIST_15   = TOP_PATCHLIST_BASE + 15, // 0x2E
    TOP_PATCHLIST_16   = TOP_PATCHLIST_BASE + 16, // 0x2F
    TOP_PATCHLIST_17   = TOP_PATCHLIST_BASE + 17, // 0x30
    TOP_PATCHLIST_18   = TOP_PATCHLIST_BASE + 18, // 0x31
    TOP_PATCHLIST_19   = TOP_PATCHLIST_BASE + 19, // 0x32
    TOP_PATCHLIST_20   = TOP_PATCHLIST_BASE + 20, // 0x33
    TOP_PATCHLIST_21   = TOP_PATCHLIST_BASE + 21, // 0x34
    TOP_PATCHLIST_22   = TOP_PATCHLIST_BASE + 22, // 0x35
    TOP_PATCHLIST_23   = TOP_PATCHLIST_BASE + 23, // 0x36
    TOP_PATCHLIST_24   = TOP_PATCHLIST_BASE + 24, // 0x37
    TOP_PATCHLIST_25   = TOP_PATCHLIST_BASE + 25, // 0x38
    TOP_PATCHLIST_26   = TOP_PATCHLIST_BASE + 26, // 0x39
    TOP_PATCHLIST_27   = TOP_PATCHLIST_BASE + 27, // 0x3A
    TOP_PATCHLIST_28   = TOP_PATCHLIST_BASE + 28, // 0x3B
    TOP_PATCHLIST_29   = TOP_PATCHLIST_BASE + 29, // 0x3C
    TOP_PATCHLIST_30   = TOP_PATCHLIST_BASE + 30, // 0x3D
    TOP_PATCHLIST_31   = TOP_PATCHLIST_BASE + 31, // 0x3E
    TOP_PATCHLIST_32   = TOP_PATCHLIST_BASE + 32, // 0x3F, list of 32-vertex patches
};
99
100 //////////////////////////////////////////////////////////////////////////
101 /// SWR_SHADER_TYPE
102 //////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX   = 0,
    SHADER_GEOMETRY = 1,
    SHADER_DOMAIN   = 2,
    SHADER_HULL     = 3,
    SHADER_PIXEL    = 4,
    SHADER_COMPUTE  = 5,

    // One past the last shader type, i.e. the number of entries above.
    NUM_SHADER_TYPES,
};
114
115 //////////////////////////////////////////////////////////////////////////
116 /// SWR_RENDERTARGET_ATTACHMENT
/// @todo It's not clear what an "attachment" means. It's not a common term.
118 //////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    // Color attachments
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    // Depth / stencil attachments
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    // One past the last attachment, i.e. the number of entries above.
    SWR_NUM_ATTACHMENTS
};
134
// Number of color render targets.
#define SWR_NUM_RENDERTARGETS 8

// One mask bit per attachment; bit n corresponds to enumerator value n of
// SWR_RENDERTARGET_ATTACHMENT.
#define SWR_ATTACHMENT_COLOR0_BIT  0x001 // bit 0
#define SWR_ATTACHMENT_COLOR1_BIT  0x002 // bit 1
#define SWR_ATTACHMENT_COLOR2_BIT  0x004 // bit 2
#define SWR_ATTACHMENT_COLOR3_BIT  0x008 // bit 3
#define SWR_ATTACHMENT_COLOR4_BIT  0x010 // bit 4
#define SWR_ATTACHMENT_COLOR5_BIT  0x020 // bit 5
#define SWR_ATTACHMENT_COLOR6_BIT  0x040 // bit 6
#define SWR_ATTACHMENT_COLOR7_BIT  0x080 // bit 7
#define SWR_ATTACHMENT_DEPTH_BIT   0x100 // bit 8
#define SWR_ATTACHMENT_STENCIL_BIT 0x200 // bit 9
#define SWR_ATTACHMENT_MASK_ALL    0x3ff // all color bits + depth + stencil
#define SWR_ATTACHMENT_MASK_COLOR  0x0ff // the eight color bits only
149
150
151 //////////////////////////////////////////////////////////////////////////
152 /// @brief SWR Inner Tessellation factor ID
153 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE = 0,
    SWR_QUAD_V_INSIDE     = 1,

    // Number of inner tessellation factors (used to size arrays).
    SWR_NUM_INNER_TESS_FACTORS,
};
161
162 //////////////////////////////////////////////////////////////////////////
163 /// @brief SWR Outer Tessellation factor ID
164 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_U_EQ1_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    // Number of outer tessellation factors (used to size arrays).
    SWR_NUM_OUTER_TESS_FACTORS,
};
174
175
176 /////////////////////////////////////////////////////////////////////////
177 /// simdvertex
178 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
179 /// Contains space for position, SGV, and 32 generic attributes
180 /////////////////////////////////////////////////////////////////////////
enum SWR_VTX_SLOTS
{
    // SGV slot. NOTE(review): the *_COMP values are presumably component
    // indices within the SGV slot -- confirm against the users of these names.
    VERTEX_SGV_SLOT            = 0,
    VERTEX_SGV_RTAI_COMP       = 0,
    VERTEX_SGV_VAI_COMP        = 1,
    VERTEX_SGV_POINT_SIZE_COMP = 2,

    // Position occupies a single slot (start == end).
    VERTEX_POSITION_SLOT     = 1,
    VERTEX_POSITION_END_SLOT = 1,

    // Everything below is laid out relative to the last position slot.
    VERTEX_CLIPCULL_DIST_LO_SLOT = (VERTEX_POSITION_END_SLOT + 1), // VS writes lower 4 clip/cull dist
    VERTEX_CLIPCULL_DIST_HI_SLOT = (VERTEX_POSITION_END_SLOT + 2), // VS writes upper 4 clip/cull dist
    VERTEX_ATTRIB_START_SLOT     = (VERTEX_POSITION_END_SLOT + 3), // first generic attribute slot
    VERTEX_ATTRIB_END_SLOT       = (VERTEX_POSITION_END_SLOT + 34), // last generic attribute slot (32 in total)

    // Total number of slots in a vertex.
    SWR_VTX_NUM_SLOTS = (VERTEX_ATTRIB_END_SLOT + 1)
};
195
196 // SoAoSoA
197 struct simdvertex
198 {
199 simdvector attrib[SWR_VTX_NUM_SLOTS];
200 };
201
#if ENABLE_AVX512_SIMD16
// Same layout as simdvertex, but built from simd16vector slots.
// Only declared when ENABLE_AVX512_SIMD16 is set.
struct simd16vertex
{
    simd16vector attrib[SWR_VTX_NUM_SLOTS];
};

#endif
209
210 template<typename SIMD_T>
211 struct SIMDVERTEX_T
212 {
213 typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
214 };
215
216 //////////////////////////////////////////////////////////////////////////
217 /// SWR_VS_CONTEXT
218 /// @brief Input to vertex shader
219 /////////////////////////////////////////////////////////////////////////
220 struct SWR_VS_CONTEXT
221 {
222 simdvertex* pVin; // IN: SIMD input vertex data store
223 simdvertex* pVout; // OUT: SIMD output vertex data store
224
225 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
226 simdscalari VertexID; // IN: Vertex ID
227 simdscalari mask; // IN: Active mask for shader
228 #if USE_SIMD16_FRONTEND
229 uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
230 #endif
231 };
232
233 /////////////////////////////////////////////////////////////////////////
234 /// ScalarCPoint
235 /// @brief defines a control point element as passed from the output
236 /// of the hull shader to the input of the domain shader
237 /////////////////////////////////////////////////////////////////////////
struct ScalarAttrib
{
    float x; // first component
    float y; // second component
    float z; // third component
    float w; // fourth component
};
245
246 struct ScalarCPoint
247 {
248 ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
249 };
250
251 //////////////////////////////////////////////////////////////////////////
252 /// SWR_TESSELLATION_FACTORS
253 /// @brief Tessellation factors structure (non-vector)
254 /////////////////////////////////////////////////////////////////////////
255 struct SWR_TESSELLATION_FACTORS
256 {
257 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
258 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
259 };
260
// Upper bound on vertices per primitive: patches may have up to 32 control points.
#define MAX_NUM_VERTS_PER_PRIM 32
262 struct ScalarPatch
263 {
264 SWR_TESSELLATION_FACTORS tessFactors;
265 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
266 ScalarCPoint patchData;
267 };
268
269 //////////////////////////////////////////////////////////////////////////
270 /// SWR_HS_CONTEXT
271 /// @brief Input to hull shader
272 /////////////////////////////////////////////////////////////////////////
273 struct SWR_HS_CONTEXT
274 {
275 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
276 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
277 simdscalari mask; // IN: Active mask for shader
278 ScalarPatch* pCPout; // OUT: Output control point patch
279 // SIMD-sized-array of SCALAR patches
280 };
281
282 //////////////////////////////////////////////////////////////////////////
283 /// SWR_DS_CONTEXT
284 /// @brief Input to domain shader
285 /////////////////////////////////////////////////////////////////////////
286 struct SWR_DS_CONTEXT
287 {
288 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
289 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
290 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
291 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
292 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
293 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
294 simdscalari mask; // IN: Active mask for shader
295 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
296 };
297
298 //////////////////////////////////////////////////////////////////////////
299 /// SWR_GS_CONTEXT
300 /// @brief Input to geometry shader.
301 /////////////////////////////////////////////////////////////////////////
302 struct SWR_GS_CONTEXT
303 {
304 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims
305 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
306 uint32_t InstanceID; // IN: input instance ID
307 simdscalari mask; // IN: Active mask for shader
308 uint8_t* pStream; // OUT: output stream (contains vertices for all output streams)
309 uint8_t* pCutOrStreamIdBuffer; // OUT: cut or stream id buffer
310 simdscalari vertexCount; // OUT: num vertices emitted per SIMD lane
311 };
312
313 struct PixelPositions
314 {
315 simdscalar UL;
316 simdscalar center;
317 simdscalar sample;
318 simdscalar centroid;
319 };
320
// Largest supported per-pixel sample count.
#define SWR_MAX_NUM_MULTISAMPLES 16
322
323 //////////////////////////////////////////////////////////////////////////
324 /// SWR_PS_CONTEXT
325 /// @brief Input to pixel shader.
326 /////////////////////////////////////////////////////////////////////////
327 struct SWR_PS_CONTEXT
328 {
329 PixelPositions vX; // IN: x location(s) of pixels
330 PixelPositions vY; // IN: x location(s) of pixels
331 simdscalar vZ; // INOUT: z location of pixels
332 simdscalari activeMask; // OUT: mask for kill
333 simdscalar inputMask; // IN: input coverage mask for all samples
334 simdscalari oMask; // OUT: mask for output coverage
335
336 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
337 PixelPositions vJ;
338 PixelPositions vOneOverW; // IN: 1/w
339
340 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
341 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
342 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
343 const float *I; // IN: Barycentric A, B, and C coefs used to compute I
344 const float *J; // IN: Barycentric A, B, and C coefs used to compute J
345 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
346 const float* pSamplePosX; // IN: array of sample positions
347 const float* pSamplePosY; // IN: array of sample positions
348 simdvector shaded[SWR_NUM_RENDERTARGETS];
349 // OUT: result color per rendertarget
350
351 uint32_t frontFace; // IN: front- 1, back- 0
352 uint32_t sampleIndex; // IN: sampleIndex
353 uint32_t renderTargetArrayIndex; // IN: render target array index from GS
354 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
355
356 uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
357 };
358
359 //////////////////////////////////////////////////////////////////////////
360 /// SWR_CS_CONTEXT
361 /// @brief Input to compute shader.
362 /////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // System values accepted by a compute shader:
    //   o ThreadId                 - Current thread id relative to all other threads in dispatch.
    //   o ThreadGroupId            - Current thread group id relative to all other groups in dispatch.
    //   o ThreadIdInGroup          - Current thread relative to all threads in the current thread group.
    //   o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
    //
    // ThreadGroupId is the index of the current thread group relative to all
    // thread groups of the Dispatch call; ThreadId, ThreadIdInGroup and
    // ThreadIdInGroupFlattened can be derived from it inside the shader.
    //
    // None of these values are passed in directly. The shader computes them
    // from the tile counter, an atomic counter that lives in the draw context
    // and starts out as the product of the dispatch dimensions:
    //
    //     tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    //
    // Every CPU worker thread atomically decrements the counter and hands the
    // resulting count to the shader. Once the count reaches 0, all thread
    // groups of the dispatch call have been completed.

    // The tile counter value for this thread group.
    uint32_t tileCounter;

    // Dispatch dimensions used by shader to compute system values from the tile counter.
    uint32_t dispatchDims[3];

    // Thread Group Shared Memory pointer.
    uint8_t* pTGSM;

    // Spill/fill buffer for barrier support
    uint8_t* pSpillFillBuffer;

    // Pointer to scratch space buffer used by the shader; the shader is
    // responsible for subdividing scratch space per instance/simd.
    uint8_t* pScratchSpace;

    // Scratch space per work item x SIMD_WIDTH
    uint32_t scratchSpacePerSimd;
};
399
400 // enums
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0, // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 1, // W major tiling
    SWR_TILE_MODE_XMAJOR = 2, // X major tiling
    SWR_TILE_MODE_YMAJOR = 3, // Y major tiling
    SWR_TILE_SWRZ        = 4, // SWR-Z tiling

    // Number of tile modes listed above.
    SWR_TILE_MODE_COUNT
};
411
enum SWR_SURFACE_TYPE
{
    SURFACE_1D                = 0,
    SURFACE_2D                = 1,
    SURFACE_3D                = 2,
    SURFACE_CUBE              = 3,
    SURFACE_BUFFER            = 4,
    SURFACE_STRUCTURED_BUFFER = 5,
    // 6 is not assigned to any surface type in this enum.
    SURFACE_NULL              = 7
};
422
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    // Number of functions listed above.
    NUM_ZFUNC
};
435
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
447
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    // INV_* counterparts of the factors above.
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
470
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
479
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
499
500 //////////////////////////////////////////////////////////////////////////
501 /// SWR_AUX_MODE
502 /// @brief Specifies how the auxiliary buffer is used by the driver.
503 //////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE  = 0,
    AUX_MODE_COLOR = 1,
    AUX_MODE_UAV   = 2,
    AUX_MODE_DEPTH = 3,
};
511
512 //////////////////////////////////////////////////////////////////////////
513 /// SWR_SURFACE_STATE
514 //////////////////////////////////////////////////////////////////////////
515 struct SWR_SURFACE_STATE
516 {
517 gfxptr_t xpBaseAddress;
518 SWR_SURFACE_TYPE type; // @llvm_enum
519 SWR_FORMAT format; // @llvm_enum
520 uint32_t width;
521 uint32_t height;
522 uint32_t depth;
523 uint32_t numSamples;
524 uint32_t samplePattern;
525 uint32_t pitch;
526 uint32_t qpitch;
527 uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
528 uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
529 float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
530 uint32_t lod; // for render targets, the lod being rendered to
531 uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
532 SWR_TILE_MODE tileMode; // @llvm_enum
533 uint32_t halign;
534 uint32_t valign;
535 uint32_t xOffset;
536 uint32_t yOffset;
537
538 uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
539
540 gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
541 SWR_AUX_MODE auxMode; // @llvm_enum
542
543
544 bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
545 };
546
547 // vertex fetch state
548 // WARNING- any changes to this struct need to be reflected
549 // in the fetch shader jit
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t index;
    uint32_t pitch;
    const uint8_t* pData;
    uint32_t size;
    uint32_t numaNode;
    // min vertex (for bounds checking)
    uint32_t minVertex;
    // size / pitch. precalculated value used by fetch shader for OOB checks
    uint32_t maxVertex;
    // size % pitch. precalculated value used by fetch shader for partially OOB vertices
    uint32_t partialInboundsSize;
};
561
562 struct SWR_INDEX_BUFFER_STATE
563 {
564 // Format type for indices (e.g. UINT16, UINT32, etc.)
565 SWR_FORMAT format; // @llvm_enum
566 const void *pIndices;
567 uint32_t size;
568 };
569
570
571 //////////////////////////////////////////////////////////////////////////
572 /// SWR_FETCH_CONTEXT
573 /// @brief Input to fetch shader.
574 /// @note WARNING - Changes to this struct need to be reflected in the
575 /// fetch shader jit.
576 /////////////////////////////////////////////////////////////////////////
577 struct SWR_FETCH_CONTEXT
578 {
579 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
580 const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
581 const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
582 uint32_t CurInstance; // IN: current instance
583 uint32_t BaseVertex; // IN: base vertex
584 uint32_t StartVertex; // IN: start vertex
585 uint32_t StartInstance; // IN: start instance
586 simdscalari VertexID; // OUT: vector of vertex IDs
587 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
588 #if USE_SIMD16_SHADERS
589 // simd16scalari VertexID; // OUT: vector of vertex IDs
590 // simd16scalari CutMask; // OUT: vector mask of indices which have the cut index value
591 simdscalari VertexID2; // OUT: vector of vertex IDs
592 simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
593 #endif
594 };
595
596 //////////////////////////////////////////////////////////////////////////
597 /// SWR_STATS
598 ///
599 /// @brief All statistics generated by SWR go here. These are public
600 /// to driver.
601 /////////////////////////////////////////////////////////////////////////
602 OSALIGNLINE(struct) SWR_STATS
603 {
604 // Occlusion Query
605 uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
606
607 // Pipeline Stats
608 uint64_t PsInvocations; // Number of Pixel Shader invocations
609 uint64_t CsInvocations; // Number of Compute Shader invocations
610
611 };
612
613 //////////////////////////////////////////////////////////////////////////
/// SWR_STATS_FE
615 ///
616 /// @brief All statistics generated by FE.
617 /////////////////////////////////////////////////////////////////////////
618 OSALIGNLINE(struct) SWR_STATS_FE
619 {
620 uint64_t IaVertices; // Number of Fetch Shader vertices
621 uint64_t IaPrimitives; // Number of PA primitives.
622 uint64_t VsInvocations; // Number of Vertex Shader invocations
623 uint64_t HsInvocations; // Number of Hull Shader invocations
624 uint64_t DsInvocations; // Number of Domain Shader invocations
625 uint64_t GsInvocations; // Number of Geometry Shader invocations
626 uint64_t GsPrimitives; // Number of prims GS outputs.
627 uint64_t CInvocations; // Number of clipper invocations
628 uint64_t CPrimitives; // Number of clipper primitives.
629
630 // Streamout Stats
631 uint64_t SoPrimStorageNeeded[4];
632 uint64_t SoNumPrimsWritten[4];
633 };
634
635 //////////////////////////////////////////////////////////////////////////
636 /// STREAMOUT_BUFFERS
637 /////////////////////////////////////////////////////////////////////////
638
#define MAX_SO_STREAMS 4  // number of stream-out streams
#define MAX_SO_BUFFERS 4  // number of stream-out buffers
#define MAX_ATTRIBUTES 32 // number of attributes
642
struct SWR_STREAMOUT_BUFFER
{
    bool enable;
    bool soWriteEnable;

    // Pointer to the streamout buffer.
    uint32_t* pBuffer;

    // Buffer size, in dwords.
    uint32_t bufferSize;

    // Per-vertex pitch of the buffer, in dwords.
    uint32_t pitch;

    // Current offset into the buffer, in dwords. SOS will increment this offset.
    uint32_t streamOffset;

    // Offset to the SO write offset. If not null then we update offset here.
    uint32_t* pWriteOffset;

};
664
665 //////////////////////////////////////////////////////////////////////////
666 /// STREAMOUT_STATE
667 /////////////////////////////////////////////////////////////////////////
668 struct SWR_STREAMOUT_STATE
669 {
670 // This disables stream output.
671 bool soEnable;
672
673 // which streams are enabled for streamout
674 bool streamEnable[MAX_SO_STREAMS];
675
676 // If set then do not send any streams to the rasterizer.
677 bool rasterizerDisable;
678
679 // Specifies which stream to send to the rasterizer.
680 uint32_t streamToRasterizer;
681
682 // The stream masks specify which attributes are sent to which streams.
683 // These masks help the FE to setup the pPrimData buffer that is passed
684 // the Stream Output Shader (SOS) function.
685 uint32_t streamMasks[MAX_SO_STREAMS];
686
687 // Number of attributes, including position, per vertex that are streamed out.
688 // This should match number of bits in stream mask.
689 uint32_t streamNumEntries[MAX_SO_STREAMS];
690
691 // Offset to the start of the attributes of the input vertices, in simdvector units
692 uint32_t vertexAttribOffset[MAX_SO_STREAMS];
693 };
694
695 //////////////////////////////////////////////////////////////////////////
696 /// STREAMOUT_CONTEXT - Passed to SOS
697 /////////////////////////////////////////////////////////////////////////
698 struct SWR_STREAMOUT_CONTEXT
699 {
700 uint32_t* pPrimData;
701 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
702
703 // Num prims written for this stream
704 uint32_t numPrimsWritten;
705
706 // Num prims that should have been written if there were no overflow.
707 uint32_t numPrimStorageNeeded;
708 };
709
710 //////////////////////////////////////////////////////////////////////////
711 /// SWR_GS_STATE - Geometry shader state
712 /////////////////////////////////////////////////////////////////////////
713 struct SWR_GS_STATE
714 {
715 bool gsEnable;
716
717 // number of input attributes per vertex. used by the frontend to
718 // optimize assembling primitives for GS
719 uint32_t numInputAttribs;
720
721 // output topology - can be point, tristrip, or linestrip
722 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
723
724 // maximum number of verts that can be emitted by a single instance of the GS
725 uint32_t maxNumVerts;
726
727 // instance count
728 uint32_t instanceCount;
729
730 // if true, geometry shader emits a single stream, with separate cut buffer.
731 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
732 // to map vertices to streams
733 bool isSingleStream;
734
735 // when single stream is enabled, singleStreamID dictates which stream is being output.
736 // field ignored if isSingleStream is false
737 uint32_t singleStreamID;
738
739 // Offset to the start of the attributes of the input vertices, in simdvector units
740 uint32_t vertexAttribOffset;
741 };
742
743
744 //////////////////////////////////////////////////////////////////////////
745 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
746 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT   = 0,
    SWR_TS_OUTPUT_LINE    = 1,
    SWR_TS_OUTPUT_TRI_CW  = 2,
    SWR_TS_OUTPUT_TRI_CCW = 3,

    // Number of output topologies listed above.
    SWR_TS_OUTPUT_TOPOLOGY_COUNT
};
756
757 //////////////////////////////////////////////////////////////////////////
758 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
759 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER         = 0,
    SWR_TS_ODD_FRACTIONAL  = 1,
    SWR_TS_EVEN_FRACTIONAL = 2,

    // Number of partitioning modes listed above.
    SWR_TS_PARTITIONING_COUNT
};
768
769 //////////////////////////////////////////////////////////////////////////
770 /// SWR_TS_DOMAIN - Defines Tessellation Domain
771 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD    = 0,
    SWR_TS_TRI     = 1,
    SWR_TS_ISOLINE = 2,

    // Number of domains listed above.
    SWR_TS_DOMAIN_COUNT
};
780
781 //////////////////////////////////////////////////////////////////////////
782 /// SWR_TS_STATE - Tessellation state
783 /////////////////////////////////////////////////////////////////////////
784 struct SWR_TS_STATE
785 {
786 bool tsEnable;
787 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
788 SWR_TS_PARTITIONING partitioning; // @llvm_enum
789 SWR_TS_DOMAIN domain; // @llvm_enum
790
791 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
792
793 uint32_t numHsInputAttribs;
794 uint32_t numHsOutputAttribs;
795 uint32_t numDsOutputAttribs;
796
797 // Offset to the start of the attributes of the input vertices, in simdvector units
798 uint32_t vertexAttribOffset;
799 };
800
801 // output merger state
struct SWR_RENDER_TARGET_BLEND_STATE
{
    // One single-bit write-disable flag per color channel.
    uint8_t writeDisableRed : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue : 1;
    uint8_t writeDisableAlpha : 1;
};
// The four flags must pack into exactly one byte.
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
810
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X  = 0,
    SWR_MULTISAMPLE_2X  = 1,
    SWR_MULTISAMPLE_4X  = 2,
    SWR_MULTISAMPLE_8X  = 3,
    SWR_MULTISAMPLE_16X = 4,
    // Number of sample-count settings listed above.
    SWR_MULTISAMPLE_TYPE_COUNT
};
820
821 INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
822 {
823 static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
824 assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
825 return sampleCountLUT[sampleCount];
826 } // @llvm_func_end
827
828 struct SWR_BLEND_STATE
829 {
830 // constant blend factor color in RGBA float
831 float constantColor[4];
832
833 // alpha test reference value in unorm8 or float32
834 uint32_t alphaTestReference;
835 uint32_t sampleMask;
836 // all RT's have the same sample count
837 ///@todo move this to Output Merger state when we refactor
838 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
839
840 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
841 };
842 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
843
844 //////////////////////////////////////////////////////////////////////////
845 /// FUNCTION POINTERS FOR SHADERS
846
847 #if USE_SIMD16_SHADERS
848 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
849 #else
850 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
851 #endif
852 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
853 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
854 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
855 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
856 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
857 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
858 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
859 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
860 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
861 simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
862 uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
863 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
864
865
866
867 //////////////////////////////////////////////////////////////////////////
868 /// FRONTEND_STATE
869 /////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // When set, clip test, perspective divide and viewport transform are
    // skipped; intended for verts already in screen space.
    bool vpTransformDisable;
    bool bEnableCutIndex;

    // Provoking vertex selection, readable either field by field or as one
    // 32-bit word through `bits`.
    union
    {
        struct
        {
            uint32_t triFan : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList : 2;
        };
        uint32_t bits;
    } provokingVertex;

    // provoking vertex for the draw topology
    uint32_t topologyProvokingVertex;

    // Vertex size in simdvector units. Should be sized to the maximum of the
    // input/output of the vertex shader.
    uint32_t vsVertexSize;
};
892
893 //////////////////////////////////////////////////////////////////////////
894 /// VIEWPORT_MATRIX
895 /////////////////////////////////////////////////////////////////////////
// Six stored matrix elements for a single viewport.
// NOTE(review): mRC presumably names the element at row R, column C of the
// viewport transform -- confirm against the code that fills these in.
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};
905
906 //////////////////////////////////////////////////////////////////////////
907 /// SWR_VIEWPORT_MATRICES
908 /////////////////////////////////////////////////////////////////////////
// The same six coefficient names as SWR_VIEWPORT_MATRIX, stored as one array
// per coefficient (structure-of-arrays). Every array has
// KNOB_NUM_VIEWPORTS_SCISSORS entries.
// NOTE(review): presumably indexed by viewport number - confirm.
909 struct SWR_VIEWPORT_MATRICES
910 {
911 float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
912 float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
913 float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
914 float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
915 float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
916 float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
917 };
918
919 //////////////////////////////////////////////////////////////////////////
920 /// SWR_VIEWPORT
921 /////////////////////////////////////////////////////////////////////////
// One viewport described by six floats: an x/y position, a width/height
// extent, and a minZ/maxZ depth range.
// NOTE(review): units and which corner x/y refer to are not visible in this
// section - confirm before relying on them.
922 struct SWR_VIEWPORT
923 {
924 float x;
925 float y;
926 float width;
927 float height;
928 float minZ;
929 float maxZ;
930 };
931
932 //////////////////////////////////////////////////////////////////////////
933 /// SWR_CULLMODE
934 //////////////////////////////////////////////////////////////////////////
/// Cull-mode selector. The numeric values are written out explicitly and are
/// exactly the ones implicit numbering assigns (0 through 3).
/// NOTE(review): SWR_RASTSTATE::cullMode is a 2-bit field, which is just wide
/// enough for these four values if that is where they are stored - confirm.
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH  = 0,
    SWR_CULLMODE_NONE  = 1,
    SWR_CULLMODE_FRONT = 2,
    SWR_CULLMODE_BACK  = 3
};
942
/// Fill-mode selector (point, wireframe or solid). Values are spelled out and
/// match the implicit numbering (0 through 2).
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT     = 0,
    SWR_FILLMODE_WIREFRAME = 1,
    SWR_FILLMODE_SOLID     = 2
};
949
/// Front-face winding selector: clockwise is 0, counter-clockwise is 1
/// (the same values implicit numbering assigns).
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW  = 0,
    SWR_FRONTWINDING_CCW = 1
};
955
956
/// Pixel-location selector: CENTER is 0, UL is 1 (the values implicit
/// numbering assigns).
/// NOTE(review): SWR_RASTSTATE::pixelLocation is commented "UL or Center" and
/// presumably holds one of these values - confirm.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER = 0,
    SWR_PIXEL_LOCATION_UL     = 1,
};
962
963 // fixed point screen space sample locations within a pixel
// Per-sample position tables. Holds scalar integer (_xi, _yi) and float
// (_x, _y) positions, one entry per sample up to SWR_MAX_NUM_MULTISAMPLES,
// plus vector forms of the same data and two tile sample offset vectors.
// Members are private and reached through the inline accessors below.
// None of the indexed accessors range-checks sampleNum.
// NOTE(review): the trailing llvm_func / llvm_typedef tags look like markers
// for a code-generation tool - keep each tag on its declaration line.
964 struct SWR_MULTISAMPLE_POS
965 {
966 public:
// Scalar setters and getters, indexed by sample number.
967 INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
968 INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
969 INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
970 INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
971 INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
972 INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
973 INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
974 INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
// The argument-less X() and Y() overloads return a const reference to the
// whole float array rather than a single element.
975 typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
976 INLINE sampleArrayT X() const { return _x; }; // @llvm_func
977 INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
// Read-only access to the vector tables and the tile sample offsets.
978 INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
979 INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
980 INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
981 INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
982 INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
983 INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
984
// Declared here, defined elsewhere.
// NOTE(review): presumably fills the vector tables below from the scalar
// values for the given sample count - confirm against the definition.
985 INLINE void PrecalcSampleData(int numSamples); //@llvm_func
986
987 private:
// Private helpers, declared only. Their definitions are outside this section.
988 template <typename MaskT>
989 INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
990 INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
991
992 // scalar sample values
993 uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
994 uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
995 float _x[SWR_MAX_NUM_MULTISAMPLES];
996 float _y[SWR_MAX_NUM_MULTISAMPLES];
997
998 // precalc'd / vectorized samples
999 __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
1000 __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
1001 simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
1002 simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
1003 __m128i tileSampleOffsetsX;
1004 __m128i tileSampleOffsetsY;
1005 };
1006
1007 //////////////////////////////////////////////////////////////////////////
1008 /// SWR_RASTSTATE
1009 //////////////////////////////////////////////////////////////////////////
// Rasterizer state. Opens with fifteen bits of flags and small selectors
// declared as uint32_t bit-fields, followed by float point / line / depth-bias
// parameters, the depth format, the sample count and the sample-position
// tables.
// NOTE(review): several members carry trailing llvm_* tags that look like
// markers for a code-generation tool - keep each tag on its declaration line.
1010 struct SWR_RASTSTATE
1011 {
// Two 2-bit selectors followed by eleven 1-bit flags.
// NOTE(review): cullMode, fillMode and frontWinding presumably hold
// SWR_CULLMODE, SWR_FILLMODE and SWR_FRONTWINDING values - confirm.
1012 uint32_t cullMode : 2;
1013 uint32_t fillMode : 2;
1014 uint32_t frontWinding : 1;
1015 uint32_t scissorEnable : 1;
1016 uint32_t depthClipEnable : 1;
1017 uint32_t clipHalfZ : 1;
1018 uint32_t pointParam : 1;
1019 uint32_t pointSpriteEnable : 1;
1020 uint32_t pointSpriteTopOrigin : 1;
// Despite the name this is a single-bit flag, not a count.
1021 uint32_t forcedSampleCount : 1;
1022 uint32_t pixelOffset : 1;
1023 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
1024 uint32_t conservativeRast : 1;
1025
1026 float pointSize;
1027 float lineWidth;
1028
1029 float depthBias;
1030 float slopeScaledDepthBias;
1031 float depthBiasClamp;
1032 SWR_FORMAT depthFormat; // @llvm_enum
1033
1034 // sample count the rasterizer is running at
1035 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
1036 uint32_t pixelLocation; // UL or Center
1037 SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
// NOTE(review): declared bool but tagged as an enum for the generator - check
// that the generated type has the intended size.
1038 bool bIsCenterPattern; // @llvm_enum
1039 };
1040
1041
/// Constant-source selector. Values are written out explicitly and equal the
/// implicit numbering (0 through 3).
/// NOTE(review): SWR_ATTRIB_SWIZZLE::constantSource is a 2-bit field, wide
/// enough for these four values if that is where they are stored - confirm.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000       = 0,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT = 1,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT = 2,
    SWR_CONSTANT_SOURCE_PRIM_ID          = 3
};
1049
// One swizzle-map entry, packed into uint16_t bit-fields (5 + 2 + 4 = 11 bits
// used). SWR_BACKEND_STATE::swizzleMap holds 32 of these.
// NOTE(review): constantSource presumably stores an SWR_CONSTANT_SOURCE value,
// and the four componentOverrideMask bits presumably map to the four vector
// components - confirm.
1050 struct SWR_ATTRIB_SWIZZLE
1051 {
1052 uint16_t sourceAttrib : 5; // source attribute
1053 uint16_t constantSource : 2; // constant source to apply
1054 uint16_t componentOverrideMask : 4; // override component with constant source
1055 };
1056
1057 // backend state
// Backend attribute-setup state: per-attribute masks and component counts,
// the optional swizzle map, array-index forwarding switches, and the offsets
// (in simdvector units) of the attribute and clip/cull sections of a vertex.
// The 32-bit masks and the two 32-entry arrays are all sized for the same
// maximum of 32 attributes.
1058 struct SWR_BACKEND_STATE
1059 {
1060 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
1061 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
1062
1063 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
1064 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
1065
1066 bool swizzleEnable; // when enabled, core will parse the swizzle map when
1067 // setting up attributes for the backend, otherwise
1068 // all attributes up to numAttributes will be sent
1069 SWR_ATTRIB_SWIZZLE swizzleMap[32];
1070
1071 bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
1072 bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
1073
1074 // Offset to the start of the attributes of the input vertices, in simdvector units
1075 uint32_t vertexAttribOffset;
1076
1077 // User clip/cull distance enables
// Each mask is 8 bits wide.
// NOTE(review): presumably one bit per user clip/cull distance - confirm.
1078 uint8_t cullDistanceMask;
1079 uint8_t clipDistanceMask;
1080
1081 // Offset to clip/cull attrib section of the vertex, in simdvector units
1082 uint32_t vertexClipCullOffset;
1083 };
1084
1085
// Depth/stencil state as a union: the anonymous struct gives named access to
// the individual fields, while value[3] exposes the same storage as three
// 32-bit words.
// NOTE(review): the "dword N" grouping relies on the compiler packing the
// bit-fields and bytes as laid out here - confirm on every supported compiler.
1086 union SWR_DEPTH_STENCIL_STATE
1087 {
1088 struct
1089 {
// The bit-fields under dword 0 add up to exactly 32 bits
// (five 1-bit enables plus nine 3-bit function/op selectors).
1090 // dword 0
1091 uint32_t depthWriteEnable : 1;
1092 uint32_t depthTestEnable : 1;
1093 uint32_t stencilWriteEnable : 1;
1094 uint32_t stencilTestEnable : 1;
1095 uint32_t doubleSidedStencilTestEnable : 1;
1096
1097 uint32_t depthTestFunc : 3;
1098 uint32_t stencilTestFunc : 3;
1099
1100 uint32_t backfaceStencilPassDepthPassOp : 3;
1101 uint32_t backfaceStencilPassDepthFailOp : 3;
1102 uint32_t backfaceStencilFailOp : 3;
1103 uint32_t backfaceStencilTestFunc : 3;
1104 uint32_t stencilPassDepthPassOp : 3;
1105 uint32_t stencilPassDepthFailOp : 3;
1106 uint32_t stencilFailOp : 3;
1107
// Four 8-bit masks fill the second word.
1108 // dword 1
1109 uint8_t backfaceStencilWriteMask;
1110 uint8_t backfaceStencilTestMask;
1111 uint8_t stencilWriteMask;
1112 uint8_t stencilTestMask;
1113
// Only two bytes are declared for the third word.
1114 // dword 2
1115 uint8_t backfaceStencilRefValue;
1116 uint8_t stencilRefValue;
1117 };
1118 uint32_t value[3];
1119 };
1120
/// Shading-rate selector. Values are written out explicitly and equal the
/// implicit numbering. The trailing COUNT entry equals the number of real
/// rates (2).
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    SWR_SHADING_RATE_COUNT  = 2,
};
1127
/// Input-coverage selector. Values are written out explicitly and equal the
/// implicit numbering. The trailing COUNT entry equals the number of real
/// modes (3).
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE               = 0,
    SWR_INPUT_COVERAGE_NORMAL             = 1,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE = 2,
    SWR_INPUT_COVERAGE_COUNT              = 3,
};
1135
/// Pixel-shader position-offset selector (none, sample or centroid). Values
/// are written out explicitly and equal the implicit numbering. The trailing
/// COUNT entry equals the number of real modes (3).
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    SWR_PS_POSITION_OFFSET_COUNT    = 3,
};
1143
/// Bit flags naming the barycentric kinds: one distinct bit each, so they can
/// be OR-ed together. Written as shifts; the values are still 0x1, 0x2, 0x4.
/// NOTE(review): SWR_PS_STATE::barycentricsMask is a 3-bit field and
/// presumably holds a combination of these bits - confirm.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 1 << 0,
    SWR_BARYCENTRIC_CENTROID_MASK   = 1 << 1,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 1 << 2,
};
1150
1151 // pixel shader state
// Pixel-shader state: the kernel entry point, fourteen bits of packed flags
// and selectors, and a byte mask of written render targets.
// NOTE(review): the "dword 0-1" label assumes the function pointer occupies
// two 32-bit words, i.e. a 64-bit build - confirm.
1152 struct SWR_PS_STATE
1153 {
1154 // dword 0-1
1155 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1156
// NOTE(review): inputCoverage, shadingRate and posOffset are 2-bit fields that
// presumably hold SWR_INPUT_COVERAGE, SWR_SHADING_RATE and
// SWR_PS_POSITION_OFFSET values, and the 3-bit barycentricsMask matches the
// three SWR_BARYCENTRICS_MASK bits - confirm.
1157 // dword 2
1158 uint32_t killsPixel : 1; // pixel shader can kill pixels
1159 uint32_t inputCoverage : 2; // ps uses input coverage
1160 uint32_t writesODepth : 1; // pixel shader writes to depth
1161 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1162 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1163 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1164 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1165 uint32_t usesUAV : 1; // pixel shader accesses UAV
1166 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1167
1168 uint8_t renderTargetMask; // Mask of render targets written
1169 };
1170
1171 // depth bounds state
// Depth-bounds test configuration: an enable flag plus the minimum and maximum
// float bounds.
// NOTE(review): whether the bounds are inclusive is not visible in this
// section - confirm against the code that performs the test.
1172 struct SWR_DEPTH_BOUNDS_STATE
1173 {
1174 bool depthBoundsTestEnable;
1175 float depthBoundsTestMinValue;
1176 float depthBoundsTestMaxValue;
1177 };
1178