swr: [rasterizer core] SIMD16 Frontend WIP
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / state.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29
30 #include "common/formats.h"
31 #include "common/simdintrin.h"
32 #include <functional>
33 #include <algorithm>
34
35 //////////////////////////////////////////////////////////////////////////
36 /// PRIMITIVE_TOPOLOGY.
37 //////////////////////////////////////////////////////////////////////////
38 enum PRIMITIVE_TOPOLOGY // Primitive topology codes. All values are explicit; 0x15 and 0x18-0x1E are not assigned.
39 {
40 TOP_UNKNOWN = 0x0,
41 TOP_POINT_LIST = 0x1,
42 TOP_LINE_LIST = 0x2,
43 TOP_LINE_STRIP = 0x3,
44 TOP_TRIANGLE_LIST = 0x4,
45 TOP_TRIANGLE_STRIP = 0x5,
46 TOP_TRIANGLE_FAN = 0x6,
47 TOP_QUAD_LIST = 0x7,
48 TOP_QUAD_STRIP = 0x8,
49 TOP_LINE_LIST_ADJ = 0x9,
50 TOP_LISTSTRIP_ADJ = 0xA,
51 TOP_TRI_LIST_ADJ = 0xB,
52 TOP_TRI_STRIP_ADJ = 0xC,
53 TOP_TRI_STRIP_REVERSE = 0xD,
54 TOP_POLYGON = 0xE,
55 TOP_RECT_LIST = 0xF,
56 TOP_LINE_LOOP = 0x10,
57 TOP_POINT_LIST_BF = 0x11,
58 TOP_LINE_STRIP_CONT = 0x12,
59 TOP_LINE_STRIP_BF = 0x13,
60 TOP_LINE_STRIP_CONT_BF = 0x14,
61 TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16, // note: 0x15 is skipped
62 TOP_TRIANGLE_DISC = 0x17, /// @todo What is this??
63
64 TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
65 TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches
66 TOP_PATCHLIST_2 = 0x21, // For every n in 1..32: TOP_PATCHLIST_n == TOP_PATCHLIST_BASE + n
67 TOP_PATCHLIST_3 = 0x22,
68 TOP_PATCHLIST_4 = 0x23,
69 TOP_PATCHLIST_5 = 0x24,
70 TOP_PATCHLIST_6 = 0x25,
71 TOP_PATCHLIST_7 = 0x26,
72 TOP_PATCHLIST_8 = 0x27,
73 TOP_PATCHLIST_9 = 0x28,
74 TOP_PATCHLIST_10 = 0x29,
75 TOP_PATCHLIST_11 = 0x2A,
76 TOP_PATCHLIST_12 = 0x2B,
77 TOP_PATCHLIST_13 = 0x2C,
78 TOP_PATCHLIST_14 = 0x2D,
79 TOP_PATCHLIST_15 = 0x2E,
80 TOP_PATCHLIST_16 = 0x2F,
81 TOP_PATCHLIST_17 = 0x30,
82 TOP_PATCHLIST_18 = 0x31,
83 TOP_PATCHLIST_19 = 0x32,
84 TOP_PATCHLIST_20 = 0x33,
85 TOP_PATCHLIST_21 = 0x34,
86 TOP_PATCHLIST_22 = 0x35,
87 TOP_PATCHLIST_23 = 0x36,
88 TOP_PATCHLIST_24 = 0x37,
89 TOP_PATCHLIST_25 = 0x38,
90 TOP_PATCHLIST_26 = 0x39,
91 TOP_PATCHLIST_27 = 0x3A,
92 TOP_PATCHLIST_28 = 0x3B,
93 TOP_PATCHLIST_29 = 0x3C,
94 TOP_PATCHLIST_30 = 0x3D,
95 TOP_PATCHLIST_31 = 0x3E,
96 TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches
97 };
98
99 //////////////////////////////////////////////////////////////////////////
100 /// SWR_SHADER_TYPE
101 //////////////////////////////////////////////////////////////////////////
102 enum SWR_SHADER_TYPE // Shader type identifiers, implicitly numbered from 0.
103 {
104 SHADER_VERTEX, // 0
105 SHADER_GEOMETRY, // 1
106 SHADER_DOMAIN, // 2
107 SHADER_HULL, // 3
108 SHADER_PIXEL, // 4
109 SHADER_COMPUTE, // 5
110
111 NUM_SHADER_TYPES, // 6: number of shader types listed above
112 };
113
114 //////////////////////////////////////////////////////////////////////////
115 /// SWR_RENDERTARGET_ATTACHMENT
116 /// @todo It's not clear what an "attachment" means. It's not a common term.
117 //////////////////////////////////////////////////////////////////////////
118 enum SWR_RENDERTARGET_ATTACHMENT // Attachment indices, implicitly numbered from 0.
119 {
120 SWR_ATTACHMENT_COLOR0, // 0
121 SWR_ATTACHMENT_COLOR1, // 1
122 SWR_ATTACHMENT_COLOR2, // 2
123 SWR_ATTACHMENT_COLOR3, // 3
124 SWR_ATTACHMENT_COLOR4, // 4
125 SWR_ATTACHMENT_COLOR5, // 5
126 SWR_ATTACHMENT_COLOR6, // 6
127 SWR_ATTACHMENT_COLOR7, // 7
128 SWR_ATTACHMENT_DEPTH, // 8
129 SWR_ATTACHMENT_STENCIL, // 9
130
131 SWR_NUM_ATTACHMENTS // 10: number of attachments listed above
132 };
133
134 #define SWR_NUM_RENDERTARGETS 8 // equals the number of SWR_ATTACHMENT_COLOR0..7 entries
135
136 #define SWR_ATTACHMENT_COLOR0_BIT 0x001 // each *_BIT equals (1 << matching SWR_RENDERTARGET_ATTACHMENT value)
137 #define SWR_ATTACHMENT_COLOR1_BIT 0x002
138 #define SWR_ATTACHMENT_COLOR2_BIT 0x004
139 #define SWR_ATTACHMENT_COLOR3_BIT 0x008
140 #define SWR_ATTACHMENT_COLOR4_BIT 0x010
141 #define SWR_ATTACHMENT_COLOR5_BIT 0x020
142 #define SWR_ATTACHMENT_COLOR6_BIT 0x040
143 #define SWR_ATTACHMENT_COLOR7_BIT 0x080
144 #define SWR_ATTACHMENT_DEPTH_BIT 0x100
145 #define SWR_ATTACHMENT_STENCIL_BIT 0x200
146 #define SWR_ATTACHMENT_MASK_ALL 0x3ff // OR of all ten attachment bits (color 0-7, depth, stencil)
147 #define SWR_ATTACHMENT_MASK_COLOR 0x0ff // OR of the eight color attachment bits only
148
149
150 //////////////////////////////////////////////////////////////////////////
151 /// @brief SWR Inner Tessellation factor ID
152 /// See above GetTessFactorOutputPosition code for documentation
153 enum SWR_INNER_TESSFACTOR_ID // Indices into SWR_TESSELLATION_FACTORS::InnerTessFactors (array is sized by SWR_NUM_INNER_TESS_FACTORS).
154 {
155 SWR_QUAD_U_TRI_INSIDE, // 0
156 SWR_QUAD_V_INSIDE, // 1
157
158 SWR_NUM_INNER_TESS_FACTORS, // 2: number of inner factors
159 };
160
161 //////////////////////////////////////////////////////////////////////////
162 /// @brief SWR Outer Tessellation factor ID
163 /// See above GetTessFactorOutputPosition code for documentation
164 enum SWR_OUTER_TESSFACTOR_ID // Indices into SWR_TESSELLATION_FACTORS::OuterTessFactors (array is sized by SWR_NUM_OUTER_TESS_FACTORS).
165 {
166 SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL, // 0
167 SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY, // 1
168 SWR_QUAD_U_EQ1_TRI_W, // 2
169 SWR_QUAD_V_EQ1, // 3
170
171 SWR_NUM_OUTER_TESS_FACTORS, // 4: number of outer factors
172 };
173
174
175 /////////////////////////////////////////////////////////////////////////
176 /// simdvertex
177 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
178 /// Contains position in clip space, hardcoded to attribute 0,
179 /// space for up to 32 attributes, as well as any SGV values generated
180 /// by the pipeline
181 /////////////////////////////////////////////////////////////////////////
182 #define VERTEX_POSITION_SLOT 0 // position is hardcoded to attribute slot 0
183 #define VERTEX_ATTRIB_START_SLOT 1 // first generic attribute slot
184 #define VERTEX_ATTRIB_END_SLOT 32 // slots 1..32 give room for the 32 generic attributes
185 #define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here
186 #define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here
187 #define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
188 #define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
189 #define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
190 #define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT 38 // NOTE(review): presumably holds the viewport array index (cf. emitsViewportArrayIndex in SWR_GS_STATE) - confirm which stage writes it
191 // SoAoSoA
192 struct simdvertex // A SIMD batch of vertices stored as an array of simdvector attributes.
193 {
194 simdvector attrib[KNOB_NUM_ATTRIBUTES]; // indexed by the VERTEX_*_SLOT defines; assumes KNOB_NUM_ATTRIBUTES exceeds the highest slot (38) - TODO confirm
195 };
196
197 //////////////////////////////////////////////////////////////////////////
198 /// SWR_VS_CONTEXT
199 /// @brief Input to vertex shader
200 /////////////////////////////////////////////////////////////////////////
201 struct SWR_VS_CONTEXT // Passed by pointer to PFN_VERTEX_FUNC. NOTE: layout depends on USE_SIMD16_FRONTEND.
202 {
203 simdvertex* pVin; // IN: SIMD input vertex data store
204 simdvertex* pVout; // OUT: SIMD output vertex data store
205
206 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
207 simdscalari VertexID; // IN: Vertex ID
208 simdscalari mask; // IN: Active mask for shader
209 #if USE_SIMD16_FRONTEND
210 uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
211 #endif
212 };
213
214 /////////////////////////////////////////////////////////////////////////
215 /// ScalarCPoint
216 /// @brief defines a control point element as passed from the output
217 /// of the hull shader to the input of the domain shader
218 /////////////////////////////////////////////////////////////////////////
219 struct ScalarAttrib // One scalar (non-SIMD) attribute: four float components. Element type of ScalarCPoint::attrib.
220 {
221 float x;
222 float y;
223 float z;
224 float w;
225 };
226
227 struct ScalarCPoint // Scalar control point: KNOB_NUM_ATTRIBUTES four-component attributes.
228 {
229 ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES]; // one ScalarAttrib (x, y, z, w) per attribute
230 };
231
232 //////////////////////////////////////////////////////////////////////////
233 /// SWR_TESSELLATION_FACTORS
234 /// @brief Tessellation factors structure (non-vector)
235 /////////////////////////////////////////////////////////////////////////
236 struct SWR_TESSELLATION_FACTORS // Scalar tessellation factors for one patch.
237 {
238 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS]; // sized by the SWR_OUTER_TESSFACTOR_ID count (4)
239 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS]; // sized by the SWR_INNER_TESSFACTOR_ID count (2)
240 };
241
242 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
243 struct ScalarPatch // One scalar patch: tessellation factors, control points, and per-patch data.
244 {
245 SWR_TESSELLATION_FACTORS tessFactors; // inner and outer tessellation factors for this patch
246 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM]; // control points, room for the maximum of 32
247 ScalarCPoint patchData; // NOTE(review): presumably per-patch (not per-control-point) attributes - confirm
248 };
249
250 //////////////////////////////////////////////////////////////////////////
251 /// SWR_HS_CONTEXT
252 /// @brief Input to hull shader
253 /////////////////////////////////////////////////////////////////////////
254 struct SWR_HS_CONTEXT // Passed by pointer to PFN_HS_FUNC.
255 {
256 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
257 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
258 simdscalari mask; // IN: Active mask for shader
259 ScalarPatch* pCPout; // OUT: Output control point patch
260 // SIMD-sized-array of SCALAR patches
261 };
262
263 //////////////////////////////////////////////////////////////////////////
264 /// SWR_DS_CONTEXT
265 /// @brief Input to domain shader
266 /////////////////////////////////////////////////////////////////////////
267 struct SWR_DS_CONTEXT // Passed by pointer to PFN_DS_FUNC. Mixes scalar per-patch inputs with SIMD domain points.
268 {
269 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
270 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
271 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
272 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
273 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
274 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
275 simdscalari mask; // IN: Active mask for shader
276 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
277 };
278
279 //////////////////////////////////////////////////////////////////////////
280 /// SWR_GS_CONTEXT
281 /// @brief Input to geometry shader.
282 /////////////////////////////////////////////////////////////////////////
283 struct SWR_GS_CONTEXT // Passed by pointer to PFN_GS_FUNC.
284 {
285 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims
286 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
287 uint32_t InstanceID; // IN: input instance ID
288 simdscalari mask; // IN: Active mask for shader
289 uint8_t* pStream; // OUT: output stream (contains vertices for all output streams)
290 uint8_t* pCutOrStreamIdBuffer; // OUT: cut or stream id buffer (see SWR_GS_STATE::isSingleStream for which one)
291 simdscalari vertexCount; // OUT: num vertices emitted per SIMD lane
292 };
293
294 struct PixelPositions // One SIMD quantity evaluated at four positions; used for vX, vY, vI, vJ, vOneOverW in SWR_PS_CONTEXT.
295 {
296 simdscalar UL; // NOTE(review): presumably the pixel's upper-left corner (cf. SWR_PIXEL_LOCATION_UL) - confirm
297 simdscalar center; // value at the pixel center
298 simdscalar sample; // value at the sample position
299 simdscalar centroid; // value at the centroid
300 };
301
302 #define SWR_MAX_NUM_MULTISAMPLES 16 // maximum samples per pixel; sizes the sampleArrayT typedef in SWR_MULTISAMPLE_POS
303
304 //////////////////////////////////////////////////////////////////////////
305 /// SWR_PS_CONTEXT
306 /// @brief Input to pixel shader.
307 /////////////////////////////////////////////////////////////////////////
308 struct SWR_PS_CONTEXT // Passed by pointer to PFN_PIXEL_KERNEL / PFN_CPIXEL_KERNEL.
309 {
310 PixelPositions vX; // IN: x location(s) of pixels
311 PixelPositions vY; // IN: y location(s) of pixels
312 simdscalar vZ; // INOUT: z location of pixels
313 simdscalari activeMask; // OUT: mask for kill
314 simdscalar inputMask; // IN: input coverage mask for all samples
315 simdscalari oMask; // OUT: mask for output coverage
316
317 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
318 PixelPositions vJ; // second barycentric coordinate, same evaluation positions as vI
319 PixelPositions vOneOverW; // IN: 1/w
320
321 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
322 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
323 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
324 const float *I; // IN: Barycentric A, B, and C coefs used to compute I
325 const float *J; // IN: Barycentric A, B, and C coefs used to compute J
326 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
327 const float* pSamplePosX; // IN: array of sample positions (x)
328 const float* pSamplePosY; // IN: array of sample positions (y)
329 simdvector shaded[SWR_NUM_RENDERTARGETS];
330 // OUT: result color per rendertarget
331
332 uint32_t frontFace; // IN: front- 1, back- 0
333 uint32_t primID; // IN: primitive ID
334 uint32_t sampleIndex; // IN: sampleIndex
335
336 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
337
338 uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
339 };
340
341 //////////////////////////////////////////////////////////////////////////
342 /// SWR_CS_CONTEXT
343 /// @brief Input to compute shader.
344 /////////////////////////////////////////////////////////////////////////
345 struct SWR_CS_CONTEXT // Passed by pointer to PFN_CS_FUNC.
346 {
347 // The ThreadGroupId is the current thread group index relative
348 // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
349 // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
350
351 // Compute shader accepts the following system values.
352 // o ThreadId - Current thread id relative to all other threads in dispatch.
353 // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
354 // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
355 // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
356 //
357 // All of these system values can be computed in the shader. They will be
358 // derived from the current tile counter. The tile counter is an atomic counter that
359 // resides in the draw context and is initialized to the product of the dispatch dims.
360 //
361 // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
362 //
363 // Each CPU worker thread will atomically decrement this counter and pass the current
364 // count into the shader. When the count reaches 0 then all thread groups in the
365 // dispatch call have been completed.
366
367 uint32_t tileCounter; // The tile counter value for this thread group.
368
369 // Dispatch dimensions used by shader to compute system values from the tile counter.
370 uint32_t dispatchDims[3]; // the x, y, z dispatch dimensions referred to above
371
372 uint8_t* pTGSM; // Thread Group Shared Memory pointer.
373
374 uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
375 };
376
377 // enums
378 enum SWR_TILE_MODE // Surface tiling modes; used by SWR_SURFACE_STATE::tileMode.
379 {
380 SWR_TILE_NONE = 0x0, // Linear mode (no tiling)
381 SWR_TILE_MODE_WMAJOR, // W major tiling (= 1)
382 SWR_TILE_MODE_XMAJOR, // X major tiling (= 2)
383 SWR_TILE_MODE_YMAJOR, // Y major tiling (= 3)
384 SWR_TILE_SWRZ, // SWR-Z tiling (= 4)
385
386 SWR_TILE_MODE_COUNT // 5: number of tile modes listed above
387 };
388
389 enum SWR_SURFACE_TYPE // Surface kinds; used by SWR_SURFACE_STATE::type. Values are explicit; 6 is not assigned.
390 {
391 SURFACE_1D = 0,
392 SURFACE_2D = 1,
393 SURFACE_3D = 2,
394 SURFACE_CUBE = 3,
395 SURFACE_BUFFER = 4,
396 SURFACE_STRUCTURED_BUFFER = 5,
397 SURFACE_NULL = 7 // note: value 6 is skipped
398 };
399
400 enum SWR_ZFUNCTION // Depth comparison function identifiers, implicitly numbered from 0.
401 {
402 ZFUNC_ALWAYS, // 0
403 ZFUNC_NEVER, // 1
404 ZFUNC_LT, // 2
405 ZFUNC_EQ, // 3
406 ZFUNC_LE, // 4
407 ZFUNC_GT, // 5
408 ZFUNC_NE, // 6
409 ZFUNC_GE, // 7
410 NUM_ZFUNC // 8: number of functions listed above
411 };
412
413 enum SWR_STENCILOP // Stencil operation identifiers, implicitly numbered from 0.
414 {
415 STENCILOP_KEEP, // 0
416 STENCILOP_ZERO, // 1
417 STENCILOP_REPLACE, // 2
418 STENCILOP_INCRSAT, // 3
419 STENCILOP_DECRSAT, // 4
420 STENCILOP_INCR, // 5
421 STENCILOP_DECR, // 6
422 STENCILOP_INVERT // 7
423 };
424
425 enum SWR_BLEND_FACTOR // Blend factor identifiers, implicitly numbered from 0.
426 {
427 BLENDFACTOR_ONE, // 0
428 BLENDFACTOR_SRC_COLOR, // 1
429 BLENDFACTOR_SRC_ALPHA, // 2
430 BLENDFACTOR_DST_ALPHA, // 3
431 BLENDFACTOR_DST_COLOR, // 4
432 BLENDFACTOR_SRC_ALPHA_SATURATE, // 5
433 BLENDFACTOR_CONST_COLOR, // 6
434 BLENDFACTOR_CONST_ALPHA, // 7
435 BLENDFACTOR_SRC1_COLOR, // 8
436 BLENDFACTOR_SRC1_ALPHA, // 9
437 BLENDFACTOR_ZERO, // 10
438 BLENDFACTOR_INV_SRC_COLOR, // 11
439 BLENDFACTOR_INV_SRC_ALPHA, // 12
440 BLENDFACTOR_INV_DST_ALPHA, // 13
441 BLENDFACTOR_INV_DST_COLOR, // 14
442 BLENDFACTOR_INV_CONST_COLOR, // 15
443 BLENDFACTOR_INV_CONST_ALPHA, // 16
444 BLENDFACTOR_INV_SRC1_COLOR, // 17
445 BLENDFACTOR_INV_SRC1_ALPHA // 18
446 };
447
448 enum SWR_BLEND_OP // Blend operation identifiers, implicitly numbered from 0.
449 {
450 BLENDOP_ADD, // 0
451 BLENDOP_SUBTRACT, // 1
452 BLENDOP_REVSUBTRACT, // 2
453 BLENDOP_MIN, // 3
454 BLENDOP_MAX, // 4
455 };
456
457 enum SWR_LOGIC_OP // Logic operation identifiers, implicitly numbered from 0 (16 entries, values 0..15).
458 {
459 LOGICOP_CLEAR, // 0
460 LOGICOP_NOR, // 1
461 LOGICOP_AND_INVERTED, // 2
462 LOGICOP_COPY_INVERTED, // 3
463 LOGICOP_AND_REVERSE, // 4
464 LOGICOP_INVERT, // 5
465 LOGICOP_XOR, // 6
466 LOGICOP_NAND, // 7
467 LOGICOP_AND, // 8
468 LOGICOP_EQUIV, // 9
469 LOGICOP_NOOP, // 10
470 LOGICOP_OR_INVERTED, // 11
471 LOGICOP_COPY, // 12
472 LOGICOP_OR_REVERSE, // 13
473 LOGICOP_OR, // 14
474 LOGICOP_SET, // 15
475 };
476
477 //////////////////////////////////////////////////////////////////////////
478 /// SWR_AUX_MODE
479 /// @brief Specifies how the auxiliary buffer is used by the driver.
480 //////////////////////////////////////////////////////////////////////////
481 enum SWR_AUX_MODE // Used by SWR_SURFACE_STATE::auxMode; implicitly numbered from 0.
482 {
483 AUX_MODE_NONE, // 0
484 AUX_MODE_COLOR, // 1
485 AUX_MODE_UAV, // 2
486 AUX_MODE_DEPTH, // 3
487 };
488
489 //////////////////////////////////////////////////////////////////////////
490 /// SWR_SURFACE_STATE
491 //////////////////////////////////////////////////////////////////////////
492 struct SWR_SURFACE_STATE // Describes one surface: memory pointers, type/format, dimensions, LOD limits and layout parameters.
493 {
494 uint8_t *pBaseAddress; // base address of the surface memory
495 SWR_SURFACE_TYPE type; // @llvm_enum
496 SWR_FORMAT format; // @llvm_enum
497 uint32_t width;
498 uint32_t height;
499 uint32_t depth;
500 uint32_t numSamples;
501 uint32_t samplePattern;
502 uint32_t pitch; // NOTE(review): units (bytes vs. elements) are not stated here - confirm
503 uint32_t qpitch; // NOTE(review): units are not stated here - confirm
504 uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
505 uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
506 float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
507 uint32_t lod; // for render targets, the lod being rendered to
508 uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
509 SWR_TILE_MODE tileMode; // @llvm_enum
510 uint32_t halign;
511 uint32_t valign;
512 uint32_t xOffset;
513 uint32_t yOffset;
514
515 uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces (NOTE(review): meaning of the [2] and [15] dimensions is not documented here - confirm)
516
517 uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc.
518 SWR_AUX_MODE auxMode; // @llvm_enum
519
520
521 bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
522 };
523
524 // vertex fetch state
525 // WARNING- any changes to this struct need to be reflected
526 // in the fetch shader jit
527 struct SWR_VERTEX_BUFFER_STATE // One bound vertex buffer; SWR_FETCH_CONTEXT::pStreams points at an array of these.
528 {
529 uint32_t index;
530 uint32_t pitch; // divisor used for maxVertex / partialInboundsSize below, so in the same units as size
531 const uint8_t *pData; // vertex data (read-only)
532 uint32_t size; // NOTE(review): presumably the buffer size in bytes - confirm
533 uint32_t numaNode;
534 uint32_t minVertex; // min vertex (for bounds checking)
535 uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
536 uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
537 };
538
539 struct SWR_INDEX_BUFFER_STATE // Describes the index buffer: element format, data pointer and size.
540 {
541 // Format type for indices (e.g. UINT16, UINT32, etc.)
542 SWR_FORMAT format; // @llvm_enum
543 const void *pIndices; // index data (read-only); element type is given by format
544 uint32_t size; // NOTE(review): units (bytes vs. index count) are not stated here - confirm
545 };
546
547
548 //////////////////////////////////////////////////////////////////////////
549 /// SWR_FETCH_CONTEXT
550 /// @brief Input to fetch shader.
551 /// @note WARNING - Changes to this struct need to be reflected in the
552 /// fetch shader jit.
553 /////////////////////////////////////////////////////////////////////////
554 struct SWR_FETCH_CONTEXT // Passed by reference to PFN_FETCH_FUNC; IN fields are inputs, OUT fields are results.
555 {
556 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
557 const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
558 const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
559 uint32_t CurInstance; // IN: current instance
560 uint32_t BaseVertex; // IN: base vertex
561 uint32_t StartVertex; // IN: start vertex
562 uint32_t StartInstance; // IN: start instance
563 simdscalari VertexID; // OUT: vector of vertex IDs
564 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
565 };
566
567 //////////////////////////////////////////////////////////////////////////
568 /// SWR_STATS
569 ///
570 /// @brief All statistics generated by SWR go here. These are public
571 /// to driver.
572 /////////////////////////////////////////////////////////////////////////
573 OSALIGNLINE(struct) SWR_STATS // NOTE(review): OSALIGNLINE is defined elsewhere; presumably applies cache-line alignment - confirm
574 {
575 // Occlusion Query
576 uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
577
578 // Pipeline Stats
579 uint64_t PsInvocations; // Number of Pixel Shader invocations
580 uint64_t CsInvocations; // Number of Compute Shader invocations
581
582 };
583
584 //////////////////////////////////////////////////////////////////////////
585 /// SWR_STATS_FE
586 ///
587 /// @brief All statistics generated by FE.
588 /////////////////////////////////////////////////////////////////////////
589 OSALIGNLINE(struct) SWR_STATS_FE // Front-end (FE) counters; all fields are 64-bit.
590 {
591 uint64_t IaVertices; // Number of Fetch Shader vertices
592 uint64_t IaPrimitives; // Number of PA primitives.
593 uint64_t VsInvocations; // Number of Vertex Shader invocations
594 uint64_t HsInvocations; // Number of Hull Shader invocations
595 uint64_t DsInvocations; // Number of Domain Shader invocations
596 uint64_t GsInvocations; // Number of Geometry Shader invocations
597 uint64_t GsPrimitives; // Number of prims GS outputs.
598 uint64_t CInvocations; // Number of clipper invocations
599 uint64_t CPrimitives; // Number of clipper primitives.
600
601 // Streamout Stats
602 uint64_t SoPrimStorageNeeded[4]; // NOTE(review): literal 4 presumably matches MAX_SO_STREAMS (one entry per stream) - confirm
603 uint64_t SoNumPrimsWritten[4]; // NOTE(review): literal 4 presumably matches MAX_SO_STREAMS (one entry per stream) - confirm
604 };
605
606 //////////////////////////////////////////////////////////////////////////
607 /// STREAMOUT_BUFFERS
608 /////////////////////////////////////////////////////////////////////////
609
610 #define MAX_SO_STREAMS 4 // sizes the per-stream arrays in SWR_STREAMOUT_STATE and SWR_STREAMOUT_CONTEXT
611 #define MAX_SO_BUFFERS 4 // same value as MAX_SO_STREAMS; not used by any declaration in the visible part of this file
612 #define MAX_ATTRIBUTES 32 // same count as the 32 generic vertex attribute slots
613
614 struct SWR_STREAMOUT_BUFFER // One stream-output buffer binding. Sizes, pitch and offsets are all in dwords.
615 {
616 bool enable;
617 bool soWriteEnable;
618
619 // Pointers to streamout buffers.
620 uint32_t* pBuffer;
621
622 // Size of buffer in dwords.
623 uint32_t bufferSize;
624
625 // Vertex pitch of buffer in dwords.
626 uint32_t pitch;
627
628 // Offset into buffer in dwords. SOS will increment this offset.
629 uint32_t streamOffset;
630
631 // Pointer to the SO write offset. If not null then we update the offset here.
632 uint32_t* pWriteOffset;
633
634 };
635
636 //////////////////////////////////////////////////////////////////////////
637 /// STREAMOUT_STATE
638 /////////////////////////////////////////////////////////////////////////
639 struct SWR_STREAMOUT_STATE // Stream-output configuration; per-stream arrays are sized by MAX_SO_STREAMS.
640 {
641 // This disables stream output. NOTE(review): the field is named soEnable, so true presumably *enables* stream output - confirm polarity.
642 bool soEnable;
643
644 // which streams are enabled for streamout
645 bool streamEnable[MAX_SO_STREAMS];
646
647 // If set then do not send any streams to the rasterizer.
648 bool rasterizerDisable;
649
650 // Specifies which stream to send to the rasterizer.
651 uint32_t streamToRasterizer;
652
653 // The stream masks specify which attributes are sent to which streams.
654 // These masks help the FE to setup the pPrimData buffer that is passed
655 // the Stream Output Shader (SOS) function.
656 uint32_t streamMasks[MAX_SO_STREAMS];
657
658 // Number of attributes, including position, per vertex that are streamed out.
659 // This should match number of bits in stream mask.
660 uint32_t streamNumEntries[MAX_SO_STREAMS];
661 };
662
663 //////////////////////////////////////////////////////////////////////////
664 /// STREAMOUT_CONTEXT - Passed to SOS
665 /////////////////////////////////////////////////////////////////////////
666 struct SWR_STREAMOUT_CONTEXT // Passed by reference to PFN_SO_FUNC (the Stream Output Shader, SOS).
667 {
668 uint32_t* pPrimData; // primitive data buffer set up by the FE (see streamMasks in SWR_STREAMOUT_STATE)
669 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS]; // NOTE(review): sized by MAX_SO_STREAMS although it holds buffers; MAX_SO_BUFFERS has the same value (4) - confirm intent
670
671 // Num prims written for this stream
672 uint32_t numPrimsWritten;
673
674 // Num prims that should have been written if there were no overflow.
675 uint32_t numPrimStorageNeeded;
676 };
677
678 //////////////////////////////////////////////////////////////////////////
679 /// SWR_GS_STATE - Geometry shader state
680 /////////////////////////////////////////////////////////////////////////
681 struct SWR_GS_STATE // Geometry shader configuration: enable flag, I/O shape, and which system values the GS emits.
682 {
683 bool gsEnable;
684
685 // number of input attributes per vertex. used by the frontend to
686 // optimize assembling primitives for GS
687 uint32_t numInputAttribs;
688
689 // output topology - can be point, tristrip, or linestrip
690 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
691
692 // maximum number of verts that can be emitted by a single instance of the GS
693 uint32_t maxNumVerts;
694
695 // instance count
696 uint32_t instanceCount;
697
698 // geometry shader emits renderTargetArrayIndex (see VERTEX_RTAI_SLOT)
699 bool emitsRenderTargetArrayIndex;
700
701 // geometry shader emits PrimitiveID (see VERTEX_PRIMID_SLOT)
702 bool emitsPrimitiveID;
703
704 // geometry shader emits ViewportArrayIndex
705 bool emitsViewportArrayIndex;
706
707 // if true, geometry shader emits a single stream, with separate cut buffer.
708 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
709 // to map vertices to streams
710 bool isSingleStream;
711
712 // when single stream is enabled, singleStreamID dictates which stream is being output.
713 // field ignored if isSingleStream is false
714 uint32_t singleStreamID;
715 };
716
717
718 //////////////////////////////////////////////////////////////////////////
719 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
720 /////////////////////////////////////////////////////////////////////////
721 enum SWR_TS_OUTPUT_TOPOLOGY // Used by SWR_TS_STATE::tsOutputTopology; implicitly numbered from 0.
722 {
723 SWR_TS_OUTPUT_POINT, // 0
724 SWR_TS_OUTPUT_LINE, // 1
725 SWR_TS_OUTPUT_TRI_CW, // 2
726 SWR_TS_OUTPUT_TRI_CCW, // 3
727
728 SWR_TS_OUTPUT_TOPOLOGY_COUNT // 4: number of topologies listed above
729 };
730
731 //////////////////////////////////////////////////////////////////////////
732 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
733 /////////////////////////////////////////////////////////////////////////
734 enum SWR_TS_PARTITIONING // Used by SWR_TS_STATE::partitioning; implicitly numbered from 0.
735 {
736 SWR_TS_INTEGER, // 0
737 SWR_TS_ODD_FRACTIONAL, // 1
738 SWR_TS_EVEN_FRACTIONAL, // 2
739
740 SWR_TS_PARTITIONING_COUNT // 3: number of partitioning modes listed above
741 };
742
743 //////////////////////////////////////////////////////////////////////////
744 /// SWR_TS_DOMAIN - Defines Tessellation Domain
745 /////////////////////////////////////////////////////////////////////////
746 enum SWR_TS_DOMAIN // Used by SWR_TS_STATE::domain; implicitly numbered from 0.
747 {
748 SWR_TS_QUAD, // 0
749 SWR_TS_TRI, // 1
750 SWR_TS_ISOLINE, // 2
751
752 SWR_TS_DOMAIN_COUNT // 3: number of domains listed above
753 };
754
755 //////////////////////////////////////////////////////////////////////////
756 /// SWR_TS_STATE - Tessellation state
757 /////////////////////////////////////////////////////////////////////////
758 struct SWR_TS_STATE // Tessellation configuration: enable flag, tessellator settings and HS/DS attribute counts.
759 {
760 bool tsEnable;
761 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
762 SWR_TS_PARTITIONING partitioning; // @llvm_enum
763 SWR_TS_DOMAIN domain; // @llvm_enum
764
765 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
766
767 uint32_t numHsInputAttribs; // attribute count for hull shader input
768 uint32_t numHsOutputAttribs; // attribute count for hull shader output
769 uint32_t numDsOutputAttribs; // attribute count for domain shader output
770 };
771
772 // output merger state
773 struct SWR_RENDER_TARGET_BLEND_STATE // Per-render-target channel write-disable flags, packed into one byte (asserted below).
774 {
775 uint8_t writeDisableRed : 1; // each flag is a 1-bit field; the remaining 4 bits of the byte are unused
776 uint8_t writeDisableGreen : 1;
777 uint8_t writeDisableBlue : 1;
778 uint8_t writeDisableAlpha : 1;
779 };
780 static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
781
782 enum SWR_MULTISAMPLE_COUNT // Sample-count codes; each enumerator's value is log2 of the sample count in its name.
783 {
784 SWR_MULTISAMPLE_1X = 0,
785 SWR_MULTISAMPLE_2X, // 1
786 SWR_MULTISAMPLE_4X, // 2
787 SWR_MULTISAMPLE_8X, // 3
788 SWR_MULTISAMPLE_16X, // 4
789 SWR_MULTISAMPLE_TYPE_COUNT // 5: number of sample-count codes listed above
790 };
791
792 struct SWR_BLEND_STATE // Blend state passed (by const pointer) to PFN_BLEND_JIT_FUNC; size is pinned by the static_assert below.
793 {
794 // constant blend factor color in RGBA float
795 float constantColor[4];
796
797 // alpha test reference value in unorm8 or float32
798 uint32_t alphaTestReference;
799 uint32_t sampleMask;
800 // all RT's have the same sample count
801 ///@todo move this to Output Merger state when we refactor
802 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
803
804 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS]; // one 1-byte entry per render target
805 };
806 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size"); // 16 (floats) + 4 + 4 + 4 (enum) + 8 (render targets)
807
808 //////////////////////////////////////////////////////////////////////////
809 /// FUNCTION POINTERS FOR SHADERS
810
811 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out); // fetch shader: takes the context by reference, writes one simdvertex
812 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext); // vertex shader
813 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext); // hull shader
814 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext); // domain shader
815 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext); // geometry shader
816 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext); // compute shader
817 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext); // stream output shader (SOS); no private-data handle
818 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); // pixel shader
819 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); // same signature as PFN_PIXEL_KERNEL
820 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
821 simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
822 uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
823 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar); // note: unlike the typedefs above, no __cdecl is specified
824
825
826
827 //////////////////////////////////////////////////////////////////////////
828 /// FRONTEND_STATE
829 /////////////////////////////////////////////////////////////////////////
830 struct SWR_FRONTEND_STATE // Front-end configuration: viewport-transform bypass, cut index enable, provoking vertex selection.
831 {
832 // skip clip test, perspective divide, and viewport transform
833 // intended for verts in screen space
834 bool vpTransformDisable;
835 bool bEnableCutIndex;
836 union // provokingVertex: named bitfields overlaid on a single uint32_t
837 {
838 struct
839 {
840 uint32_t triFan : 2; // 2-bit field
841 uint32_t lineStripList : 1; // 1-bit field
842 uint32_t triStripList : 2; // 2-bit field
843 };
844 uint32_t bits; // all of the fields above viewed as one 32-bit word
845 } provokingVertex;
846 uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
847 };
848
849 //////////////////////////////////////////////////////////////////////////
850 /// VIEWPORT_MATRIX
851 /////////////////////////////////////////////////////////////////////////
852 struct SWR_VIEWPORT_MATRIX // Six matrix elements for a single viewport. NOTE(review): presumably the scale (m00, m11, m22) and translate (m30, m31, m32) terms - confirm
853 {
854 float m00;
855 float m11;
856 float m22;
857 float m30;
858 float m31;
859 float m32;
860 };
861
862 //////////////////////////////////////////////////////////////////////////
863 /// VIEWPORT_MATRICES
864 /////////////////////////////////////////////////////////////////////////
865 struct SWR_VIEWPORT_MATRICES // Same six elements as SWR_VIEWPORT_MATRIX, each stored as an array with one entry per viewport.
866 {
867 float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
868 float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
869 float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
870 float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
871 float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
872 float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
873 };
874
875 //////////////////////////////////////////////////////////////////////////
876 /// SWR_VIEWPORT
877 /////////////////////////////////////////////////////////////////////////
878 struct SWR_VIEWPORT // Viewport rectangle and depth range, all floats.
879 {
880 float x; // rectangle origin, x
881 float y; // rectangle origin, y
882 float width;
883 float height;
884 float minZ; // depth range minimum
885 float maxZ; // depth range maximum
886 };
887
888 //////////////////////////////////////////////////////////////////////////
889 /// SWR_CULLMODE
890 //////////////////////////////////////////////////////////////////////////
891 enum SWR_CULLMODE // Cull mode identifiers, implicitly numbered from 0.
892 {
893 SWR_CULLMODE_BOTH, // 0
894 SWR_CULLMODE_NONE, // 1
895 SWR_CULLMODE_FRONT, // 2
896 SWR_CULLMODE_BACK // 3
897 };
898
899 enum SWR_FILLMODE // Fill mode identifiers, implicitly numbered from 0.
900 {
901 SWR_FILLMODE_POINT, // 0
902 SWR_FILLMODE_WIREFRAME, // 1
903 SWR_FILLMODE_SOLID // 2
904 };
905
906 enum SWR_FRONTWINDING // Front-face winding identifiers, implicitly numbered from 0.
907 {
908 SWR_FRONTWINDING_CW, // 0
909 SWR_FRONTWINDING_CCW // 1
910 };
911
912
913 enum SWR_PIXEL_LOCATION // Pixel location identifiers, implicitly numbered from 0.
914 {
915 SWR_PIXEL_LOCATION_CENTER, // 0
916 SWR_PIXEL_LOCATION_UL, // 1
917 };
918
919 // fixed point screen space sample locations within a pixel
920 struct SWR_MULTISAMPLE_POS
921 {
922 public:
923 INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
924 INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
925 INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
926 INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
927 INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
928 INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
929 INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
930 INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
931 typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
932 INLINE sampleArrayT X() const { return _x; }; // @llvm_func
933 INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
934 INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
935 INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
936 INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
937 INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
938 INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
939 INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
940
941 INLINE void PrecalcSampleData(int numSamples) // @llvm_func_start
942 {
943 for(int i = 0; i < numSamples; i++)
944 {
945 _vXi[i] = _mm_set1_epi32(_xi[i]);
946 _vYi[i] = _mm_set1_epi32(_yi[i]);
947 _vX[i] = _simd_set1_ps(_x[i]);
948 _vY[i] = _simd_set1_ps(_y[i]);
949 }
950 // precalculate the raster tile BB for the rasterizer.
951 CalcTileSampleOffsets(numSamples);
952 } // @llvm_func_end
953
954
955 private:
956 INLINE void CalcTileSampleOffsets(int numSamples) // @llvm_func_start
957 {
958 auto expandThenBlend4 = [](uint32_t* min, uint32_t* max, auto mask)
959 {
960 __m128i vMin = _mm_set1_epi32(*min);
961 __m128i vMax = _mm_set1_epi32(*max);
962 return _simd_blend4_epi32<decltype(mask)::value>(vMin, vMax);
963 };
964
965 auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
966 auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
967 std::integral_constant<int, 0xA> xMask;
968 // BR(max), BL(min), UR(max), UL(min)
969 tileSampleOffsetsX = expandThenBlend4(minXi, maxXi, xMask);
970
971 auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
972 auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
973 std::integral_constant<int, 0xC> yMask;
974 // BR(max), BL(min), UR(max), UL(min)
975 tileSampleOffsetsY = expandThenBlend4(minYi, maxYi, yMask);
976 }; // @llvm_func_end
977 // scalar sample values
978 uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
979 uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
980 float _x[SWR_MAX_NUM_MULTISAMPLES];
981 float _y[SWR_MAX_NUM_MULTISAMPLES];
982
983 // precalc'd / vectorized samples
984 __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
985 __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
986 simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
987 simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
988 __m128i tileSampleOffsetsX;
989 __m128i tileSampleOffsetsY;
990
991 };
992
993 //////////////////////////////////////////////////////////////////////////
994 /// SWR_RASTSTATE
995 //////////////////////////////////////////////////////////////////////////
996 struct SWR_RASTSTATE
997 {
998 uint32_t cullMode : 2;
999 uint32_t fillMode : 2;
1000 uint32_t frontWinding : 1;
1001 uint32_t scissorEnable : 1;
1002 uint32_t depthClipEnable : 1;
1003 uint32_t clipHalfZ : 1;
1004 uint32_t pointParam : 1;
1005 uint32_t pointSpriteEnable : 1;
1006 uint32_t pointSpriteTopOrigin : 1;
1007 uint32_t forcedSampleCount : 1;
1008 uint32_t pixelOffset : 1;
1009 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
1010 uint32_t conservativeRast : 1;
1011
1012 float pointSize;
1013 float lineWidth;
1014
1015 float depthBias;
1016 float slopeScaledDepthBias;
1017 float depthBiasClamp;
1018 SWR_FORMAT depthFormat; // @llvm_enum
1019
1020 // sample count the rasterizer is running at
1021 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
1022 uint32_t pixelLocation; // UL or Center
1023 SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
1024 bool bIsCenterPattern; // @llvm_enum
1025
1026 // user clip/cull distance enables
1027 uint8_t cullDistanceMask;
1028 uint8_t clipDistanceMask;
1029 };
1030
/// @brief Constant-source selector; the four enumerators take the values 0..3.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000       = 0,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT = 1,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT = 2,
    SWR_CONSTANT_SOURCE_PRIM_ID          = 3
};
1038
/// @brief One swizzle entry packed into uint16_t bit-fields (11 bits used):
///        a source attribute index, a constant-source selector and a
///        per-component override mask.
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib          : 5;     // source attribute (5 bits: 0..31)
    uint16_t constantSource        : 2;     // constant source to apply (2 bits: 0..3)
    uint16_t componentOverrideMask : 4;     // override component with constant source (4 bits)
};
1045
1046 // backend state
1047 struct SWR_BACKEND_STATE
1048 {
1049 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
1050 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
1051
1052 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
1053 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
1054
1055 bool swizzleEnable; // when enabled, core will parse the swizzle map when
1056 // setting up attributes for the backend, otherwise
1057 // all attributes up to numAttributes will be sent
1058 SWR_ATTRIB_SWIZZLE swizzleMap[32];
1059 };
1060
1061
/// @brief Depth/stencil state, viewable either as named fields or as the raw
///        three-element uint32_t array `value`.
///
/// The named view is an anonymous struct: 32 bits of bit-fields (dword 0),
/// four 8-bit masks (dword 1) and two 8-bit reference values (dword 2; the
/// rest of that dword has no named field).
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0 -- enables (5 x 1 bit)
        uint32_t depthWriteEnable               : 1;
        uint32_t depthTestEnable                : 1;
        uint32_t stencilWriteEnable             : 1;
        uint32_t stencilTestEnable              : 1;
        uint32_t doubleSidedStencilTestEnable   : 1;

        // dword 0 -- compare functions (2 x 3 bits)
        uint32_t depthTestFunc                  : 3;
        uint32_t stencilTestFunc                : 3;

        // dword 0 -- stencil ops and back-face compare function (7 x 3 bits)
        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp          : 3;
        uint32_t backfaceStencilTestFunc        : 3;
        uint32_t stencilPassDepthPassOp         : 3;
        uint32_t stencilPassDepthFailOp         : 3;
        uint32_t stencilFailOp                  : 3;

        // dword 1 -- write/test masks
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2 -- reference values
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };

    // raw view of the same storage
    uint32_t value[3];
};
1096
/// @brief Shading-rate selector. SWR_SHADING_RATE_COUNT is the last
///        enumerator and equals the number of selectable rates (2).
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    SWR_SHADING_RATE_COUNT  = 2,
};
1103
/// @brief Input-coverage selector. SWR_INPUT_COVERAGE_COUNT is the last
///        enumerator and equals the number of selectable modes (3).
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE               = 0,
    SWR_INPUT_COVERAGE_NORMAL             = 1,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE = 2,
    SWR_INPUT_COVERAGE_COUNT              = 3,
};
1111
/// @brief Position-offset selector. SWR_PS_POSITION_OFFSET_COUNT is the last
///        enumerator and equals the number of selectable modes (3).
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    SWR_PS_POSITION_OFFSET_COUNT    = 3,
};
1119
/// @brief Single-bit flags (bits 0..2) that can be OR-ed together into a
///        3-bit barycentrics mask.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 1 << 0,   // 0x1
    SWR_BARYCENTRIC_CENTROID_MASK   = 1 << 1,   // 0x2
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 1 << 2,   // 0x4
};
1126
1127 // pixel shader state
1128 struct SWR_PS_STATE
1129 {
1130 // dword 0-1
1131 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1132
1133 // dword 2
1134 uint32_t killsPixel : 1; // pixel shader can kill pixels
1135 uint32_t inputCoverage : 2; // ps uses input coverage
1136 uint32_t writesODepth : 1; // pixel shader writes to depth
1137 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1138 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1139 uint32_t numRenderTargets : 4; // number of render target outputs in use (0-8)
1140 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1141 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1142 uint32_t usesUAV : 1; // pixel shader accesses UAV
1143 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1144 };
1145
1146 // depth bounds state
/// @brief Depth bounds test state: an enable flag and the float minimum and
///        maximum values of the bounds.
struct SWR_DEPTH_BOUNDS_STATE
{
    bool    depthBoundsTestEnable;      // switches the depth bounds test on or off
    float   depthBoundsTestMinValue;    // lower bound
    float   depthBoundsTestMaxValue;    // upper bound
};
1153