1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for API state.
27 ******************************************************************************/
30 #include "common/formats.h"
31 #include "common/simdintrin.h"
// Clear-target flags. COLOR, DEPTH and STENCIL each occupy a distinct bit
// (bits 0, 1 and 2), so they can be OR-ed together into a single mask;
// SWR_CLEAR_NONE is the empty mask.
34 #define SWR_CLEAR_NONE 0
35 #define SWR_CLEAR_COLOR (1 << 0)
36 #define SWR_CLEAR_DEPTH (1 << 1)
37 #define SWR_CLEAR_STENCIL (1 << 2)
45 //////////////////////////////////////////////////////////////////////////
46 /// PRIMITIVE_TOPOLOGY.
47 //////////////////////////////////////////////////////////////////////////
48 enum PRIMITIVE_TOPOLOGY
54 TOP_TRIANGLE_LIST
= 0x4,
55 TOP_TRIANGLE_STRIP
= 0x5,
56 TOP_TRIANGLE_FAN
= 0x6,
59 TOP_LINE_LIST_ADJ
= 0x9,
60 TOP_LISTSTRIP_ADJ
= 0xA,
61 TOP_TRI_LIST_ADJ
= 0xB,
62 TOP_TRI_STRIP_ADJ
= 0xC,
63 TOP_TRI_STRIP_REVERSE
= 0xD,
67 TOP_POINT_LIST_BF
= 0x11,
68 TOP_LINE_STRIP_CONT
= 0x12,
69 TOP_LINE_STRIP_BF
= 0x13,
70 TOP_LINE_STRIP_CONT_BF
= 0x14,
71 TOP_TRIANGLE_FAN_NOSTIPPLE
= 0x16,
72 TOP_TRIANGLE_DISC
= 0x17, /// @todo What is this??
74 TOP_PATCHLIST_BASE
= 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
75 TOP_PATCHLIST_1
= 0x20, // List of 1-vertex patches
76 TOP_PATCHLIST_2
= 0x21,
77 TOP_PATCHLIST_3
= 0x22,
78 TOP_PATCHLIST_4
= 0x23,
79 TOP_PATCHLIST_5
= 0x24,
80 TOP_PATCHLIST_6
= 0x25,
81 TOP_PATCHLIST_7
= 0x26,
82 TOP_PATCHLIST_8
= 0x27,
83 TOP_PATCHLIST_9
= 0x28,
84 TOP_PATCHLIST_10
= 0x29,
85 TOP_PATCHLIST_11
= 0x2A,
86 TOP_PATCHLIST_12
= 0x2B,
87 TOP_PATCHLIST_13
= 0x2C,
88 TOP_PATCHLIST_14
= 0x2D,
89 TOP_PATCHLIST_15
= 0x2E,
90 TOP_PATCHLIST_16
= 0x2F,
91 TOP_PATCHLIST_17
= 0x30,
92 TOP_PATCHLIST_18
= 0x31,
93 TOP_PATCHLIST_19
= 0x32,
94 TOP_PATCHLIST_20
= 0x33,
95 TOP_PATCHLIST_21
= 0x34,
96 TOP_PATCHLIST_22
= 0x35,
97 TOP_PATCHLIST_23
= 0x36,
98 TOP_PATCHLIST_24
= 0x37,
99 TOP_PATCHLIST_25
= 0x38,
100 TOP_PATCHLIST_26
= 0x39,
101 TOP_PATCHLIST_27
= 0x3A,
102 TOP_PATCHLIST_28
= 0x3B,
103 TOP_PATCHLIST_29
= 0x3C,
104 TOP_PATCHLIST_30
= 0x3D,
105 TOP_PATCHLIST_31
= 0x3E,
106 TOP_PATCHLIST_32
= 0x3F, // List of 32-vertex patches
109 //////////////////////////////////////////////////////////////////////////
111 //////////////////////////////////////////////////////////////////////////
124 //////////////////////////////////////////////////////////////////////////
125 /// SWR_RENDERTARGET_ATTACHMENT
126 /// @todo It's not clear what an "attachment" means. It's not a common term.
127 //////////////////////////////////////////////////////////////////////////
128 enum SWR_RENDERTARGET_ATTACHMENT
130 SWR_ATTACHMENT_COLOR0
,
131 SWR_ATTACHMENT_COLOR1
,
132 SWR_ATTACHMENT_COLOR2
,
133 SWR_ATTACHMENT_COLOR3
,
134 SWR_ATTACHMENT_COLOR4
,
135 SWR_ATTACHMENT_COLOR5
,
136 SWR_ATTACHMENT_COLOR6
,
137 SWR_ATTACHMENT_COLOR7
,
138 SWR_ATTACHMENT_DEPTH
,
139 SWR_ATTACHMENT_STENCIL
,
// Number of color render targets; matches the eight SWR_ATTACHMENT_COLORn
// entries (COLOR0..COLOR7).
144 #define SWR_NUM_RENDERTARGETS 8
// One bit per attachment: eight color bits (0x001..0x080), then depth (0x100)
// and stencil (0x200).
146 #define SWR_ATTACHMENT_COLOR0_BIT 0x001
147 #define SWR_ATTACHMENT_COLOR1_BIT 0x002
148 #define SWR_ATTACHMENT_COLOR2_BIT 0x004
149 #define SWR_ATTACHMENT_COLOR3_BIT 0x008
150 #define SWR_ATTACHMENT_COLOR4_BIT 0x010
151 #define SWR_ATTACHMENT_COLOR5_BIT 0x020
152 #define SWR_ATTACHMENT_COLOR6_BIT 0x040
153 #define SWR_ATTACHMENT_COLOR7_BIT 0x080
154 #define SWR_ATTACHMENT_DEPTH_BIT 0x100
155 #define SWR_ATTACHMENT_STENCIL_BIT 0x200
// MASK_ALL is the OR of all ten attachment bits above;
// MASK_COLOR is the OR of the eight COLORn bits only.
156 #define SWR_ATTACHMENT_MASK_ALL 0x3ff
157 #define SWR_ATTACHMENT_MASK_COLOR 0x0ff
160 //////////////////////////////////////////////////////////////////////////
161 /// @brief SWR Inner Tessellation factor ID
162 /// See above GetTessFactorOutputPosition code for documentation
163 enum SWR_INNER_TESSFACTOR_ID
165 SWR_QUAD_U_TRI_INSIDE
,
168 SWR_NUM_INNER_TESS_FACTORS
,
171 //////////////////////////////////////////////////////////////////////////
172 /// @brief SWR Outer Tessellation factor ID
173 /// See above GetTessFactorOutputPosition code for documentation
174 enum SWR_OUTER_TESSFACTOR_ID
176 SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL
,
177 SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY
,
178 SWR_QUAD_U_EQ1_TRI_W
,
181 SWR_NUM_OUTER_TESS_FACTORS
,
185 /////////////////////////////////////////////////////////////////////////
187 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
188 /// Contains position in clip space, hardcoded to attribute 0,
189 /// space for up to 32 attributes, as well as any SGV values generated
191 /////////////////////////////////////////////////////////////////////////
192 #define VERTEX_POSITION_SLOT 0
193 #define VERTEX_ATTRIB_START_SLOT 1
194 #define VERTEX_ATTRIB_END_SLOT 32
195 #define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here
196 #define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here
197 #define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
198 #define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
199 #define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
200 static_assert(VERTEX_POINT_SIZE_SLOT
< KNOB_NUM_ATTRIBUTES
, "Mismatched attribute slot size");
205 simdvector attrib
[KNOB_NUM_ATTRIBUTES
];
208 //////////////////////////////////////////////////////////////////////////
210 /// @brief Input to vertex shader
211 /////////////////////////////////////////////////////////////////////////
212 struct SWR_VS_CONTEXT
214 simdvertex
* pVin
; // IN: SIMD input vertex data store
215 simdvertex
* pVout
; // OUT: SIMD output vertex data store
217 uint32_t InstanceID
; // IN: Instance ID, constant across all verts of the SIMD
218 simdscalari VertexID
; // IN: Vertex ID
219 simdscalari mask
; // IN: Active mask for shader
222 /////////////////////////////////////////////////////////////////////////
224 /// @brief defines a control point element as passed from the output
225 /// of the hull shader to the input of the domain shader
226 /////////////////////////////////////////////////////////////////////////
237 ScalarAttrib attrib
[KNOB_NUM_ATTRIBUTES
];
240 //////////////////////////////////////////////////////////////////////////
241 /// SWR_TESSELLATION_FACTORS
242 /// @brief Tessellation factors structure (non-vector)
243 /////////////////////////////////////////////////////////////////////////
244 struct SWR_TESSELLATION_FACTORS
246 float OuterTessFactors
[SWR_NUM_OUTER_TESS_FACTORS
];
247 float InnerTessFactors
[SWR_NUM_INNER_TESS_FACTORS
];
250 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
253 SWR_TESSELLATION_FACTORS tessFactors
;
254 ScalarCPoint cp
[MAX_NUM_VERTS_PER_PRIM
];
255 ScalarCPoint patchData
;
258 //////////////////////////////////////////////////////////////////////////
260 /// @brief Input to hull shader
261 /////////////////////////////////////////////////////////////////////////
262 struct SWR_HS_CONTEXT
264 simdvertex vert
[MAX_NUM_VERTS_PER_PRIM
]; // IN: (SIMD) input primitive data
265 simdscalari PrimitiveID
; // IN: (SIMD) primitive ID generated from the draw call
266 simdscalari mask
; // IN: Active mask for shader
267 ScalarPatch
* pCPout
; // OUT: Output control point patch
268 // SIMD-sized-array of SCALAR patches
271 //////////////////////////////////////////////////////////////////////////
273 /// @brief Input to domain shader
274 /////////////////////////////////////////////////////////////////////////
275 struct SWR_DS_CONTEXT
277 uint32_t PrimitiveID
; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
278 uint32_t vectorOffset
; // IN: (SCALAR) vector index offset into SIMD data.
279 uint32_t vectorStride
; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
280 ScalarPatch
* pCpIn
; // IN: (SCALAR) Control patch
281 simdscalar
* pDomainU
; // IN: (SIMD) Domain Point U coords
282 simdscalar
* pDomainV
; // IN: (SIMD) Domain Point V coords
283 simdscalari mask
; // IN: Active mask for shader
284 simdscalar
* pOutputData
; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
287 //////////////////////////////////////////////////////////////////////////
289 /// @brief Input to geometry shader.
290 /////////////////////////////////////////////////////////////////////////
291 struct SWR_GS_CONTEXT
293 simdvertex vert
[MAX_NUM_VERTS_PER_PRIM
]; // IN: input primitive data for SIMD prims
294 simdscalari PrimitiveID
; // IN: input primitive ID generated from the draw call
295 uint32_t InstanceID
; // IN: input instance ID
296 simdscalari mask
; // IN: Active mask for shader
297 uint8_t* pStream
; // OUT: output stream (contains vertices for all output streams)
298 uint8_t* pCutOrStreamIdBuffer
; // OUT: cut or stream id buffer
299 simdscalari vertexCount
; // OUT: num vertices emitted per SIMD lane
302 struct PixelPositions
310 #define SWR_MAX_NUM_MULTISAMPLES 16
312 //////////////////////////////////////////////////////////////////////////
314 /// @brief Input to pixel shader.
315 /////////////////////////////////////////////////////////////////////////
316 struct SWR_PS_CONTEXT
318 PixelPositions vX
; // IN: x location(s) of pixels
319 PixelPositions vY
; // IN: y location(s) of pixels
320 simdscalar vZ
; // INOUT: z location of pixels
321 simdscalari activeMask
; // OUT: mask for kill
322 simdscalar inputMask
; // IN: input coverage mask for all samples
323 simdscalari oMask
; // OUT: mask for output coverage
325 PixelPositions vI
; // barycentric coords evaluated at pixel center, sample position, centroid
327 PixelPositions vOneOverW
; // IN: 1/w
329 const float* pAttribs
; // IN: pointer to attribute barycentric coefficients
330 const float* pPerspAttribs
; // IN: pointer to attribute/w barycentric coefficients
331 const float* pRecipW
; // IN: pointer to 1/w coord for each vertex
332 const float *I
; // IN: Barycentric A, B, and C coefs used to compute I
333 const float *J
; // IN: Barycentric A, B, and C coefs used to compute J
334 float recipDet
; // IN: 1/Det, used when barycentric interpolating attributes
335 const float* pSamplePosX
; // IN: array of sample positions
336 const float* pSamplePosY
; // IN: array of sample positions
337 simdvector shaded
[SWR_NUM_RENDERTARGETS
];
338 // OUT: result color per rendertarget
340 uint32_t frontFace
; // IN: front- 1, back- 0
341 uint32_t primID
; // IN: primitive ID
342 uint32_t sampleIndex
; // IN: sampleIndex
346 //////////////////////////////////////////////////////////////////////////
348 /// @brief Input to compute shader.
349 /////////////////////////////////////////////////////////////////////////
350 struct SWR_CS_CONTEXT
352 // The ThreadGroupId is the current thread group index relative
353 // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
354 // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
356 // Compute shader accepts the following system values.
357 // o ThreadId - Current thread id relative to all other threads in dispatch.
358 // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
359 // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
360 // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
362 // All of these system values can be computed in the shader. They will be
363 // derived from the current tile counter. The tile counter is an atomic counter that
364 // resides in the draw context and is initialized to the product of the dispatch dims.
366 // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
368 // Each CPU worker thread atomically decrements this counter and passes the current
369 // count into the shader. When the count reaches 0, all thread groups in the
370 // dispatch call have been completed.
372 uint32_t tileCounter
; // The tile counter value for this thread group.
374 // Dispatch dimensions used by shader to compute system values from the tile counter.
375 uint32_t dispatchDims
[3];
377 uint8_t* pTGSM
; // Thread Group Shared Memory pointer.
379 uint8_t* pSpillFillBuffer
; // Spill/fill buffer for barrier support
385 SWR_TILE_NONE
= 0x0, // Linear mode (no tiling)
386 SWR_TILE_MODE_WMAJOR
, // W major tiling
387 SWR_TILE_MODE_XMAJOR
, // X major tiling
388 SWR_TILE_MODE_YMAJOR
, // Y major tiling
389 SWR_TILE_SWRZ
, // SWR-Z tiling
394 enum SWR_SURFACE_TYPE
401 SURFACE_STRUCTURED_BUFFER
= 5,
430 enum SWR_BLEND_FACTOR
433 BLENDFACTOR_SRC_COLOR
,
434 BLENDFACTOR_SRC_ALPHA
,
435 BLENDFACTOR_DST_ALPHA
,
436 BLENDFACTOR_DST_COLOR
,
437 BLENDFACTOR_SRC_ALPHA_SATURATE
,
438 BLENDFACTOR_CONST_COLOR
,
439 BLENDFACTOR_CONST_ALPHA
,
440 BLENDFACTOR_SRC1_COLOR
,
441 BLENDFACTOR_SRC1_ALPHA
,
443 BLENDFACTOR_INV_SRC_COLOR
,
444 BLENDFACTOR_INV_SRC_ALPHA
,
445 BLENDFACTOR_INV_DST_ALPHA
,
446 BLENDFACTOR_INV_DST_COLOR
,
447 BLENDFACTOR_INV_CONST_COLOR
,
448 BLENDFACTOR_INV_CONST_ALPHA
,
449 BLENDFACTOR_INV_SRC1_COLOR
,
450 BLENDFACTOR_INV_SRC1_ALPHA
466 LOGICOP_AND_INVERTED
,
467 LOGICOP_COPY_INVERTED
,
482 struct SWR_SURFACE_STATE
484 uint8_t *pBaseAddress
;
485 SWR_SURFACE_TYPE type
; // @llvm_enum
486 SWR_FORMAT format
; // @llvm_enum
491 uint32_t samplePattern
;
494 uint32_t minLod
; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
495 uint32_t maxLod
; // for sampled surfaces, the max LOD that can be accessed
496 float resourceMinLod
; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
497 uint32_t lod
; // for render targets, the lod being rendered to
498 uint32_t arrayIndex
; // for render targets, the array index being rendered to for arrayed surfaces
499 SWR_TILE_MODE tileMode
; // @llvm_enum
500 bool bInterleavedSamples
; // are MSAA samples stored interleaved or planar
506 uint32_t lodOffsets
[2][15]; // lod offsets for sampled surfaces
508 uint8_t *pAuxBaseAddress
; // Used for compression, append/consume counter, etc.
511 // vertex fetch state
512 // WARNING- any changes to this struct need to be reflected
513 // in the fetch shader jit
514 struct SWR_VERTEX_BUFFER_STATE
518 const uint8_t *pData
;
521 uint32_t maxVertex
; // size / pitch. precalculated value used by fetch shader for OOB checks
522 uint32_t partialInboundsSize
; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
525 struct SWR_INDEX_BUFFER_STATE
527 // Format type for indices (e.g. UINT16, UINT32, etc.)
528 SWR_FORMAT format
; // @llvm_enum
529 const void *pIndices
;
534 //////////////////////////////////////////////////////////////////////////
535 /// SWR_FETCH_CONTEXT
536 /// @brief Input to fetch shader.
537 /// @note WARNING - Changes to this struct need to be reflected in the
538 /// fetch shader jit.
539 /////////////////////////////////////////////////////////////////////////
540 struct SWR_FETCH_CONTEXT
542 const SWR_VERTEX_BUFFER_STATE
* pStreams
; // IN: array of bound vertex buffers
543 const int32_t* pIndices
; // IN: pointer to index buffer for indexed draws
544 const int32_t* pLastIndex
; // IN: pointer to end of index buffer, used for bounds checking
545 uint32_t CurInstance
; // IN: current instance
546 uint32_t BaseVertex
; // IN: base vertex
547 uint32_t StartVertex
; // IN: start vertex
548 uint32_t StartInstance
; // IN: start instance
549 simdscalari VertexID
; // OUT: vector of vertex IDs
550 simdscalari CutMask
; // OUT: vector mask of indices which have the cut index value
553 //////////////////////////////////////////////////////////////////////////
556 /// @brief All statistics generated by SWR go here. These are public
558 /////////////////////////////////////////////////////////////////////////
562 uint64_t DepthPassCount
; // Number of passing depth tests. Not exact.
565 uint64_t IaVertices
; // Number of Fetch Shader vertices
566 uint64_t IaPrimitives
; // Number of PA primitives.
567 uint64_t VsInvocations
; // Number of Vertex Shader invocations
568 uint64_t HsInvocations
; // Number of Hull Shader invocations
569 uint64_t DsInvocations
; // Number of Domain Shader invocations
570 uint64_t GsInvocations
; // Number of Geometry Shader invocations
571 uint64_t PsInvocations
; // Number of Pixel Shader invocations
572 uint64_t CsInvocations
; // Number of Compute Shader invocations
573 uint64_t CInvocations
; // Number of clipper invocations
574 uint64_t CPrimitives
; // Number of clipper primitives.
575 uint64_t GsPrimitives
; // Number of prims GS outputs.
578 uint32_t SoWriteOffset
[4];
579 uint64_t SoPrimStorageNeeded
[4];
580 uint64_t SoNumPrimsWritten
[4];
583 //////////////////////////////////////////////////////////////////////////
584 /// STREAMOUT_BUFFERS
585 /////////////////////////////////////////////////////////////////////////
587 #define MAX_SO_STREAMS 4
588 #define MAX_ATTRIBUTES 32
590 struct SWR_STREAMOUT_BUFFER
594 // Pointers to streamout buffers.
597 // Size of buffer in dwords.
600 // Vertex pitch of buffer in dwords.
603 // Offset into buffer in dwords. SOS will increment this offset.
604 uint32_t streamOffset
;
606 // Offset to the SO write offset. If not null then we update offset here.
607 uint32_t* pWriteOffset
;
611 //////////////////////////////////////////////////////////////////////////
613 /////////////////////////////////////////////////////////////////////////
614 struct SWR_STREAMOUT_STATE
616 // This disables stream output.
619 // which streams are enabled for streamout
620 bool streamEnable
[MAX_SO_STREAMS
];
622 // If set then do not send any streams to the rasterizer.
623 bool rasterizerDisable
;
625 // Specifies which stream to send to the rasterizer.
626 uint32_t streamToRasterizer
;
628 // The stream masks specify which attributes are sent to which streams.
629 // These masks help the FE to set up the pPrimData buffer that is passed
630 // to the Stream Output Shader (SOS) function.
631 uint32_t streamMasks
[MAX_SO_STREAMS
];
633 // Number of attributes, including position, per vertex that are streamed out.
634 // This should match number of bits in stream mask.
635 uint32_t streamNumEntries
[MAX_SO_STREAMS
];
638 //////////////////////////////////////////////////////////////////////////
639 /// STREAMOUT_CONTEXT - Passed to SOS
640 /////////////////////////////////////////////////////////////////////////
641 struct SWR_STREAMOUT_CONTEXT
644 SWR_STREAMOUT_BUFFER
* pBuffer
[MAX_SO_STREAMS
];
646 // Num prims written for this stream
647 uint32_t numPrimsWritten
;
649 // Num prims that should have been written if there were no overflow.
650 uint32_t numPrimStorageNeeded
;
653 //////////////////////////////////////////////////////////////////////////
654 /// SWR_GS_STATE - Geometry shader state
655 /////////////////////////////////////////////////////////////////////////
660 // number of input attributes per vertex. used by the frontend to
661 // optimize assembling primitives for GS
662 uint32_t numInputAttribs
;
664 // output topology - can be point, tristrip, or linestrip
665 PRIMITIVE_TOPOLOGY outputTopology
; // @llvm_enum
667 // maximum number of verts that can be emitted by a single instance of the GS
668 uint32_t maxNumVerts
;
671 uint32_t instanceCount
;
673 // geometry shader emits renderTargetArrayIndex
674 bool emitsRenderTargetArrayIndex
;
676 // geometry shader emits PrimitiveID
677 bool emitsPrimitiveID
;
679 // if true, geometry shader emits a single stream, with separate cut buffer.
680 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
681 // to map vertices to streams
684 // when single stream is enabled, singleStreamID dictates which stream is being output.
685 // field ignored if isSingleStream is false
686 uint32_t singleStreamID
;
690 //////////////////////////////////////////////////////////////////////////
691 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
692 /////////////////////////////////////////////////////////////////////////
693 enum SWR_TS_OUTPUT_TOPOLOGY
697 SWR_TS_OUTPUT_TRI_CW
,
698 SWR_TS_OUTPUT_TRI_CCW
,
700 SWR_TS_OUTPUT_TOPOLOGY_COUNT
703 //////////////////////////////////////////////////////////////////////////
704 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
705 /////////////////////////////////////////////////////////////////////////
706 enum SWR_TS_PARTITIONING
709 SWR_TS_ODD_FRACTIONAL
,
710 SWR_TS_EVEN_FRACTIONAL
,
712 SWR_TS_PARTITIONING_COUNT
715 //////////////////////////////////////////////////////////////////////////
716 /// SWR_TS_DOMAIN - Defines Tessellation Domain
717 /////////////////////////////////////////////////////////////////////////
727 //////////////////////////////////////////////////////////////////////////
728 /// SWR_TS_STATE - Tessellation state
729 /////////////////////////////////////////////////////////////////////////
733 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology
; // @llvm_enum
734 SWR_TS_PARTITIONING partitioning
; // @llvm_enum
735 SWR_TS_DOMAIN domain
; // @llvm_enum
737 PRIMITIVE_TOPOLOGY postDSTopology
; // @llvm_enum
739 uint32_t numHsInputAttribs
;
740 uint32_t numHsOutputAttribs
;
741 uint32_t numDsOutputAttribs
;
744 // output merger state
745 struct SWR_RENDER_TARGET_BLEND_STATE
747 uint8_t writeDisableRed
: 1;
748 uint8_t writeDisableGreen
: 1;
749 uint8_t writeDisableBlue
: 1;
750 uint8_t writeDisableAlpha
: 1;
752 static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE
) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
754 enum SWR_MULTISAMPLE_COUNT
756 SWR_MULTISAMPLE_1X
= 0,
761 SWR_MULTISAMPLE_TYPE_MAX
764 struct SWR_BLEND_STATE
766 // constant blend factor color in RGBA float
767 float constantColor
[4];
769 // alpha test reference value in unorm8 or float32
770 uint32_t alphaTestReference
;
772 // all RT's have the same sample count
773 ///@todo move this to Output Merger state when we refactor
774 SWR_MULTISAMPLE_COUNT sampleCount
; // @llvm_enum
776 SWR_RENDER_TARGET_BLEND_STATE renderTarget
[SWR_NUM_RENDERTARGETS
];
778 static_assert(sizeof(SWR_BLEND_STATE
) == 36, "Invalid SWR_BLEND_STATE size");
780 //////////////////////////////////////////////////////////////////////////
781 /// FUNCTION POINTERS FOR SHADERS
783 typedef void(__cdecl
*PFN_FETCH_FUNC
)(SWR_FETCH_CONTEXT
& fetchInfo
, simdvertex
& out
);
784 typedef void(__cdecl
*PFN_VERTEX_FUNC
)(HANDLE hPrivateData
, SWR_VS_CONTEXT
* pVsContext
);
785 typedef void(__cdecl
*PFN_HS_FUNC
)(HANDLE hPrivateData
, SWR_HS_CONTEXT
* pHsContext
);
786 typedef void(__cdecl
*PFN_DS_FUNC
)(HANDLE hPrivateData
, SWR_DS_CONTEXT
* pDsContext
);
787 typedef void(__cdecl
*PFN_GS_FUNC
)(HANDLE hPrivateData
, SWR_GS_CONTEXT
* pGsContext
);
788 typedef void(__cdecl
*PFN_CS_FUNC
)(HANDLE hPrivateData
, SWR_CS_CONTEXT
* pCsContext
);
789 typedef void(__cdecl
*PFN_SO_FUNC
)(SWR_STREAMOUT_CONTEXT
& soContext
);
790 typedef void(__cdecl
*PFN_PIXEL_KERNEL
)(HANDLE hPrivateData
, SWR_PS_CONTEXT
*pContext
);
791 typedef void(__cdecl
*PFN_CPIXEL_KERNEL
)(HANDLE hPrivateData
, SWR_PS_CONTEXT
*pContext
);
792 typedef void(__cdecl
*PFN_BLEND_JIT_FUNC
)(const SWR_BLEND_STATE
*, simdvector
&, simdvector
&, uint32_t, uint8_t*, simdvector
&, simdscalari
*, simdscalari
*);
794 //////////////////////////////////////////////////////////////////////////
796 /////////////////////////////////////////////////////////////////////////
797 struct SWR_FRONTEND_STATE
799 // skip clip test, perspective divide, and viewport transform
800 // intended for verts in screen space
801 bool vpTransformDisable
;
807 uint32_t lineStripList
: 1;
808 uint32_t triStripList
: 2;
812 uint32_t topologyProvokingVertex
; // provoking vertex for the draw topology
815 //////////////////////////////////////////////////////////////////////////
817 /////////////////////////////////////////////////////////////////////////
818 struct SWR_VIEWPORT_MATRIX
828 //////////////////////////////////////////////////////////////////////////
830 /////////////////////////////////////////////////////////////////////////
841 //////////////////////////////////////////////////////////////////////////
843 //////////////////////////////////////////////////////////////////////////
855 SWR_FILLMODE_WIREFRAME
,
859 enum SWR_FRONTWINDING
866 enum SWR_MSAA_SAMPLE_PATTERN
868 SWR_MSAA_CENTER_PATTERN
,
869 SWR_MSAA_STANDARD_PATTERN
,
870 SWR_MSAA_SAMPLE_PATTERN_MAX
873 enum SWR_PIXEL_LOCATION
875 SWR_PIXEL_LOCATION_CENTER
,
876 SWR_PIXEL_LOCATION_UL
,
879 // fixed point screen space sample locations within a pixel
880 struct SWR_MULTISAMPLE_POS
886 enum SWR_MSAA_RASTMODE
888 SWR_MSAA_RASTMODE_OFF_PIXEL
,
889 SWR_MSAA_RASTMODE_OFF_PATTERN
,
890 SWR_MSAA_RASTMODE_ON_PIXEL
,
891 SWR_MSAA_RASTMODE_ON_PATTERN
894 //////////////////////////////////////////////////////////////////////////
896 //////////////////////////////////////////////////////////////////////////
899 uint32_t cullMode
: 2;
900 uint32_t fillMode
: 2;
901 uint32_t frontWinding
: 1;
902 uint32_t scissorEnable
: 1;
903 uint32_t depthClipEnable
: 1;
907 // point size output from the VS
911 bool pointSpriteEnable
;
912 bool pointSpriteTopOrigin
;
916 float slopeScaledDepthBias
;
917 float depthBiasClamp
;
918 SWR_FORMAT depthFormat
; // @llvm_enum
921 // multisample state for MSAA lines
923 SWR_MSAA_RASTMODE rastMode
; // @llvm_enum
925 // sample count the rasterizer is running at
926 SWR_MULTISAMPLE_COUNT sampleCount
; // @llvm_enum
927 bool bForcedSampleCount
;
928 uint32_t pixelLocation
; // UL or Center
929 bool pixelOffset
; // offset pixel positions by .5 in both the horizontal and vertical direction
930 SWR_MULTISAMPLE_POS iSamplePos
[SWR_MAX_NUM_MULTISAMPLES
];
931 SWR_MSAA_SAMPLE_PATTERN samplePattern
; // @llvm_enum
933 // user clip/cull distance enables
934 uint8_t cullDistanceMask
;
935 uint8_t clipDistanceMask
;
939 struct SWR_BACKEND_STATE
941 uint32_t constantInterpolationMask
;
942 uint32_t pointSpriteTexCoordMask
;
943 uint8_t numAttributes
;
944 uint8_t numComponents
[KNOB_NUM_ATTRIBUTES
];
948 union SWR_DEPTH_STENCIL_STATE
953 uint32_t depthWriteEnable
: 1;
954 uint32_t depthTestEnable
: 1;
955 uint32_t stencilWriteEnable
: 1;
956 uint32_t stencilTestEnable
: 1;
957 uint32_t doubleSidedStencilTestEnable
: 1;
959 uint32_t depthTestFunc
: 3;
960 uint32_t stencilTestFunc
: 3;
962 uint32_t backfaceStencilPassDepthPassOp
: 3;
963 uint32_t backfaceStencilPassDepthFailOp
: 3;
964 uint32_t backfaceStencilFailOp
: 3;
965 uint32_t backfaceStencilTestFunc
: 3;
966 uint32_t stencilPassDepthPassOp
: 3;
967 uint32_t stencilPassDepthFailOp
: 3;
968 uint32_t stencilFailOp
: 3;
971 uint8_t backfaceStencilWriteMask
;
972 uint8_t backfaceStencilTestMask
;
973 uint8_t stencilWriteMask
;
974 uint8_t stencilTestMask
;
977 uint8_t backfaceStencilRefValue
;
978 uint8_t stencilRefValue
;
983 enum SWR_SHADING_RATE
985 SWR_SHADING_RATE_PIXEL
,
986 SWR_SHADING_RATE_SAMPLE
,
987 SWR_SHADING_RATE_MAX
,
990 enum SWR_INPUT_COVERAGE
992 SWR_INPUT_COVERAGE_NONE
,
993 SWR_INPUT_COVERAGE_NORMAL
,
994 SWR_INPUT_COVERAGE_MAX
,
997 enum SWR_PS_POSITION_OFFSET
999 SWR_PS_POSITION_SAMPLE_NONE
,
1000 SWR_PS_POSITION_SAMPLE_OFFSET
,
1001 SWR_PS_POSITION_CENTROID_OFFSET
,
1002 SWR_PS_POSITION_OFFSET_MAX
,
1005 enum SWR_BARYCENTRICS_MASK
1007 SWR_BARYCENTRIC_PER_PIXEL_MASK
= 0x1,
1008 SWR_BARYCENTRIC_CENTROID_MASK
= 0x2,
1009 SWR_BARYCENTRIC_PER_SAMPLE_MASK
= 0x4,
1010 SWR_BARYCENTRICS_MASK_MAX
= 0x8
1013 // pixel shader state
1017 PFN_PIXEL_KERNEL pfnPixelShader
; // @llvm_pfn
1020 uint32_t killsPixel
: 1; // pixel shader can kill pixels
1021 uint32_t inputCoverage
: 1; // type of input coverage PS uses
1022 uint32_t writesODepth
: 1; // pixel shader writes to depth
1023 uint32_t usesSourceDepth
: 1; // pixel shader reads depth
1024 uint32_t shadingRate
: 2; // shading per pixel / sample / coarse pixel
1025 uint32_t numRenderTargets
: 4; // number of render target outputs in use (0-8)
1026 uint32_t posOffset
: 2; // type of offset (none, sample, centroid) to add to pixel position
1027 uint32_t barycentricsMask
: 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1028 uint32_t usesUAV
: 1; // pixel shader accesses UAV
1029 uint32_t forceEarlyZ
: 1; // force execution of early depth/stencil test