1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for API state.
27 ******************************************************************************/
30 #include "common/formats.h"
31 #include "common/simdintrin.h"
35 //////////////////////////////////////////////////////////////////////////
36 /// PRIMITIVE_TOPOLOGY.
37 //////////////////////////////////////////////////////////////////////////
38 enum PRIMITIVE_TOPOLOGY
44 TOP_TRIANGLE_LIST
= 0x4,
45 TOP_TRIANGLE_STRIP
= 0x5,
46 TOP_TRIANGLE_FAN
= 0x6,
49 TOP_LINE_LIST_ADJ
= 0x9,
50 TOP_LISTSTRIP_ADJ
= 0xA,
51 TOP_TRI_LIST_ADJ
= 0xB,
52 TOP_TRI_STRIP_ADJ
= 0xC,
53 TOP_TRI_STRIP_REVERSE
= 0xD,
57 TOP_POINT_LIST_BF
= 0x11,
58 TOP_LINE_STRIP_CONT
= 0x12,
59 TOP_LINE_STRIP_BF
= 0x13,
60 TOP_LINE_STRIP_CONT_BF
= 0x14,
61 TOP_TRIANGLE_FAN_NOSTIPPLE
= 0x16,
62 TOP_TRIANGLE_DISC
= 0x17, /// @todo What is this??
64 TOP_PATCHLIST_BASE
= 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
65 TOP_PATCHLIST_1
= 0x20, // List of 1-vertex patches
66 TOP_PATCHLIST_2
= 0x21,
67 TOP_PATCHLIST_3
= 0x22,
68 TOP_PATCHLIST_4
= 0x23,
69 TOP_PATCHLIST_5
= 0x24,
70 TOP_PATCHLIST_6
= 0x25,
71 TOP_PATCHLIST_7
= 0x26,
72 TOP_PATCHLIST_8
= 0x27,
73 TOP_PATCHLIST_9
= 0x28,
74 TOP_PATCHLIST_10
= 0x29,
75 TOP_PATCHLIST_11
= 0x2A,
76 TOP_PATCHLIST_12
= 0x2B,
77 TOP_PATCHLIST_13
= 0x2C,
78 TOP_PATCHLIST_14
= 0x2D,
79 TOP_PATCHLIST_15
= 0x2E,
80 TOP_PATCHLIST_16
= 0x2F,
81 TOP_PATCHLIST_17
= 0x30,
82 TOP_PATCHLIST_18
= 0x31,
83 TOP_PATCHLIST_19
= 0x32,
84 TOP_PATCHLIST_20
= 0x33,
85 TOP_PATCHLIST_21
= 0x34,
86 TOP_PATCHLIST_22
= 0x35,
87 TOP_PATCHLIST_23
= 0x36,
88 TOP_PATCHLIST_24
= 0x37,
89 TOP_PATCHLIST_25
= 0x38,
90 TOP_PATCHLIST_26
= 0x39,
91 TOP_PATCHLIST_27
= 0x3A,
92 TOP_PATCHLIST_28
= 0x3B,
93 TOP_PATCHLIST_29
= 0x3C,
94 TOP_PATCHLIST_30
= 0x3D,
95 TOP_PATCHLIST_31
= 0x3E,
96 TOP_PATCHLIST_32
= 0x3F, // List of 32-vertex patches
99 //////////////////////////////////////////////////////////////////////////
101 //////////////////////////////////////////////////////////////////////////
114 //////////////////////////////////////////////////////////////////////////
115 /// SWR_RENDERTARGET_ATTACHMENT
116 /// @todo It's not clear what an "attachment" means. It's not a common term.
117 //////////////////////////////////////////////////////////////////////////
118 enum SWR_RENDERTARGET_ATTACHMENT
120 SWR_ATTACHMENT_COLOR0
,
121 SWR_ATTACHMENT_COLOR1
,
122 SWR_ATTACHMENT_COLOR2
,
123 SWR_ATTACHMENT_COLOR3
,
124 SWR_ATTACHMENT_COLOR4
,
125 SWR_ATTACHMENT_COLOR5
,
126 SWR_ATTACHMENT_COLOR6
,
127 SWR_ATTACHMENT_COLOR7
,
128 SWR_ATTACHMENT_DEPTH
,
129 SWR_ATTACHMENT_STENCIL
,
134 #define SWR_NUM_RENDERTARGETS 8
136 #define SWR_ATTACHMENT_COLOR0_BIT 0x001
137 #define SWR_ATTACHMENT_COLOR1_BIT 0x002
138 #define SWR_ATTACHMENT_COLOR2_BIT 0x004
139 #define SWR_ATTACHMENT_COLOR3_BIT 0x008
140 #define SWR_ATTACHMENT_COLOR4_BIT 0x010
141 #define SWR_ATTACHMENT_COLOR5_BIT 0x020
142 #define SWR_ATTACHMENT_COLOR6_BIT 0x040
143 #define SWR_ATTACHMENT_COLOR7_BIT 0x080
144 #define SWR_ATTACHMENT_DEPTH_BIT 0x100
145 #define SWR_ATTACHMENT_STENCIL_BIT 0x200
146 #define SWR_ATTACHMENT_MASK_ALL 0x3ff
147 #define SWR_ATTACHMENT_MASK_COLOR 0x0ff
150 //////////////////////////////////////////////////////////////////////////
151 /// @brief SWR Inner Tessellation factor ID
152 /// See above GetTessFactorOutputPosition code for documentation
153 enum SWR_INNER_TESSFACTOR_ID
155 SWR_QUAD_U_TRI_INSIDE
,
158 SWR_NUM_INNER_TESS_FACTORS
,
161 //////////////////////////////////////////////////////////////////////////
162 /// @brief SWR Outer Tessellation factor ID
163 /// See above GetTessFactorOutputPosition code for documentation
164 enum SWR_OUTER_TESSFACTOR_ID
166 SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL
,
167 SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY
,
168 SWR_QUAD_U_EQ1_TRI_W
,
171 SWR_NUM_OUTER_TESS_FACTORS
,
175 /////////////////////////////////////////////////////////////////////////
177 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
178 /// Contains position in clip space, hardcoded to attribute 0,
179 /// space for up to 32 attributes, as well as any SGV values generated
181 /////////////////////////////////////////////////////////////////////////
184 VERTEX_POSITION_SLOT
= 0,
185 VERTEX_POSITION_END_SLOT
= 0,
186 VERTEX_ATTRIB_START_SLOT
= ( 1 + VERTEX_POSITION_END_SLOT
),
187 VERTEX_ATTRIB_END_SLOT
= (32 + VERTEX_POSITION_END_SLOT
),
188 VERTEX_RTAI_SLOT
= (33 + VERTEX_POSITION_END_SLOT
), // GS writes RenderTargetArrayIndex here
189 VERTEX_PRIMID_SLOT
= (34 + VERTEX_POSITION_END_SLOT
), // GS writes PrimId here
190 VERTEX_CLIPCULL_DIST_LO_SLOT
= (35 + VERTEX_POSITION_END_SLOT
), // VS writes lower 4 clip/cull dist
191 VERTEX_CLIPCULL_DIST_HI_SLOT
= (36 + VERTEX_POSITION_END_SLOT
), // VS writes upper 4 clip/cull dist
192 VERTEX_POINT_SIZE_SLOT
= (37 + VERTEX_POSITION_END_SLOT
), // VS writes point size here
193 VERTEX_VIEWPORT_ARRAY_INDEX_SLOT
= (38 + VERTEX_POSITION_END_SLOT
),
194 SWR_VTX_NUM_SLOTS
= VERTEX_VIEWPORT_ARRAY_INDEX_SLOT
,
200 simdvector attrib
[SWR_VTX_NUM_SLOTS
];
203 #if ENABLE_AVX512_SIMD16
206 simd16vector attrib
[SWR_VTX_NUM_SLOTS
];
210 //////////////////////////////////////////////////////////////////////////
212 /// @brief Input to vertex shader
213 /////////////////////////////////////////////////////////////////////////
214 struct SWR_VS_CONTEXT
216 simdvertex
* pVin
; // IN: SIMD input vertex data store
217 simdvertex
* pVout
; // OUT: SIMD output vertex data store
219 uint32_t InstanceID
; // IN: Instance ID, constant across all verts of the SIMD
220 simdscalari VertexID
; // IN: Vertex ID
221 simdscalari mask
; // IN: Active mask for shader
222 #if USE_SIMD16_FRONTEND
223 uint32_t AlternateOffset
; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
227 /////////////////////////////////////////////////////////////////////////
229 /// @brief defines a control point element as passed from the output
230 /// of the hull shader to the input of the domain shader
231 /////////////////////////////////////////////////////////////////////////
242 ScalarAttrib attrib
[SWR_VTX_NUM_SLOTS
];
245 //////////////////////////////////////////////////////////////////////////
246 /// SWR_TESSELLATION_FACTORS
247 /// @brief Tessellation factors structure (non-vector)
248 /////////////////////////////////////////////////////////////////////////
249 struct SWR_TESSELLATION_FACTORS
251 float OuterTessFactors
[SWR_NUM_OUTER_TESS_FACTORS
];
252 float InnerTessFactors
[SWR_NUM_INNER_TESS_FACTORS
];
255 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
258 SWR_TESSELLATION_FACTORS tessFactors
;
259 ScalarCPoint cp
[MAX_NUM_VERTS_PER_PRIM
];
260 ScalarCPoint patchData
;
263 //////////////////////////////////////////////////////////////////////////
265 /// @brief Input to hull shader
266 /////////////////////////////////////////////////////////////////////////
267 struct SWR_HS_CONTEXT
269 simdvertex vert
[MAX_NUM_VERTS_PER_PRIM
]; // IN: (SIMD) input primitive data
270 simdscalari PrimitiveID
; // IN: (SIMD) primitive ID generated from the draw call
271 simdscalari mask
; // IN: Active mask for shader
272 ScalarPatch
* pCPout
; // OUT: Output control point patch
273 // SIMD-sized-array of SCALAR patches
276 //////////////////////////////////////////////////////////////////////////
278 /// @brief Input to domain shader
279 /////////////////////////////////////////////////////////////////////////
280 struct SWR_DS_CONTEXT
282 uint32_t PrimitiveID
; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
283 uint32_t vectorOffset
; // IN: (SCALAR) vector index offset into SIMD data.
284 uint32_t vectorStride
; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
285 ScalarPatch
* pCpIn
; // IN: (SCALAR) Control patch
286 simdscalar
* pDomainU
; // IN: (SIMD) Domain Point U coords
287 simdscalar
* pDomainV
; // IN: (SIMD) Domain Point V coords
288 simdscalari mask
; // IN: Active mask for shader
289 simdscalar
* pOutputData
; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
292 //////////////////////////////////////////////////////////////////////////
294 /// @brief Input to geometry shader.
295 /////////////////////////////////////////////////////////////////////////
296 struct SWR_GS_CONTEXT
298 simdvertex vert
[MAX_NUM_VERTS_PER_PRIM
]; // IN: input primitive data for SIMD prims
299 simdscalari PrimitiveID
; // IN: input primitive ID generated from the draw call
300 uint32_t InstanceID
; // IN: input instance ID
301 simdscalari mask
; // IN: Active mask for shader
302 uint8_t* pStream
; // OUT: output stream (contains vertices for all output streams)
303 uint8_t* pCutOrStreamIdBuffer
; // OUT: cut or stream id buffer
304 simdscalari vertexCount
; // OUT: num vertices emitted per SIMD lane
307 struct PixelPositions
315 #define SWR_MAX_NUM_MULTISAMPLES 16
317 //////////////////////////////////////////////////////////////////////////
319 /// @brief Input to pixel shader.
320 /////////////////////////////////////////////////////////////////////////
321 struct SWR_PS_CONTEXT
323 PixelPositions vX
; // IN: x location(s) of pixels
324 PixelPositions vY
; // IN: y location(s) of pixels
325 simdscalar vZ
; // INOUT: z location of pixels
326 simdscalari activeMask
; // OUT: mask for kill
327 simdscalar inputMask
; // IN: input coverage mask for all samples
328 simdscalari oMask
; // OUT: mask for output coverage
330 PixelPositions vI
; // barycentric coords evaluated at pixel center, sample position, centroid
332 PixelPositions vOneOverW
; // IN: 1/w
334 const float* pAttribs
; // IN: pointer to attribute barycentric coefficients
335 const float* pPerspAttribs
; // IN: pointer to attribute/w barycentric coefficients
336 const float* pRecipW
; // IN: pointer to 1/w coord for each vertex
337 const float *I
; // IN: Barycentric A, B, and C coefs used to compute I
338 const float *J
; // IN: Barycentric A, B, and C coefs used to compute J
339 float recipDet
; // IN: 1/Det, used when barycentric interpolating attributes
340 const float* pSamplePosX
; // IN: array of sample positions
341 const float* pSamplePosY
; // IN: array of sample positions
342 simdvector shaded
[SWR_NUM_RENDERTARGETS
];
343 // OUT: result color per rendertarget
345 uint32_t frontFace
; // IN: front- 1, back- 0
346 uint32_t primID
; // IN: primitive ID
347 uint32_t sampleIndex
; // IN: sampleIndex
348 uint32_t renderTargetArrayIndex
; // IN: render target array index from GS
349 uint32_t rasterizerSampleCount
; // IN: sample count used by the rasterizer
351 uint8_t* pColorBuffer
[SWR_NUM_RENDERTARGETS
]; // IN: Pointers to render target hottiles
354 //////////////////////////////////////////////////////////////////////////
356 /// @brief Input to compute shader.
357 /////////////////////////////////////////////////////////////////////////
358 struct SWR_CS_CONTEXT
360 // The ThreadGroupId is the current thread group index relative
361 // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
362 // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
364 // Compute shader accepts the following system values.
365 // o ThreadId - Current thread id relative to all other threads in dispatch.
366 // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
367 // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
368 // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
370 // All of these system values can be computed in the shader. They will be
371 // derived from the current tile counter. The tile counter is an atomic counter that
372 // resides in the draw context and is initialized to the product of the dispatch dims.
374 // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
376 // Each CPU worker thread will atomically decrement this counter and pass the current
377 // count into the shader. When the count reaches 0 then all thread groups in the
378 // dispatch call have been completed.
380 uint32_t tileCounter
; // The tile counter value for this thread group.
382 // Dispatch dimensions used by shader to compute system values from the tile counter.
383 uint32_t dispatchDims
[3];
385 uint8_t* pTGSM
; // Thread Group Shared Memory pointer.
387 uint8_t* pSpillFillBuffer
; // Spill/fill buffer for barrier support
389 uint8_t* pScratchSpace
; // Pointer to scratch space buffer used by the shader, shader is responsible
390 // for subdividing scratch space per instance/simd
392 uint32_t scratchSpacePerSimd
; // Scratch space per work item x SIMD_WIDTH
398 SWR_TILE_NONE
= 0x0, // Linear mode (no tiling)
399 SWR_TILE_MODE_WMAJOR
, // W major tiling
400 SWR_TILE_MODE_XMAJOR
, // X major tiling
401 SWR_TILE_MODE_YMAJOR
, // Y major tiling
402 SWR_TILE_SWRZ
, // SWR-Z tiling
407 enum SWR_SURFACE_TYPE
414 SURFACE_STRUCTURED_BUFFER
= 5,
443 enum SWR_BLEND_FACTOR
446 BLENDFACTOR_SRC_COLOR
,
447 BLENDFACTOR_SRC_ALPHA
,
448 BLENDFACTOR_DST_ALPHA
,
449 BLENDFACTOR_DST_COLOR
,
450 BLENDFACTOR_SRC_ALPHA_SATURATE
,
451 BLENDFACTOR_CONST_COLOR
,
452 BLENDFACTOR_CONST_ALPHA
,
453 BLENDFACTOR_SRC1_COLOR
,
454 BLENDFACTOR_SRC1_ALPHA
,
456 BLENDFACTOR_INV_SRC_COLOR
,
457 BLENDFACTOR_INV_SRC_ALPHA
,
458 BLENDFACTOR_INV_DST_ALPHA
,
459 BLENDFACTOR_INV_DST_COLOR
,
460 BLENDFACTOR_INV_CONST_COLOR
,
461 BLENDFACTOR_INV_CONST_ALPHA
,
462 BLENDFACTOR_INV_SRC1_COLOR
,
463 BLENDFACTOR_INV_SRC1_ALPHA
479 LOGICOP_AND_INVERTED
,
480 LOGICOP_COPY_INVERTED
,
495 //////////////////////////////////////////////////////////////////////////
497 /// @brief Specifies how the auxiliary buffer is used by the driver.
498 //////////////////////////////////////////////////////////////////////////
507 //////////////////////////////////////////////////////////////////////////
508 /// SWR_SURFACE_STATE
509 //////////////////////////////////////////////////////////////////////////
510 struct SWR_SURFACE_STATE
512 uint8_t *pBaseAddress
;
513 SWR_SURFACE_TYPE type
; // @llvm_enum
514 SWR_FORMAT format
; // @llvm_enum
519 uint32_t samplePattern
;
522 uint32_t minLod
; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
523 uint32_t maxLod
; // for sampled surfaces, the max LOD that can be accessed
524 float resourceMinLod
; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
525 uint32_t lod
; // for render targets, the lod being rendered to
526 uint32_t arrayIndex
; // for render targets, the array index being rendered to for arrayed surfaces
527 SWR_TILE_MODE tileMode
; // @llvm_enum
533 uint32_t lodOffsets
[2][15]; // lod offsets for sampled surfaces
535 uint8_t *pAuxBaseAddress
; // Used for compression, append/consume counter, etc.
536 SWR_AUX_MODE auxMode
; // @llvm_enum
539 bool bInterleavedSamples
; // are MSAA samples stored interleaved or planar
542 // vertex fetch state
543 // WARNING- any changes to this struct need to be reflected
544 // in the fetch shader jit
545 struct SWR_VERTEX_BUFFER_STATE
549 const uint8_t *pData
;
552 uint32_t minVertex
; // min vertex (for bounds checking)
553 uint32_t maxVertex
; // size / pitch. precalculated value used by fetch shader for OOB checks
554 uint32_t partialInboundsSize
; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
557 struct SWR_INDEX_BUFFER_STATE
559 // Format type for indices (e.g. UINT16, UINT32, etc.)
560 SWR_FORMAT format
; // @llvm_enum
561 const void *pIndices
;
566 //////////////////////////////////////////////////////////////////////////
567 /// SWR_FETCH_CONTEXT
568 /// @brief Input to fetch shader.
569 /// @note WARNING - Changes to this struct need to be reflected in the
570 /// fetch shader jit.
571 /////////////////////////////////////////////////////////////////////////
572 struct SWR_FETCH_CONTEXT
574 const SWR_VERTEX_BUFFER_STATE
* pStreams
; // IN: array of bound vertex buffers
575 const int32_t* pIndices
; // IN: pointer to index buffer for indexed draws
576 const int32_t* pLastIndex
; // IN: pointer to end of index buffer, used for bounds checking
577 uint32_t CurInstance
; // IN: current instance
578 uint32_t BaseVertex
; // IN: base vertex
579 uint32_t StartVertex
; // IN: start vertex
580 uint32_t StartInstance
; // IN: start instance
581 simdscalari VertexID
; // OUT: vector of vertex IDs
582 simdscalari CutMask
; // OUT: vector mask of indices which have the cut index value
585 //////////////////////////////////////////////////////////////////////////
588 /// @brief All statistics generated by SWR go here. These are public
590 /////////////////////////////////////////////////////////////////////////
591 OSALIGNLINE(struct) SWR_STATS
594 uint64_t DepthPassCount
; // Number of passing depth tests. Not exact.
597 uint64_t PsInvocations
; // Number of Pixel Shader invocations
598 uint64_t CsInvocations
; // Number of Compute Shader invocations
602 //////////////////////////////////////////////////////////////////////////
605 /// @brief All statistics generated by FE.
606 /////////////////////////////////////////////////////////////////////////
607 OSALIGNLINE(struct) SWR_STATS_FE
609 uint64_t IaVertices
; // Number of Fetch Shader vertices
610 uint64_t IaPrimitives
; // Number of PA primitives.
611 uint64_t VsInvocations
; // Number of Vertex Shader invocations
612 uint64_t HsInvocations
; // Number of Hull Shader invocations
613 uint64_t DsInvocations
; // Number of Domain Shader invocations
614 uint64_t GsInvocations
; // Number of Geometry Shader invocations
615 uint64_t GsPrimitives
; // Number of prims GS outputs.
616 uint64_t CInvocations
; // Number of clipper invocations
617 uint64_t CPrimitives
; // Number of clipper primitives.
620 uint64_t SoPrimStorageNeeded
[4];
621 uint64_t SoNumPrimsWritten
[4];
624 //////////////////////////////////////////////////////////////////////////
625 /// STREAMOUT_BUFFERS
626 /////////////////////////////////////////////////////////////////////////
628 #define MAX_SO_STREAMS 4
629 #define MAX_SO_BUFFERS 4
630 #define MAX_ATTRIBUTES 32
632 struct SWR_STREAMOUT_BUFFER
637 // Pointers to streamout buffers.
640 // Size of buffer in dwords.
643 // Vertex pitch of buffer in dwords.
646 // Offset into buffer in dwords. SOS will increment this offset.
647 uint32_t streamOffset
;
649 // Offset to the SO write offset. If not null then we update offset here.
650 uint32_t* pWriteOffset
;
654 //////////////////////////////////////////////////////////////////////////
656 /////////////////////////////////////////////////////////////////////////
657 struct SWR_STREAMOUT_STATE
659 // This disables stream output.
662 // which streams are enabled for streamout
663 bool streamEnable
[MAX_SO_STREAMS
];
665 // If set then do not send any streams to the rasterizer.
666 bool rasterizerDisable
;
668 // Specifies which stream to send to the rasterizer.
669 uint32_t streamToRasterizer
;
671 // The stream masks specify which attributes are sent to which streams.
672 // These masks help the FE to set up the pPrimData buffer that is passed
673 // to the Stream Output Shader (SOS) function.
674 uint32_t streamMasks
[MAX_SO_STREAMS
];
676 // Number of attributes, including position, per vertex that are streamed out.
677 // This should match number of bits in stream mask.
678 uint32_t streamNumEntries
[MAX_SO_STREAMS
];
681 //////////////////////////////////////////////////////////////////////////
682 /// STREAMOUT_CONTEXT - Passed to SOS
683 /////////////////////////////////////////////////////////////////////////
684 struct SWR_STREAMOUT_CONTEXT
687 SWR_STREAMOUT_BUFFER
* pBuffer
[MAX_SO_STREAMS
];
689 // Num prims written for this stream
690 uint32_t numPrimsWritten
;
692 // Num prims that should have been written if there were no overflow.
693 uint32_t numPrimStorageNeeded
;
696 //////////////////////////////////////////////////////////////////////////
697 /// SWR_GS_STATE - Geometry shader state
698 /////////////////////////////////////////////////////////////////////////
703 // number of input attributes per vertex. used by the frontend to
704 // optimize assembling primitives for GS
705 uint32_t numInputAttribs
;
707 // output topology - can be point, tristrip, or linestrip
708 PRIMITIVE_TOPOLOGY outputTopology
; // @llvm_enum
710 // maximum number of verts that can be emitted by a single instance of the GS
711 uint32_t maxNumVerts
;
714 uint32_t instanceCount
;
716 // geometry shader emits renderTargetArrayIndex
717 bool emitsRenderTargetArrayIndex
;
719 // geometry shader emits PrimitiveID
720 bool emitsPrimitiveID
;
722 // geometry shader emits ViewportArrayIndex
723 bool emitsViewportArrayIndex
;
725 // if true, geometry shader emits a single stream, with separate cut buffer.
726 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
727 // to map vertices to streams
730 // when single stream is enabled, singleStreamID dictates which stream is being output.
731 // field ignored if isSingleStream is false
732 uint32_t singleStreamID
;
736 //////////////////////////////////////////////////////////////////////////
737 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
738 /////////////////////////////////////////////////////////////////////////
739 enum SWR_TS_OUTPUT_TOPOLOGY
743 SWR_TS_OUTPUT_TRI_CW
,
744 SWR_TS_OUTPUT_TRI_CCW
,
746 SWR_TS_OUTPUT_TOPOLOGY_COUNT
749 //////////////////////////////////////////////////////////////////////////
750 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
751 /////////////////////////////////////////////////////////////////////////
752 enum SWR_TS_PARTITIONING
755 SWR_TS_ODD_FRACTIONAL
,
756 SWR_TS_EVEN_FRACTIONAL
,
758 SWR_TS_PARTITIONING_COUNT
761 //////////////////////////////////////////////////////////////////////////
762 /// SWR_TS_DOMAIN - Defines Tessellation Domain
763 /////////////////////////////////////////////////////////////////////////
773 //////////////////////////////////////////////////////////////////////////
774 /// SWR_TS_STATE - Tessellation state
775 /////////////////////////////////////////////////////////////////////////
779 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology
; // @llvm_enum
780 SWR_TS_PARTITIONING partitioning
; // @llvm_enum
781 SWR_TS_DOMAIN domain
; // @llvm_enum
783 PRIMITIVE_TOPOLOGY postDSTopology
; // @llvm_enum
785 uint32_t numHsInputAttribs
;
786 uint32_t numHsOutputAttribs
;
787 uint32_t numDsOutputAttribs
;
790 // output merger state
791 struct SWR_RENDER_TARGET_BLEND_STATE
793 uint8_t writeDisableRed
: 1;
794 uint8_t writeDisableGreen
: 1;
795 uint8_t writeDisableBlue
: 1;
796 uint8_t writeDisableAlpha
: 1;
798 static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE
) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
800 enum SWR_MULTISAMPLE_COUNT
802 SWR_MULTISAMPLE_1X
= 0,
807 SWR_MULTISAMPLE_TYPE_COUNT
810 struct SWR_BLEND_STATE
812 // constant blend factor color in RGBA float
813 float constantColor
[4];
815 // alpha test reference value in unorm8 or float32
816 uint32_t alphaTestReference
;
818 // all RT's have the same sample count
819 ///@todo move this to Output Merger state when we refactor
820 SWR_MULTISAMPLE_COUNT sampleCount
; // @llvm_enum
822 SWR_RENDER_TARGET_BLEND_STATE renderTarget
[SWR_NUM_RENDERTARGETS
];
824 static_assert(sizeof(SWR_BLEND_STATE
) == 36, "Invalid SWR_BLEND_STATE size");
826 //////////////////////////////////////////////////////////////////////////
827 /// FUNCTION POINTERS FOR SHADERS
829 typedef void(__cdecl
*PFN_FETCH_FUNC
)(SWR_FETCH_CONTEXT
& fetchInfo
, simdvertex
& out
);
830 typedef void(__cdecl
*PFN_VERTEX_FUNC
)(HANDLE hPrivateData
, SWR_VS_CONTEXT
* pVsContext
);
831 typedef void(__cdecl
*PFN_HS_FUNC
)(HANDLE hPrivateData
, SWR_HS_CONTEXT
* pHsContext
);
832 typedef void(__cdecl
*PFN_DS_FUNC
)(HANDLE hPrivateData
, SWR_DS_CONTEXT
* pDsContext
);
833 typedef void(__cdecl
*PFN_GS_FUNC
)(HANDLE hPrivateData
, SWR_GS_CONTEXT
* pGsContext
);
834 typedef void(__cdecl
*PFN_CS_FUNC
)(HANDLE hPrivateData
, SWR_CS_CONTEXT
* pCsContext
);
835 typedef void(__cdecl
*PFN_SO_FUNC
)(SWR_STREAMOUT_CONTEXT
& soContext
);
836 typedef void(__cdecl
*PFN_PIXEL_KERNEL
)(HANDLE hPrivateData
, SWR_PS_CONTEXT
*pContext
);
837 typedef void(__cdecl
*PFN_CPIXEL_KERNEL
)(HANDLE hPrivateData
, SWR_PS_CONTEXT
*pContext
);
838 typedef void(__cdecl
*PFN_BLEND_JIT_FUNC
)(const SWR_BLEND_STATE
*,
839 simdvector
& vSrc
, simdvector
& vSrc1
, simdscalar
& vSrc0Alpha
, uint32_t sample
,
840 uint8_t* pDst
, simdvector
& vResult
, simdscalari
* vOMask
, simdscalari
* vCoverageMask
);
841 typedef simdscalar(*PFN_QUANTIZE_DEPTH
)(simdscalar
);
845 //////////////////////////////////////////////////////////////////////////
847 /////////////////////////////////////////////////////////////////////////
848 struct SWR_FRONTEND_STATE
850 // skip clip test, perspective divide, and viewport transform
851 // intended for verts in screen space
852 bool vpTransformDisable
;
853 bool bEnableCutIndex
;
859 uint32_t lineStripList
: 1;
860 uint32_t triStripList
: 2;
864 uint32_t topologyProvokingVertex
; // provoking vertex for the draw topology
867 //////////////////////////////////////////////////////////////////////////
869 /////////////////////////////////////////////////////////////////////////
870 struct SWR_VIEWPORT_MATRIX
880 //////////////////////////////////////////////////////////////////////////
881 /// SWR_VIEWPORT_MATRICES
882 /////////////////////////////////////////////////////////////////////////
883 struct SWR_VIEWPORT_MATRICES
885 float m00
[KNOB_NUM_VIEWPORTS_SCISSORS
];
886 float m11
[KNOB_NUM_VIEWPORTS_SCISSORS
];
887 float m22
[KNOB_NUM_VIEWPORTS_SCISSORS
];
888 float m30
[KNOB_NUM_VIEWPORTS_SCISSORS
];
889 float m31
[KNOB_NUM_VIEWPORTS_SCISSORS
];
890 float m32
[KNOB_NUM_VIEWPORTS_SCISSORS
];
893 //////////////////////////////////////////////////////////////////////////
895 /////////////////////////////////////////////////////////////////////////
906 //////////////////////////////////////////////////////////////////////////
908 //////////////////////////////////////////////////////////////////////////
920 SWR_FILLMODE_WIREFRAME
,
924 enum SWR_FRONTWINDING
931 enum SWR_PIXEL_LOCATION
933 SWR_PIXEL_LOCATION_CENTER
,
934 SWR_PIXEL_LOCATION_UL
,
937 // fixed point screen space sample locations within a pixel
938 struct SWR_MULTISAMPLE_POS
941 INLINE
void SetXi(uint32_t sampleNum
, uint32_t val
) { _xi
[sampleNum
] = val
; }; // @llvm_func
942 INLINE
void SetYi(uint32_t sampleNum
, uint32_t val
) { _yi
[sampleNum
] = val
; }; // @llvm_func
943 INLINE
uint32_t Xi(uint32_t sampleNum
) const { return _xi
[sampleNum
]; }; // @llvm_func
944 INLINE
uint32_t Yi(uint32_t sampleNum
) const { return _yi
[sampleNum
]; }; // @llvm_func
945 INLINE
void SetX(uint32_t sampleNum
, float val
) { _x
[sampleNum
] = val
; }; // @llvm_func
946 INLINE
void SetY(uint32_t sampleNum
, float val
) { _y
[sampleNum
] = val
; }; // @llvm_func
947 INLINE
float X(uint32_t sampleNum
) const { return _x
[sampleNum
]; }; // @llvm_func
948 INLINE
float Y(uint32_t sampleNum
) const { return _y
[sampleNum
]; }; // @llvm_func
949 typedef const float(&sampleArrayT
)[SWR_MAX_NUM_MULTISAMPLES
]; //@llvm_typedef
950 INLINE sampleArrayT
X() const { return _x
; }; // @llvm_func
951 INLINE sampleArrayT
Y() const { return _y
; }; // @llvm_func
952 INLINE
const __m128i
& vXi(uint32_t sampleNum
) const { return _vXi
[sampleNum
]; }; // @llvm_func
953 INLINE
const __m128i
& vYi(uint32_t sampleNum
) const { return _vYi
[sampleNum
]; }; // @llvm_func
954 INLINE
const simdscalar
& vX(uint32_t sampleNum
) const { return _vX
[sampleNum
]; }; // @llvm_func
955 INLINE
const simdscalar
& vY(uint32_t sampleNum
) const { return _vY
[sampleNum
]; }; // @llvm_func
956 INLINE
const __m128i
& TileSampleOffsetsX() const { return tileSampleOffsetsX
; }; // @llvm_func
957 INLINE
const __m128i
& TileSampleOffsetsY() const { return tileSampleOffsetsY
; }; // @llvm_func
959 INLINE
void PrecalcSampleData(int numSamples
) // @llvm_func_start
961 for(int i
= 0; i
< numSamples
; i
++)
963 _vXi
[i
] = _mm_set1_epi32(_xi
[i
]);
964 _vYi
[i
] = _mm_set1_epi32(_yi
[i
]);
965 _vX
[i
] = _simd_set1_ps(_x
[i
]);
966 _vY
[i
] = _simd_set1_ps(_y
[i
]);
968 // precalculate the raster tile BB for the rasterizer.
969 CalcTileSampleOffsets(numSamples
);
974 INLINE
void CalcTileSampleOffsets(int numSamples
) // @llvm_func_start
976 auto expandThenBlend4
= [](uint32_t* min
, uint32_t* max
, auto mask
)
978 __m128i vMin
= _mm_set1_epi32(*min
);
979 __m128i vMax
= _mm_set1_epi32(*max
);
980 return _simd_blend4_epi32
<decltype(mask
)::value
>(vMin
, vMax
);
983 auto minXi
= std::min_element(std::begin(_xi
), &_xi
[numSamples
]);
984 auto maxXi
= std::max_element(std::begin(_xi
), &_xi
[numSamples
]);
985 std::integral_constant
<int, 0xA> xMask
;
986 // BR(max), BL(min), UR(max), UL(min)
987 tileSampleOffsetsX
= expandThenBlend4(minXi
, maxXi
, xMask
);
989 auto minYi
= std::min_element(std::begin(_yi
), &_yi
[numSamples
]);
990 auto maxYi
= std::max_element(std::begin(_yi
), &_yi
[numSamples
]);
991 std::integral_constant
<int, 0xC> yMask
;
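992 // BR(max), BL(max), UR(min), UL(min) -- for mask 0xC, using the same bit-to-corner order as the X comment; TODO confirm against _simd_blend4_epi32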
993 tileSampleOffsetsY
= expandThenBlend4(minYi
, maxYi
, yMask
);
995 // scalar sample values
996 uint32_t _xi
[SWR_MAX_NUM_MULTISAMPLES
];
997 uint32_t _yi
[SWR_MAX_NUM_MULTISAMPLES
];
998 float _x
[SWR_MAX_NUM_MULTISAMPLES
];
999 float _y
[SWR_MAX_NUM_MULTISAMPLES
];
1001 // precalc'd / vectorized samples
1002 __m128i _vXi
[SWR_MAX_NUM_MULTISAMPLES
];
1003 __m128i _vYi
[SWR_MAX_NUM_MULTISAMPLES
];
1004 simdscalar _vX
[SWR_MAX_NUM_MULTISAMPLES
];
1005 simdscalar _vY
[SWR_MAX_NUM_MULTISAMPLES
];
1006 __m128i tileSampleOffsetsX
;
1007 __m128i tileSampleOffsetsY
;
1011 //////////////////////////////////////////////////////////////////////////
1013 //////////////////////////////////////////////////////////////////////////
1014 struct SWR_RASTSTATE
1016 uint32_t cullMode
: 2;
1017 uint32_t fillMode
: 2;
1018 uint32_t frontWinding
: 1;
1019 uint32_t scissorEnable
: 1;
1020 uint32_t depthClipEnable
: 1;
1021 uint32_t clipHalfZ
: 1;
1022 uint32_t pointParam
: 1;
1023 uint32_t pointSpriteEnable
: 1;
1024 uint32_t pointSpriteTopOrigin
: 1;
1025 uint32_t forcedSampleCount
: 1;
1026 uint32_t pixelOffset
: 1;
1027 uint32_t depthBiasPreAdjusted
: 1; ///< depth bias constant is in float units, not per-format Z units
1028 uint32_t conservativeRast
: 1;
1034 float slopeScaledDepthBias
;
1035 float depthBiasClamp
;
1036 SWR_FORMAT depthFormat
; // @llvm_enum
1038 // sample count the rasterizer is running at
1039 SWR_MULTISAMPLE_COUNT sampleCount
; // @llvm_enum
1040 uint32_t pixelLocation
; // UL or Center
1041 SWR_MULTISAMPLE_POS samplePositions
; // @llvm_struct
1042 bool bIsCenterPattern
; // @llvm_enum
1044 // user clip/cull distance enables
1045 uint8_t cullDistanceMask
;
1046 uint8_t clipDistanceMask
;
1049 enum SWR_CONSTANT_SOURCE
1051 SWR_CONSTANT_SOURCE_CONST_0000
,
1052 SWR_CONSTANT_SOURCE_CONST_0001_FLOAT
,
1053 SWR_CONSTANT_SOURCE_CONST_1111_FLOAT
,
1054 SWR_CONSTANT_SOURCE_PRIM_ID
1057 struct SWR_ATTRIB_SWIZZLE
1059 uint16_t sourceAttrib
: 5; // source attribute
1060 uint16_t constantSource
: 2; // constant source to apply
1061 uint16_t componentOverrideMask
: 4; // override component with constant source
1065 struct SWR_BACKEND_STATE
1067 uint32_t constantInterpolationMask
; // bitmask indicating which attributes have constant interpolation
1068 uint32_t pointSpriteTexCoordMask
; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
1070 uint8_t numAttributes
; // total number of attributes to send to backend (up to 32)
1071 uint8_t numComponents
[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
1073 bool swizzleEnable
; // when enabled, core will parse the swizzle map when
1074 // setting up attributes for the backend, otherwise
1075 // all attributes up to numAttributes will be sent
1076 SWR_ATTRIB_SWIZZLE swizzleMap
[32];
1080 union SWR_DEPTH_STENCIL_STATE
1085 uint32_t depthWriteEnable
: 1;
1086 uint32_t depthTestEnable
: 1;
1087 uint32_t stencilWriteEnable
: 1;
1088 uint32_t stencilTestEnable
: 1;
1089 uint32_t doubleSidedStencilTestEnable
: 1;
1091 uint32_t depthTestFunc
: 3;
1092 uint32_t stencilTestFunc
: 3;
1094 uint32_t backfaceStencilPassDepthPassOp
: 3;
1095 uint32_t backfaceStencilPassDepthFailOp
: 3;
1096 uint32_t backfaceStencilFailOp
: 3;
1097 uint32_t backfaceStencilTestFunc
: 3;
1098 uint32_t stencilPassDepthPassOp
: 3;
1099 uint32_t stencilPassDepthFailOp
: 3;
1100 uint32_t stencilFailOp
: 3;
1103 uint8_t backfaceStencilWriteMask
;
1104 uint8_t backfaceStencilTestMask
;
1105 uint8_t stencilWriteMask
;
1106 uint8_t stencilTestMask
;
1109 uint8_t backfaceStencilRefValue
;
1110 uint8_t stencilRefValue
;
1115 enum SWR_SHADING_RATE
1117 SWR_SHADING_RATE_PIXEL
,
1118 SWR_SHADING_RATE_SAMPLE
,
1119 SWR_SHADING_RATE_COUNT
,
1122 enum SWR_INPUT_COVERAGE
1124 SWR_INPUT_COVERAGE_NONE
,
1125 SWR_INPUT_COVERAGE_NORMAL
,
1126 SWR_INPUT_COVERAGE_INNER_CONSERVATIVE
,
1127 SWR_INPUT_COVERAGE_COUNT
,
1130 enum SWR_PS_POSITION_OFFSET
1132 SWR_PS_POSITION_SAMPLE_NONE
,
1133 SWR_PS_POSITION_SAMPLE_OFFSET
,
1134 SWR_PS_POSITION_CENTROID_OFFSET
,
1135 SWR_PS_POSITION_OFFSET_COUNT
,
1138 enum SWR_BARYCENTRICS_MASK
1140 SWR_BARYCENTRIC_PER_PIXEL_MASK
= 0x1,
1141 SWR_BARYCENTRIC_CENTROID_MASK
= 0x2,
1142 SWR_BARYCENTRIC_PER_SAMPLE_MASK
= 0x4,
1145 // pixel shader state
1149 PFN_PIXEL_KERNEL pfnPixelShader
; // @llvm_pfn
1152 uint32_t killsPixel
: 1; // pixel shader can kill pixels
1153 uint32_t inputCoverage
: 2; // ps uses input coverage
1154 uint32_t writesODepth
: 1; // pixel shader writes to depth
1155 uint32_t usesSourceDepth
: 1; // pixel shader reads depth
1156 uint32_t shadingRate
: 2; // shading per pixel / sample / coarse pixel
1157 uint32_t numRenderTargets
: 4; // number of render target outputs in use (0-8)
1158 uint32_t posOffset
: 2; // type of offset (none, sample, centroid) to add to pixel position
1159 uint32_t barycentricsMask
: 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1160 uint32_t usesUAV
: 1; // pixel shader accesses UAV
1161 uint32_t forceEarlyZ
: 1; // force execution of early depth/stencil test
1164 // depth bounds state
1165 struct SWR_DEPTH_BOUNDS_STATE
1167 bool depthBoundsTestEnable
;
1168 float depthBoundsTestMinValue
;
1169 float depthBoundsTestMaxValue
;