swr/rast: Change gfx pointers to gfxptr_t
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / state.h
1 /****************************************************************************
2 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29
30 #include "common/formats.h"
31 #include "common/intrin.h"
32 using gfxptr_t = unsigned long long; // Graphics address held as an unsigned integer instead of a typed pointer; type of the xp* fields in this file.
33 #include <functional>
34 #include <algorithm>
35
36 //////////////////////////////////////////////////////////////////////////
37 /// PRIMITIVE_TOPOLOGY.
38 //////////////////////////////////////////////////////////////////////////
39 enum PRIMITIVE_TOPOLOGY // Primitive topology IDs with explicit values; 0x15 and 0x18-0x1E are not assigned.
40 {
41 TOP_UNKNOWN = 0x0,
42 TOP_POINT_LIST = 0x1,
43 TOP_LINE_LIST = 0x2,
44 TOP_LINE_STRIP = 0x3,
45 TOP_TRIANGLE_LIST = 0x4,
46 TOP_TRIANGLE_STRIP = 0x5,
47 TOP_TRIANGLE_FAN = 0x6,
48 TOP_QUAD_LIST = 0x7,
49 TOP_QUAD_STRIP = 0x8,
50 TOP_LINE_LIST_ADJ = 0x9,
51 TOP_LISTSTRIP_ADJ = 0xA,
52 TOP_TRI_LIST_ADJ = 0xB,
53 TOP_TRI_STRIP_ADJ = 0xC,
54 TOP_TRI_STRIP_REVERSE = 0xD,
55 TOP_POLYGON = 0xE,
56 TOP_RECT_LIST = 0xF,
57 TOP_LINE_LOOP = 0x10,
58 TOP_POINT_LIST_BF = 0x11,
59 TOP_LINE_STRIP_CONT = 0x12,
60 TOP_LINE_STRIP_BF = 0x13,
61 TOP_LINE_STRIP_CONT_BF = 0x14,
62 TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
63 TOP_TRIANGLE_DISC = 0x17, /// @todo What is this??
64
65 TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
66 TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches
67 TOP_PATCHLIST_2 = 0x21, // In general TOP_PATCHLIST_n == TOP_PATCHLIST_BASE + n
68 TOP_PATCHLIST_3 = 0x22,
69 TOP_PATCHLIST_4 = 0x23,
70 TOP_PATCHLIST_5 = 0x24,
71 TOP_PATCHLIST_6 = 0x25,
72 TOP_PATCHLIST_7 = 0x26,
73 TOP_PATCHLIST_8 = 0x27,
74 TOP_PATCHLIST_9 = 0x28,
75 TOP_PATCHLIST_10 = 0x29,
76 TOP_PATCHLIST_11 = 0x2A,
77 TOP_PATCHLIST_12 = 0x2B,
78 TOP_PATCHLIST_13 = 0x2C,
79 TOP_PATCHLIST_14 = 0x2D,
80 TOP_PATCHLIST_15 = 0x2E,
81 TOP_PATCHLIST_16 = 0x2F,
82 TOP_PATCHLIST_17 = 0x30,
83 TOP_PATCHLIST_18 = 0x31,
84 TOP_PATCHLIST_19 = 0x32,
85 TOP_PATCHLIST_20 = 0x33,
86 TOP_PATCHLIST_21 = 0x34,
87 TOP_PATCHLIST_22 = 0x35,
88 TOP_PATCHLIST_23 = 0x36,
89 TOP_PATCHLIST_24 = 0x37,
90 TOP_PATCHLIST_25 = 0x38,
91 TOP_PATCHLIST_26 = 0x39,
92 TOP_PATCHLIST_27 = 0x3A,
93 TOP_PATCHLIST_28 = 0x3B,
94 TOP_PATCHLIST_29 = 0x3C,
95 TOP_PATCHLIST_30 = 0x3D,
96 TOP_PATCHLIST_31 = 0x3E,
97 TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches
98 };
99
100 //////////////////////////////////////////////////////////////////////////
101 /// SWR_SHADER_TYPE
102 //////////////////////////////////////////////////////////////////////////
103 enum SWR_SHADER_TYPE // Shader stage identifiers, implicitly numbered from 0.
104 {
105 SHADER_VERTEX, // 0
106 SHADER_GEOMETRY, // 1
107 SHADER_DOMAIN, // 2
108 SHADER_HULL, // 3
109 SHADER_PIXEL, // 4
110 SHADER_COMPUTE, // 5
111
112 NUM_SHADER_TYPES, // 6: count of the stages above; must remain last
113 };
114
115 //////////////////////////////////////////////////////////////////////////
116 /// SWR_RENDERTARGET_ATTACHMENT
117 /// @todo It's not clear what an "attachment" means. It's not a common term.
118 //////////////////////////////////////////////////////////////////////////
119 enum SWR_RENDERTARGET_ATTACHMENT // Attachment indices, implicitly numbered from 0; index i pairs with bit (1 << i) of the SWR_ATTACHMENT_*_BIT macros.
120 {
121 SWR_ATTACHMENT_COLOR0, // 0
122 SWR_ATTACHMENT_COLOR1, // 1
123 SWR_ATTACHMENT_COLOR2, // 2
124 SWR_ATTACHMENT_COLOR3, // 3
125 SWR_ATTACHMENT_COLOR4, // 4
126 SWR_ATTACHMENT_COLOR5, // 5
127 SWR_ATTACHMENT_COLOR6, // 6
128 SWR_ATTACHMENT_COLOR7, // 7
129 SWR_ATTACHMENT_DEPTH, // 8
130 SWR_ATTACHMENT_STENCIL, // 9
131
132 SWR_NUM_ATTACHMENTS // 10: count of the attachments above; must remain last
133 };
134
135 #define SWR_NUM_RENDERTARGETS 8 // equals the number of SWR_ATTACHMENT_COLORn entries (COLOR0..COLOR7)
136
137 #define SWR_ATTACHMENT_COLOR0_BIT 0x001 // 1 << SWR_ATTACHMENT_COLOR0
138 #define SWR_ATTACHMENT_COLOR1_BIT 0x002
139 #define SWR_ATTACHMENT_COLOR2_BIT 0x004
140 #define SWR_ATTACHMENT_COLOR3_BIT 0x008
141 #define SWR_ATTACHMENT_COLOR4_BIT 0x010
142 #define SWR_ATTACHMENT_COLOR5_BIT 0x020
143 #define SWR_ATTACHMENT_COLOR6_BIT 0x040
144 #define SWR_ATTACHMENT_COLOR7_BIT 0x080
145 #define SWR_ATTACHMENT_DEPTH_BIT 0x100 // 1 << SWR_ATTACHMENT_DEPTH
146 #define SWR_ATTACHMENT_STENCIL_BIT 0x200 // 1 << SWR_ATTACHMENT_STENCIL
147 #define SWR_ATTACHMENT_MASK_ALL 0x3ff // OR of all ten attachment bits above
148 #define SWR_ATTACHMENT_MASK_COLOR 0x0ff // OR of the eight color attachment bits
149
150
151 //////////////////////////////////////////////////////////////////////////
152 /// @brief SWR Inner Tessellation factor ID
153 /// See above GetTessFactorOutputPosition code for documentation
154 enum SWR_INNER_TESSFACTOR_ID // Indices into SWR_TESSELLATION_FACTORS::InnerTessFactors.
155 {
156 SWR_QUAD_U_TRI_INSIDE, // 0
157 SWR_QUAD_V_INSIDE, // 1
158
159 SWR_NUM_INNER_TESS_FACTORS, // 2: array length of InnerTessFactors; must remain last
160 };
161
162 //////////////////////////////////////////////////////////////////////////
163 /// @brief SWR Outer Tessellation factor ID
164 /// See above GetTessFactorOutputPosition code for documentation
165 enum SWR_OUTER_TESSFACTOR_ID // Indices into SWR_TESSELLATION_FACTORS::OuterTessFactors.
166 {
167 SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL, // 0
168 SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY, // 1
169 SWR_QUAD_U_EQ1_TRI_W, // 2
170 SWR_QUAD_V_EQ1, // 3
171
172 SWR_NUM_OUTER_TESS_FACTORS, // 4: array length of OuterTessFactors; must remain last
173 };
174
175
176 /////////////////////////////////////////////////////////////////////////
177 /// simdvertex
178 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
179 /// Contains space for position, SGV, and 32 generic attributes
180 /////////////////////////////////////////////////////////////////////////
181 enum SWR_VTX_SLOTS // Slot indices into the attrib[] array of simdvertex / simd16vertex / ScalarCPoint.
182 {
183 VERTEX_SGV_SLOT = 0,
184 VERTEX_SGV_RTAI_COMP = 0, // NOTE(review): the *_COMP values look like component indices within VERTEX_SGV_SLOT, not slot indices - confirm
185 VERTEX_SGV_VAI_COMP = 1,
186 VERTEX_SGV_POINT_SIZE_COMP = 2,
187 VERTEX_POSITION_SLOT = 1,
188 VERTEX_POSITION_END_SLOT = 1, // every later slot is defined relative to this value
189 VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist (evaluates to 2)
190 VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist (evaluates to 3)
191 VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT), // evaluates to 4
192 VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT), // evaluates to 35, so slots 4..35 give 32 attribute slots
193 SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT) // evaluates to 36: total slot count, used as the attrib[] array length
194 };
195
196 // SoAoSoA
197 struct simdvertex // Vertex storage: one simdvector per SWR_VTX_SLOTS slot.
198 {
199 simdvector attrib[SWR_VTX_NUM_SLOTS]; // indexed by SWR_VTX_SLOTS values
200 };
201
202 #if ENABLE_AVX512_SIMD16 // simd16vertex is only declared when ENABLE_AVX512_SIMD16 is non-zero
203 struct simd16vertex // Same layout idea as simdvertex, but built from simd16vector elements.
204 {
205 simd16vector attrib[SWR_VTX_NUM_SLOTS]; // indexed by SWR_VTX_SLOTS values
206 };
207
208 #endif
209
210 template<typename SIMD_T> // SIMD_T must provide a nested Vec4 type
211 struct SIMDVERTEX_T // Width-generic vertex: one SIMD_T::Vec4 per SWR_VTX_SLOTS slot.
212 {
213 typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS]; // indexed by SWR_VTX_SLOTS values
214 };
215
216 //////////////////////////////////////////////////////////////////////////
217 /// SWR_SHADER_STATS
218 /// @brief Structure passed to shader for stats collection.
219 /////////////////////////////////////////////////////////////////////////
220 struct SWR_SHADER_STATS // Embedded as the 'stats' member of each SWR_*_CONTEXT shader context in this file.
221 {
222 uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86.
223 };
224
225 //////////////////////////////////////////////////////////////////////////
226 /// SWR_VS_CONTEXT
227 /// @brief Input to vertex shader
228 /////////////////////////////////////////////////////////////////////////
229 struct SWR_VS_CONTEXT // Argument block passed by pointer to PFN_VERTEX_FUNC.
230 {
231 simdvertex* pVin; // IN: SIMD input vertex data store
232 simdvertex* pVout; // OUT: SIMD output vertex data store
233
234 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
235 simdscalari VertexID; // IN: Vertex ID
236 simdscalari mask; // IN: Active mask for shader
237
238 // SIMD16 Frontend fields.
239 uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
240 simd16scalari mask16; // IN: Active mask for shader (16-wide)
241 simd16scalari VertexID16; // IN: Vertex ID (16-wide)
242
243 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
244 };
245
246 /////////////////////////////////////////////////////////////////////////
247 /// ScalarCPoint
248 /// @brief defines a control point element as passed from the output
249 /// of the hull shader to the input of the domain shader
250 /////////////////////////////////////////////////////////////////////////
251 struct ScalarAttrib // One scalar (non-SIMD) 4-component attribute; element type of ScalarCPoint::attrib.
252 {
253 float x;
254 float y;
255 float z;
256 float w;
257 };
258
259 struct ScalarCPoint // Scalar control point: one ScalarAttrib per SWR_VTX_SLOTS slot.
260 {
261 ScalarAttrib attrib[SWR_VTX_NUM_SLOTS]; // indexed by SWR_VTX_SLOTS values
262 };
263
264 //////////////////////////////////////////////////////////////////////////
265 /// SWR_TESSELLATION_FACTORS
266 /// @brief Tessellation factors structure (non-vector)
267 /////////////////////////////////////////////////////////////////////////
268 struct SWR_TESSELLATION_FACTORS // Scalar tessellation factors; stored per patch in ScalarPatch::tessFactors.
269 {
270 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS]; // indexed by SWR_OUTER_TESSFACTOR_ID (4 entries)
271 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS]; // indexed by SWR_INNER_TESSFACTOR_ID (2 entries)
272 };
273
274 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
275 struct ScalarPatch // One scalar patch: tessellation factors, control points and a patchData control point.
276 {
277 SWR_TESSELLATION_FACTORS tessFactors;
278 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM]; // room for the maximum of 32 control points
279 ScalarCPoint patchData; // NOTE(review): presumably per-patch (not per-control-point) attributes - confirm
280 };
281
282 //////////////////////////////////////////////////////////////////////////
283 /// SWR_HS_CONTEXT
284 /// @brief Input to hull shader
285 /////////////////////////////////////////////////////////////////////////
286 struct SWR_HS_CONTEXT // Argument block passed by pointer to PFN_HS_FUNC.
287 {
288 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
289 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
290 simdscalari mask; // IN: Active mask for shader
291 ScalarPatch* pCPout; // OUT: Output control point patch
292 // SIMD-sized-array of SCALAR patches
293 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
294 };
295
296 //////////////////////////////////////////////////////////////////////////
297 /// SWR_DS_CONTEXT
298 /// @brief Input to domain shader
299 /////////////////////////////////////////////////////////////////////////
300 struct SWR_DS_CONTEXT // Argument block passed by pointer to PFN_DS_FUNC.
301 {
302 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
303 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
304 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
305 uint32_t outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
306 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
307 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
308 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
309 simdscalari mask; // IN: Active mask for shader
310 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
311 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
312 };
313
314 //////////////////////////////////////////////////////////////////////////
315 /// SWR_GS_CONTEXT
316 /// @brief Input to geometry shader.
317 /////////////////////////////////////////////////////////////////////////
318 struct SWR_GS_CONTEXT // Argument block passed by pointer to PFN_GS_FUNC.
319 {
320 simdvector* pVerts; // IN: input primitive data for SIMD prims
321 uint32_t inputVertStride; // IN: input vertex stride, in attributes
322 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
323 uint32_t InstanceID; // IN: input instance ID
324 simdscalari mask; // IN: Active mask for shader
325 uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams); one pointer per SIMD lane
326 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
327 };
328
329 struct PixelPositions // One SIMD value per evaluation location; used by SWR_PS_CONTEXT for vX, vY, vI, vJ and vOneOverW.
330 {
331 simdscalar UL; // NOTE(review): presumably the upper-left corner of the pixel - confirm
332 simdscalar center; // value at the pixel center
333 simdscalar sample; // value at the sample position
334 simdscalar centroid; // value at the centroid
335 };
336
337 #define SWR_MAX_NUM_MULTISAMPLES 16 // NOTE(review): matches the largest count in SWR_MULTISAMPLE_COUNT (SWR_MULTISAMPLE_16X) - keep the two in sync
338
339 //////////////////////////////////////////////////////////////////////////
340 /// SWR_PS_CONTEXT
341 /// @brief Input to pixel shader.
342 /////////////////////////////////////////////////////////////////////////
343 struct SWR_PS_CONTEXT // Argument block passed by pointer to PFN_PIXEL_KERNEL and PFN_CPIXEL_KERNEL.
344 {
345 PixelPositions vX; // IN: x location(s) of pixels
346 PixelPositions vY; // IN: y location(s) of pixels
347 simdscalar vZ; // INOUT: z location of pixels
348 simdscalari activeMask; // OUT: mask for kill
349 simdscalar inputMask; // IN: input coverage mask for all samples
350 simdscalari oMask; // OUT: mask for output coverage
351
352 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
353 PixelPositions vJ; // second barycentric coordinate, same evaluation locations as vI
354 PixelPositions vOneOverW; // IN: 1/w
355
356 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
357 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
358 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
359 const float *I; // IN: Barycentric A, B, and C coefs used to compute I
360 const float *J; // IN: Barycentric A, B, and C coefs used to compute J
361 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
362 const float* pSamplePosX; // IN: array of sample positions
363 const float* pSamplePosY; // IN: array of sample positions
364 simdvector shaded[SWR_NUM_RENDERTARGETS];
365 // OUT: result color per rendertarget
366
367 uint32_t frontFace; // IN: front- 1, back- 0
368 uint32_t sampleIndex; // IN: sampleIndex
369 uint32_t renderTargetArrayIndex; // IN: render target array index from GS
370 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
371
372 uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
373
374 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
375 };
376
377 //////////////////////////////////////////////////////////////////////////
378 /// SWR_CS_CONTEXT
379 /// @brief Input to compute shader.
380 /////////////////////////////////////////////////////////////////////////
381 struct SWR_CS_CONTEXT // Argument block passed by pointer to PFN_CS_FUNC.
382 {
383 // The ThreadGroupId is the current thread group index relative
384 // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
385 // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
386
387 // Compute shader accepts the following system values.
388 // o ThreadId - Current thread id relative to all other threads in dispatch.
389 // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
390 // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
391 // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
392 //
393 // All of these system values can be computed in the shader. They will be
394 // derived from the current tile counter. The tile counter is an atomic counter that
395 // resides in the draw context and is initialized to the product of the dispatch dims.
396 //
397 // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
398 //
399 // Each CPU worker thread will atomically decrement this counter and passes the current
400 // count into the shader. When the count reaches 0 then all thread groups in the
401 // dispatch call have been completed.
402
403 uint32_t tileCounter; // The tile counter value for this thread group.
404
405 // Dispatch dimensions used by shader to compute system values from the tile counter.
406 uint32_t dispatchDims[3]; // NOTE(review): presumably ordered x, y, z as in the formula above - confirm
407
408 uint8_t* pTGSM; // Thread Group Shared Memory pointer.
409 uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
410 uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is responsible
411 // for subdividing scratch space per instance/simd
412 uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
413
414 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
415 };
416
417 // enums
418 enum SWR_TILE_MODE // Surface tiling layouts; type of SWR_SURFACE_STATE::tileMode.
419 {
420 SWR_TILE_NONE = 0x0, // Linear mode (no tiling)
421 SWR_TILE_MODE_WMAJOR, // W major tiling (1)
422 SWR_TILE_MODE_XMAJOR, // X major tiling (2)
423 SWR_TILE_MODE_YMAJOR, // Y major tiling (3)
424 SWR_TILE_SWRZ, // SWR-Z tiling (4)
425
426 SWR_TILE_MODE_COUNT // 5: number of tile modes; must remain last
427 };
428
429 enum SWR_SURFACE_TYPE // Surface kinds; type of SWR_SURFACE_STATE::type. Values are explicit; 6 is not assigned.
430 {
431 SURFACE_1D = 0,
432 SURFACE_2D = 1,
433 SURFACE_3D = 2,
434 SURFACE_CUBE = 3,
435 SURFACE_BUFFER = 4,
436 SURFACE_STRUCTURED_BUFFER = 5,
437 SURFACE_NULL = 7 // note the gap: value 6 is skipped
438 };
439
440 enum SWR_ZFUNCTION // Comparison functions, implicitly numbered from 0.
441 {
442 ZFUNC_ALWAYS, // 0
443 ZFUNC_NEVER, // 1
444 ZFUNC_LT, // 2
445 ZFUNC_EQ, // 3
446 ZFUNC_LE, // 4
447 ZFUNC_GT, // 5
448 ZFUNC_NE, // 6
449 ZFUNC_GE, // 7
450 NUM_ZFUNC // 8: count of the functions above; must remain last
451 };
452
453 enum SWR_STENCILOP // Stencil operations, implicitly numbered from 0.
454 {
455 STENCILOP_KEEP, // 0
456 STENCILOP_ZERO, // 1
457 STENCILOP_REPLACE, // 2
458 STENCILOP_INCRSAT, // 3
459 STENCILOP_DECRSAT, // 4
460 STENCILOP_INCR, // 5
461 STENCILOP_DECR, // 6
462 STENCILOP_INVERT // 7
463 };
464
465 enum SWR_BLEND_FACTOR // Blend factors, implicitly numbered from 0 (BLENDFACTOR_ONE) to 18 (BLENDFACTOR_INV_SRC1_ALPHA).
466 {
467 BLENDFACTOR_ONE, // 0
468 BLENDFACTOR_SRC_COLOR,
469 BLENDFACTOR_SRC_ALPHA,
470 BLENDFACTOR_DST_ALPHA,
471 BLENDFACTOR_DST_COLOR,
472 BLENDFACTOR_SRC_ALPHA_SATURATE,
473 BLENDFACTOR_CONST_COLOR,
474 BLENDFACTOR_CONST_ALPHA,
475 BLENDFACTOR_SRC1_COLOR,
476 BLENDFACTOR_SRC1_ALPHA,
477 BLENDFACTOR_ZERO, // 10
478 BLENDFACTOR_INV_SRC_COLOR,
479 BLENDFACTOR_INV_SRC_ALPHA,
480 BLENDFACTOR_INV_DST_ALPHA,
481 BLENDFACTOR_INV_DST_COLOR,
482 BLENDFACTOR_INV_CONST_COLOR,
483 BLENDFACTOR_INV_CONST_ALPHA,
484 BLENDFACTOR_INV_SRC1_COLOR,
485 BLENDFACTOR_INV_SRC1_ALPHA // 18
486 };
487
488 enum SWR_BLEND_OP // Blend equations, implicitly numbered from 0.
489 {
490 BLENDOP_ADD, // 0
491 BLENDOP_SUBTRACT, // 1
492 BLENDOP_REVSUBTRACT, // 2
493 BLENDOP_MIN, // 3
494 BLENDOP_MAX, // 4
495 };
496
497 enum SWR_LOGIC_OP // Logic ops, implicitly numbered from 0 (LOGICOP_CLEAR) to 15 (LOGICOP_SET).
498 {
499 LOGICOP_CLEAR, // 0
500 LOGICOP_NOR,
501 LOGICOP_AND_INVERTED,
502 LOGICOP_COPY_INVERTED,
503 LOGICOP_AND_REVERSE,
504 LOGICOP_INVERT,
505 LOGICOP_XOR,
506 LOGICOP_NAND,
507 LOGICOP_AND, // 8
508 LOGICOP_EQUIV,
509 LOGICOP_NOOP,
510 LOGICOP_OR_INVERTED,
511 LOGICOP_COPY,
512 LOGICOP_OR_REVERSE,
513 LOGICOP_OR,
514 LOGICOP_SET, // 15
515 };
516
517 //////////////////////////////////////////////////////////////////////////
518 /// SWR_AUX_MODE
519 /// @brief Specifies how the auxiliary buffer is used by the driver.
520 //////////////////////////////////////////////////////////////////////////
521 enum SWR_AUX_MODE // Type of SWR_SURFACE_STATE::auxMode; implicitly numbered from 0.
522 {
523 AUX_MODE_NONE, // 0
524 AUX_MODE_COLOR, // 1
525 AUX_MODE_UAV, // 2
526 AUX_MODE_DEPTH, // 3
527 };
528
529 struct SWR_LOD_OFFSETS // Stand-alone table with the same [2][15] shape as SWR_SURFACE_STATE::lodOffsets.
530 {
531 uint32_t offsets[2][15]; // NOTE(review): meaning of the two dimensions is not documented here - confirm against users
532 };
533
534 //////////////////////////////////////////////////////////////////////////
535 /// SWR_SURFACE_STATE
536 //////////////////////////////////////////////////////////////////////////
537 struct SWR_SURFACE_STATE // Description of one surface; lines tagged @llvm_enum carry a marker that must stay on the field.
538 {
539 gfxptr_t xpBaseAddress; // surface base address as a gfxptr_t integer, not a host pointer type
540 SWR_SURFACE_TYPE type; // @llvm_enum
541 SWR_FORMAT format; // @llvm_enum
542 uint32_t width;
543 uint32_t height;
544 uint32_t depth;
545 uint32_t numSamples;
546 uint32_t samplePattern;
547 uint32_t pitch;
548 uint32_t qpitch;
549 uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
550 uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
551 float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
552 uint32_t lod; // for render targets, the lod being rendered to
553 uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
554 SWR_TILE_MODE tileMode; // @llvm_enum
555 uint32_t halign;
556 uint32_t valign;
557 uint32_t xOffset;
558 uint32_t yOffset;
559
560 uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
561
562 gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
563 SWR_AUX_MODE auxMode; // @llvm_enum
564
565
566 bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
567 };
568
569 // vertex fetch state
570 // WARNING- any changes to this struct need to be reflected
571 // in the fetch shader jit
572 struct SWR_VERTEX_BUFFER_STATE // One bound vertex buffer; SWR_FETCH_CONTEXT::pStreams points at an array of these.
573 {
574 gfxptr_t xpData; // buffer address as a gfxptr_t integer
575 uint32_t index;
576 uint32_t pitch;
577 uint32_t size;
578 uint32_t minVertex; // min vertex (for bounds checking)
579 uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
580 uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
581 };
582
583 struct SWR_INDEX_BUFFER_STATE // Bound index buffer: address, element format and size.
584 {
585 gfxptr_t xpIndices; // index buffer address as a gfxptr_t integer
586 // Format type for indices (e.g. UINT16, UINT32, etc.)
587 SWR_FORMAT format; // @llvm_enum
588 uint32_t size; // NOTE(review): units (bytes vs. indices) are not stated here - confirm
589 };
590
591
592 //////////////////////////////////////////////////////////////////////////
593 /// SWR_FETCH_CONTEXT
594 /// @brief Input to fetch shader.
595 /// @note WARNING - Changes to this struct need to be reflected in the
596 /// fetch shader jit.
597 /////////////////////////////////////////////////////////////////////////
598 struct SWR_FETCH_CONTEXT // Argument block passed by reference to PFN_FETCH_FUNC.
599 {
600 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
601 gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
602 gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
603 uint32_t CurInstance; // IN: current instance
604 uint32_t BaseVertex; // IN: base vertex
605 uint32_t StartVertex; // IN: start vertex
606 uint32_t StartInstance; // IN: start instance
607 simdscalari VertexID; // OUT: vector of vertex IDs
608 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
609 #if USE_SIMD16_SHADERS // extra outputs exist only when USE_SIMD16_SHADERS is non-zero
610 // simd16scalari VertexID; // OUT: vector of vertex IDs
611 // simd16scalari CutMask; // OUT: vector mask of indices which have the cut index value
612 simdscalari VertexID2; // OUT: vector of vertex IDs
613 simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
614 #endif
615 };
616
617 //////////////////////////////////////////////////////////////////////////
618 /// SWR_STATS
619 ///
620 /// @brief All statistics generated by SWR go here. These are public
621 /// to driver.
622 /////////////////////////////////////////////////////////////////////////
623 OSALIGNLINE(struct) SWR_STATS // Declared through the OSALIGNLINE macro (defined outside this file); all counters are 64-bit.
624 {
625 // Occlusion Query
626 uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
627
628 // Pipeline Stats
629 uint64_t PsInvocations; // Number of Pixel Shader invocations
630 uint64_t CsInvocations; // Number of Compute Shader invocations
631
632 };
633
634 //////////////////////////////////////////////////////////////////////////
635 /// SWR_STATS_FE
636 ///
637 /// @brief All statistics generated by FE.
638 /////////////////////////////////////////////////////////////////////////
639 OSALIGNLINE(struct) SWR_STATS_FE // Front-end counters; declared through the OSALIGNLINE macro (defined outside this file).
640 {
641 uint64_t IaVertices; // Number of Fetch Shader vertices
642 uint64_t IaPrimitives; // Number of PA primitives.
643 uint64_t VsInvocations; // Number of Vertex Shader invocations
644 uint64_t HsInvocations; // Number of Hull Shader invocations
645 uint64_t DsInvocations; // Number of Domain Shader invocations
646 uint64_t GsInvocations; // Number of Geometry Shader invocations
647 uint64_t GsPrimitives; // Number of prims GS outputs.
648 uint64_t CInvocations; // Number of clipper invocations
649 uint64_t CPrimitives; // Number of clipper primitives.
650
651 // Streamout Stats
652 uint64_t SoPrimStorageNeeded[4]; // NOTE(review): literal 4 equals MAX_SO_STREAMS, which is defined later in this file - presumably one entry per stream
653 uint64_t SoNumPrimsWritten[4]; // same literal length as SoPrimStorageNeeded
654 };
655
656 //////////////////////////////////////////////////////////////////////////
657 /// STREAMOUT_BUFFERS
658 /////////////////////////////////////////////////////////////////////////
659
660 #define MAX_SO_STREAMS 4 // array length of the per-stream members of SWR_STREAMOUT_STATE and of SWR_STREAMOUT_CONTEXT::pBuffer
661 #define MAX_SO_BUFFERS 4 // not referenced in the visible part of this file
662 #define MAX_ATTRIBUTES 32 // not referenced in the visible part of this file
663
664 struct SWR_STREAMOUT_BUFFER // One stream-output buffer binding; sizes, pitch and offsets are expressed in dwords.
665 {
666 // Pointers to streamout buffers.
667 uint32_t* pBuffer;
668
669 // Offset to the SO write offset. If not null then we update offset here.
670 uint32_t* pWriteOffset;
671
672 bool enable;
673 bool soWriteEnable;
674
675 // Size of buffer in dwords.
676 uint32_t bufferSize;
677
678 // Vertex pitch of buffer in dwords.
679 uint32_t pitch;
680
681 // Offset into buffer in dwords. SOS will increment this offset.
682 uint32_t streamOffset;
683 };
684
685 //////////////////////////////////////////////////////////////////////////
686 /// STREAMOUT_STATE
687 /////////////////////////////////////////////////////////////////////////
688 struct SWR_STREAMOUT_STATE // Stream-output configuration; the array members hold one entry per stream (MAX_SO_STREAMS).
689 {
690 // This disables stream output.
691 bool soEnable;
692
693 // which streams are enabled for streamout
694 bool streamEnable[MAX_SO_STREAMS];
695
696 // If set then do not send any streams to the rasterizer.
697 bool rasterizerDisable;
698
699 // Specifies which stream to send to the rasterizer.
700 uint32_t streamToRasterizer;
701
702 // The stream masks specify which attributes are sent to which streams.
703 // These masks help the FE to setup the pPrimData buffer that is passed
704 // the Stream Output Shader (SOS) function.
705 uint32_t streamMasks[MAX_SO_STREAMS];
706
707 // Number of attributes, including position, per vertex that are streamed out.
708 // This should match number of bits in stream mask.
709 uint32_t streamNumEntries[MAX_SO_STREAMS];
710
711 // Offset to the start of the attributes of the input vertices, in simdvector units
712 uint32_t vertexAttribOffset[MAX_SO_STREAMS];
713 };
714
715 //////////////////////////////////////////////////////////////////////////
716 /// STREAMOUT_CONTEXT - Passed to SOS
717 /////////////////////////////////////////////////////////////////////////
718 struct SWR_STREAMOUT_CONTEXT // Argument block passed by reference to PFN_SO_FUNC.
719 {
720 uint32_t* pPrimData; // primitive data buffer handed to the stream-output function
721 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS]; // array of MAX_SO_STREAMS (4) buffer pointers
722
723 // Num prims written for this stream
724 uint32_t numPrimsWritten;
725
726 // Num prims that should have been written if there were no overflow.
727 uint32_t numPrimStorageNeeded;
728 };
729
730 //////////////////////////////////////////////////////////////////////////
731 /// SWR_GS_STATE - Geometry shader state
732 /////////////////////////////////////////////////////////////////////////
733 struct SWR_GS_STATE // Geometry shader configuration; sizeof is pinned to 64 bytes by the static_assert after the struct.
734 {
735 bool gsEnable;
736
737 // If true, geometry shader emits a single stream, with separate cut buffer.
738 // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
739 // to map vertices to streams
740 bool isSingleStream;
741
742 // Number of input attributes per vertex. Used by the frontend to
743 // optimize assembling primitives for GS
744 uint32_t numInputAttribs;
745
746 // Stride of incoming verts in attributes
747 uint32_t inputVertStride;
748
749 // Output topology - can be point, tristrip, or linestrip
750 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
751
752 // Maximum number of verts that can be emitted by a single instance of the GS
753 uint32_t maxNumVerts;
754
755 // Instance count
756 uint32_t instanceCount;
757
758 // When single stream is enabled, singleStreamID dictates which stream is being output.
759 // field ignored if isSingleStream is false
760 uint32_t singleStreamID;
761
762 // Total amount of memory to allocate for one instance of the shader output in bytes
763 uint32_t allocationSize;
764
765 // Offset to the start of the attributes of the input vertices, in simdvector units, as read by the GS
766 uint32_t vertexAttribOffset;
767
768 // Offset to the attributes as stored by the preceding shader stage.
769 uint32_t srcVertexAttribOffset;
770
771 // Size of the control data section which contains cut or streamID data, in simdscalar units. Should be sized to handle
772 // the maximum number of verts output by the GS. Can be 0 if there are no cuts or streamID bits.
773 uint32_t controlDataSize;
774
775 // Offset to the control data section, in bytes
776 uint32_t controlDataOffset;
777
778 // Total size of an output vertex, in simdvector units
779 uint32_t outputVertexSize;
780
781 // Offset to the start of the vertex section, in bytes
782 uint32_t outputVertexOffset;
783
784 // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
785 // expected to store the final vertex count in the first dword of the gs output stream.
786 uint32_t staticVertexCount;
787
788 uint32_t pad; // explicit filler so the struct totals 64 bytes; adjust it when fields are added or removed
789 };
790 static_assert(sizeof(SWR_GS_STATE) == 64,
791 "Adjust padding to keep size (or remove this assert)");
792
793
794 //////////////////////////////////////////////////////////////////////////
795 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
796 /////////////////////////////////////////////////////////////////////////
797 enum SWR_TS_OUTPUT_TOPOLOGY // Type of SWR_TS_STATE::tsOutputTopology; implicitly numbered from 0.
798 {
799 SWR_TS_OUTPUT_POINT, // 0
800 SWR_TS_OUTPUT_LINE, // 1
801 SWR_TS_OUTPUT_TRI_CW, // 2
802 SWR_TS_OUTPUT_TRI_CCW, // 3
803
804 SWR_TS_OUTPUT_TOPOLOGY_COUNT // 4: count of the entries above; must remain last
805 };
806
807 //////////////////////////////////////////////////////////////////////////
808 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
809 /////////////////////////////////////////////////////////////////////////
810 enum SWR_TS_PARTITIONING // Type of SWR_TS_STATE::partitioning; implicitly numbered from 0.
811 {
812 SWR_TS_INTEGER, // 0
813 SWR_TS_ODD_FRACTIONAL, // 1
814 SWR_TS_EVEN_FRACTIONAL, // 2
815
816 SWR_TS_PARTITIONING_COUNT // 3: count of the entries above; must remain last
817 };
818
819 //////////////////////////////////////////////////////////////////////////
820 /// SWR_TS_DOMAIN - Defines Tessellation Domain
821 /////////////////////////////////////////////////////////////////////////
822 enum SWR_TS_DOMAIN // Type of SWR_TS_STATE::domain; implicitly numbered from 0.
823 {
824 SWR_TS_QUAD, // 0
825 SWR_TS_TRI, // 1
826 SWR_TS_ISOLINE, // 2
827
828 SWR_TS_DOMAIN_COUNT // 3: count of the entries above; must remain last
829 };
830
831 //////////////////////////////////////////////////////////////////////////
832 /// SWR_TS_STATE - Tessellation state
833 /////////////////////////////////////////////////////////////////////////
834 struct SWR_TS_STATE // Tessellation configuration; lines tagged @llvm_enum carry a marker that must stay on the field.
835 {
836 bool tsEnable;
837
838 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
839 SWR_TS_PARTITIONING partitioning; // @llvm_enum
840 SWR_TS_DOMAIN domain; // @llvm_enum
841
842 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
843
844 uint32_t numHsInputAttribs;
845 uint32_t numHsOutputAttribs;
846 uint32_t numDsOutputAttribs;
847 uint32_t dsAllocationSize; // NOTE(review): units are not stated here - confirm
848 uint32_t dsOutVtxAttribOffset;
849
850 // Offset to the start of the attributes of the input vertices, in simdvector units
851 uint32_t vertexAttribOffset;
852 };
853
854 // output merger state
855 struct SWR_RENDER_TARGET_BLEND_STATE // Per-render-target flags as four 1-bit fields in one uint8_t; the other 4 bits are unused.
856 {
857 uint8_t writeDisableRed : 1;
858 uint8_t writeDisableGreen : 1;
859 uint8_t writeDisableBlue : 1;
860 uint8_t writeDisableAlpha : 1;
861 };
862 static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
863
864 enum SWR_MULTISAMPLE_COUNT // Each value is log2 of the sample count; GetNumSamples() converts it with 1 << value.
865 {
866 SWR_MULTISAMPLE_1X = 0,
867 SWR_MULTISAMPLE_2X, // 1
868 SWR_MULTISAMPLE_4X, // 2
869 SWR_MULTISAMPLE_8X, // 3
870 SWR_MULTISAMPLE_16X, // 4
871 SWR_MULTISAMPLE_TYPE_COUNT // 5: count of the entries above, not a valid sample count
872 };
873
874 static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start
875 {
876 return uint32_t(1) << sampleCountEnum; // 2^sampleCountEnum, e.g. SWR_MULTISAMPLE_4X (2) -> 4; no range check, and the shift is undefined for negative values or values >= 32
877 } // @llvm_func_end
878
879 struct SWR_BLEND_STATE // Blend state shared by all render targets; sizeof is pinned to 36 bytes by the static_assert after the struct.
880 {
881 // constant blend factor color in RGBA float
882 float constantColor[4];
883
884 // alpha test reference value in unorm8 or float32
885 uint32_t alphaTestReference;
886 uint32_t sampleMask;
887 // all RT's have the same sample count
888 ///@todo move this to Output Merger state when we refactor
889 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
890
891 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS]; // one 1-byte entry per render target (8 bytes)
892 };
893 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
894
895 struct SWR_BLEND_CONTEXT // Argument block passed by pointer to PFN_BLEND_JIT_FUNC.
896 {
897 const SWR_BLEND_STATE* pBlendState; // read-only blend state
898 simdvector* src;
899 simdvector* src1;
900 simdvector* src0alpha;
901 uint32_t sampleNum;
902 simdvector* pDst;
903 simdvector* result;
904 simdscalari* oMask;
905 simdscalari* pMask;
906 uint32_t isAlphaTested; // NOTE(review): presumably a boolean flag stored as uint32_t - confirm
907 uint32_t isAlphaBlended; // NOTE(review): presumably a boolean flag stored as uint32_t - confirm
908 };
909
910 //////////////////////////////////////////////////////////////////////////
911 /// FUNCTION POINTERS FOR SHADERS
912
913 #if USE_SIMD16_SHADERS
914 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
915 #else
916 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
917 #endif
918 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
919 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
920 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
921 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
922 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
923 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
924 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
925 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
926 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
927 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
928
929
930
//////////////////////////////////////////////////////////////////////////
/// SWR_FRONTEND_STATE
/// State consumed by the front end: viewport-transform bypass, cut index
/// enable, provoking-vertex selection and vertex size.
//////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // When set, clip test, perspective divide and viewport transform are
    // skipped; meant for vertices that are already in screen space.
    bool vpTransformDisable;
    bool bEnableCutIndex;

    // Provoking-vertex selectors, one bit-field per topology group.
    // 'bits' overlays all of them as a single 32-bit word.
    union
    {
        struct
        {
            uint32_t triFan        : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList  : 2;
        };
        uint32_t bits;
    } provokingVertex;

    // provoking vertex for the topology of the current draw
    uint32_t topologyProvokingVertex;

    // Vertex size in simdvector units. Should be the larger of the vertex
    // shader's input size and output size.
    uint32_t vsVertexSize;
};
956
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT_MATRIX
/// Six float terms of a viewport matrix for one viewport.
//////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    // diagonal terms - presumably the x/y/z scale; TODO confirm
    float m00;
    float m11;
    float m22;

    // row-3 terms - presumably the x/y/z translation; TODO confirm
    float m30;
    float m31;
    float m32;
};
969
970 //////////////////////////////////////////////////////////////////////////
971 /// VIEWPORT_MATRIXES
972 /////////////////////////////////////////////////////////////////////////
973 struct SWR_VIEWPORT_MATRICES
974 {
975 float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
976 float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
977 float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
978 float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
979 float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
980 float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
981 };
982
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
/// Viewport rectangle and depth range, all stored as floats.
//////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    // rectangle origin
    float x;
    float y;

    // rectangle extent
    float width;
    float height;

    // depth range
    float minZ;
    float maxZ;
};
995
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
/// Face culling selection. The four values fit in a 2-bit field.
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH  = 0,
    SWR_CULLMODE_NONE  = 1,
    SWR_CULLMODE_FRONT = 2,
    SWR_CULLMODE_BACK  = 3
};
1006
// Polygon fill selection: points, wireframe or solid.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT     = 0,
    SWR_FILLMODE_WIREFRAME = 1,
    SWR_FILLMODE_SOLID     = 2
};
1013
// Winding order treated as front facing: clockwise or counter-clockwise.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW  = 0,
    SWR_FRONTWINDING_CCW = 1
};
1019
1020
// Pixel location convention: center, or UL (presumably upper-left - TODO confirm).
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER = 0,
    SWR_PIXEL_LOCATION_UL     = 1,
};
1026
1027 // fixed point screen space sample locations within a pixel
1028 struct SWR_MULTISAMPLE_POS
1029 {
1030 public:
1031 INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
1032 INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
1033 INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
1034 INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
1035 INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
1036 INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
1037 INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
1038 INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
1039 typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
1040 INLINE sampleArrayT X() const { return _x; }; // @llvm_func
1041 INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
1042 INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
1043 INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
1044 INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
1045 INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
1046 INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
1047 INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
1048
1049 INLINE void PrecalcSampleData(int numSamples); //@llvm_func
1050
1051 private:
1052 template <typename MaskT>
1053 INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
1054 INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
1055
1056 // scalar sample values
1057 uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
1058 uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
1059 float _x[SWR_MAX_NUM_MULTISAMPLES];
1060 float _y[SWR_MAX_NUM_MULTISAMPLES];
1061
1062 // precalc'd / vectorized samples
1063 __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
1064 __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
1065 simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
1066 simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
1067 __m128i tileSampleOffsetsX;
1068 __m128i tileSampleOffsetsY;
1069 };
1070
1071 //////////////////////////////////////////////////////////////////////////
1072 /// SWR_RASTSTATE
1073 //////////////////////////////////////////////////////////////////////////
1074 struct SWR_RASTSTATE
1075 {
1076 uint32_t cullMode : 2;
1077 uint32_t fillMode : 2;
1078 uint32_t frontWinding : 1;
1079 uint32_t scissorEnable : 1;
1080 uint32_t depthClipEnable : 1;
1081 uint32_t clipHalfZ : 1;
1082 uint32_t pointParam : 1;
1083 uint32_t pointSpriteEnable : 1;
1084 uint32_t pointSpriteTopOrigin : 1;
1085 uint32_t forcedSampleCount : 1;
1086 uint32_t pixelOffset : 1;
1087 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
1088 uint32_t conservativeRast : 1;
1089
1090 float pointSize;
1091 float lineWidth;
1092
1093 float depthBias;
1094 float slopeScaledDepthBias;
1095 float depthBiasClamp;
1096 SWR_FORMAT depthFormat; // @llvm_enum
1097
1098 // sample count the rasterizer is running at
1099 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
1100 uint32_t pixelLocation; // UL or Center
1101 SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
1102 bool bIsCenterPattern; // @llvm_enum
1103 };
1104
1105
// Constant sources selectable for an attribute. The four values fit the
// 2-bit constantSource field of SWR_ATTRIB_SWIZZLE.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000       = 0,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT = 1,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT = 2,
    SWR_CONSTANT_SOURCE_PRIM_ID          = 3
};
1113
// One attribute swizzle entry: 11 bits of bit-fields declared on a
// 16-bit base type.
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib          : 5; // index of the source attribute
    uint16_t constantSource        : 2; // which constant source to apply
    uint16_t componentOverrideMask : 4; // components replaced by the constant source
};
1120
1121 // backend state
1122 struct SWR_BACKEND_STATE
1123 {
1124 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
1125 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
1126
1127 bool swizzleEnable; // when enabled, core will parse the swizzle map when
1128 // setting up attributes for the backend, otherwise
1129 // all attributes up to numAttributes will be sent
1130 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
1131 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
1132
1133 bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
1134 bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
1135
1136 // User clip/cull distance enables
1137 uint8_t cullDistanceMask;
1138 uint8_t clipDistanceMask;
1139
1140 // padding to ensure swizzleMap starts 64B offset from start of the struct
1141 // and that the next fields are dword aligned.
1142 uint8_t pad[10];
1143
1144 // Offset to the start of the attributes of the input vertices, in simdvector units
1145 uint32_t vertexAttribOffset;
1146
1147 // Offset to clip/cull attrib section of the vertex, in simdvector units
1148 uint32_t vertexClipCullOffset;
1149
1150 SWR_ATTRIB_SWIZZLE swizzleMap[32];
1151 };
1152 static_assert(sizeof(SWR_BACKEND_STATE) == 128,
1153 "Adjust padding to keep size (or remove this assert)");
1154
1155
// Depth/stencil state. The named fields are overlaid by value[3] so the
// state can also be accessed as three 32-bit words.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0: five 1-bit enables followed by nine 3-bit selectors (32 bits)
        uint32_t depthWriteEnable             : 1;
        uint32_t depthTestEnable              : 1;
        uint32_t stencilWriteEnable           : 1;
        uint32_t stencilTestEnable            : 1;
        uint32_t doubleSidedStencilTestEnable : 1;

        uint32_t depthTestFunc   : 3;
        uint32_t stencilTestFunc : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp          : 3;
        uint32_t backfaceStencilTestFunc        : 3;
        uint32_t stencilPassDepthPassOp         : 3;
        uint32_t stencilPassDepthFailOp         : 3;
        uint32_t stencilFailOp                  : 3;

        // dword 1: four 8-bit stencil masks
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2: two 8-bit reference values; the rest of the word has no named field
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];
};
1190
// Shading rate: per pixel or per sample.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    SWR_SHADING_RATE_COUNT  = 2, // number of shading rates above
};
1197
// Input coverage modes: none, normal or inner conservative.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE               = 0,
    SWR_INPUT_COVERAGE_NORMAL             = 1,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE = 2,
    SWR_INPUT_COVERAGE_COUNT              = 3, // number of modes above
};
1205
// Kind of offset applied to the pixel position: none, sample or centroid.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    SWR_PS_POSITION_OFFSET_COUNT    = 3, // number of offset kinds above
};
1213
// Barycentric coordinate kinds, one bit each so they can be OR-ed together.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 1 << 0,
    SWR_BARYCENTRIC_CENTROID_MASK   = 1 << 1,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 1 << 2,
};
1220
1221 // pixel shader state
1222 struct SWR_PS_STATE
1223 {
1224 // dword 0-1
1225 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1226
1227 // dword 2
1228 uint32_t killsPixel : 1; // pixel shader can kill pixels
1229 uint32_t inputCoverage : 2; // ps uses input coverage
1230 uint32_t writesODepth : 1; // pixel shader writes to depth
1231 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1232 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1233 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1234 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1235 uint32_t usesUAV : 1; // pixel shader accesses UAV
1236 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1237
1238 uint8_t renderTargetMask; // Mask of render targets written
1239 };
1240
// Depth bounds test state: an enable flag and the min/max bound values.
struct SWR_DEPTH_BOUNDS_STATE
{
    bool  depthBoundsTestEnable;   // enable flag for the depth bounds test
    float depthBoundsTestMinValue; // minimum bound
    float depthBoundsTestMaxValue; // maximum bound
};
1248