swr/rast: cache line align hottile buffers
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / state.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29
30 #include "common/formats.h"
31 #include "common/intrin.h"
32 #include <functional>
33 #include <algorithm>
34
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY
/// @brief Identifiers for primitive topologies.
///        TOP_PATCHLIST_n is always TOP_PATCHLIST_BASE + n, so the
///        control point count of a patch list is (value - TOP_PATCHLIST_BASE).
//////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                = 0x00,
    TOP_POINT_LIST             = 0x01,
    TOP_LINE_LIST              = 0x02,
    TOP_LINE_STRIP             = 0x03,
    TOP_TRIANGLE_LIST          = 0x04,
    TOP_TRIANGLE_STRIP         = 0x05,
    TOP_TRIANGLE_FAN           = 0x06,
    TOP_QUAD_LIST              = 0x07,
    TOP_QUAD_STRIP             = 0x08,
    TOP_LINE_LIST_ADJ          = 0x09,
    TOP_LISTSTRIP_ADJ          = 0x0A,
    TOP_TRI_LIST_ADJ           = 0x0B,
    TOP_TRI_STRIP_ADJ          = 0x0C,
    TOP_TRI_STRIP_REVERSE      = 0x0D,
    TOP_POLYGON                = 0x0E,
    TOP_RECT_LIST              = 0x0F,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    // 0x15 is not assigned.
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17, /// @todo What is this??

    // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_BASE         = 0x1F,
    TOP_PATCHLIST_1            = TOP_PATCHLIST_BASE + 1,  // 0x20, list of 1-vertex patches
    TOP_PATCHLIST_2            = TOP_PATCHLIST_BASE + 2,  // 0x21
    TOP_PATCHLIST_3            = TOP_PATCHLIST_BASE + 3,  // 0x22
    TOP_PATCHLIST_4            = TOP_PATCHLIST_BASE + 4,  // 0x23
    TOP_PATCHLIST_5            = TOP_PATCHLIST_BASE + 5,  // 0x24
    TOP_PATCHLIST_6            = TOP_PATCHLIST_BASE + 6,  // 0x25
    TOP_PATCHLIST_7            = TOP_PATCHLIST_BASE + 7,  // 0x26
    TOP_PATCHLIST_8            = TOP_PATCHLIST_BASE + 8,  // 0x27
    TOP_PATCHLIST_9            = TOP_PATCHLIST_BASE + 9,  // 0x28
    TOP_PATCHLIST_10           = TOP_PATCHLIST_BASE + 10, // 0x29
    TOP_PATCHLIST_11           = TOP_PATCHLIST_BASE + 11, // 0x2A
    TOP_PATCHLIST_12           = TOP_PATCHLIST_BASE + 12, // 0x2B
    TOP_PATCHLIST_13           = TOP_PATCHLIST_BASE + 13, // 0x2C
    TOP_PATCHLIST_14           = TOP_PATCHLIST_BASE + 14, // 0x2D
    TOP_PATCHLIST_15           = TOP_PATCHLIST_BASE + 15, // 0x2E
    TOP_PATCHLIST_16           = TOP_PATCHLIST_BASE + 16, // 0x2F
    TOP_PATCHLIST_17           = TOP_PATCHLIST_BASE + 17, // 0x30
    TOP_PATCHLIST_18           = TOP_PATCHLIST_BASE + 18, // 0x31
    TOP_PATCHLIST_19           = TOP_PATCHLIST_BASE + 19, // 0x32
    TOP_PATCHLIST_20           = TOP_PATCHLIST_BASE + 20, // 0x33
    TOP_PATCHLIST_21           = TOP_PATCHLIST_BASE + 21, // 0x34
    TOP_PATCHLIST_22           = TOP_PATCHLIST_BASE + 22, // 0x35
    TOP_PATCHLIST_23           = TOP_PATCHLIST_BASE + 23, // 0x36
    TOP_PATCHLIST_24           = TOP_PATCHLIST_BASE + 24, // 0x37
    TOP_PATCHLIST_25           = TOP_PATCHLIST_BASE + 25, // 0x38
    TOP_PATCHLIST_26           = TOP_PATCHLIST_BASE + 26, // 0x39
    TOP_PATCHLIST_27           = TOP_PATCHLIST_BASE + 27, // 0x3A
    TOP_PATCHLIST_28           = TOP_PATCHLIST_BASE + 28, // 0x3B
    TOP_PATCHLIST_29           = TOP_PATCHLIST_BASE + 29, // 0x3C
    TOP_PATCHLIST_30           = TOP_PATCHLIST_BASE + 30, // 0x3D
    TOP_PATCHLIST_31           = TOP_PATCHLIST_BASE + 31, // 0x3E
    TOP_PATCHLIST_32           = TOP_PATCHLIST_BASE + 32, // 0x3F, list of 32-vertex patches
};
98
//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_TYPE
/// @brief Shader stage identifiers. NUM_SHADER_TYPES is the number of
///        stages listed before it.
//////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX   = 0,
    SHADER_GEOMETRY = 1,
    SHADER_DOMAIN   = 2,
    SHADER_HULL     = 3,
    SHADER_PIXEL    = 4,
    SHADER_COMPUTE  = 5,

    NUM_SHADER_TYPES, // count sentinel, keep last
};
113
//////////////////////////////////////////////////////////////////////////
/// SWR_RENDERTARGET_ATTACHMENT
/// @brief Indices of the color, depth and stencil attachments.
/// @todo It's not clear what an "attachment" means; it's not a common term.
//////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    SWR_NUM_ATTACHMENTS // count sentinel, keep last
};
133
// Number of color render targets.
#define SWR_NUM_RENDERTARGETS 8

// One bit per attachment; bit n matches the enumerator with value n in
// SWR_RENDERTARGET_ATTACHMENT (color 0-7 in bits 0-7, depth in bit 8,
// stencil in bit 9).
#define SWR_ATTACHMENT_COLOR0_BIT  0x001
#define SWR_ATTACHMENT_COLOR1_BIT  0x002
#define SWR_ATTACHMENT_COLOR2_BIT  0x004
#define SWR_ATTACHMENT_COLOR3_BIT  0x008
#define SWR_ATTACHMENT_COLOR4_BIT  0x010
#define SWR_ATTACHMENT_COLOR5_BIT  0x020
#define SWR_ATTACHMENT_COLOR6_BIT  0x040
#define SWR_ATTACHMENT_COLOR7_BIT  0x080
#define SWR_ATTACHMENT_DEPTH_BIT   0x100
#define SWR_ATTACHMENT_STENCIL_BIT 0x200

// Union of all ten attachment bits, and of the eight color bits only.
#define SWR_ATTACHMENT_MASK_ALL    0x3ff
#define SWR_ATTACHMENT_MASK_COLOR  0x0ff
148
149
//////////////////////////////////////////////////////////////////////////
/// SWR_INNER_TESSFACTOR_ID
/// @brief Indices of the inner tessellation factors.
///        See the GetTessFactorOutputPosition code for documentation.
//////////////////////////////////////////////////////////////////////////
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE = 0,
    SWR_QUAD_V_INSIDE     = 1,

    SWR_NUM_INNER_TESS_FACTORS, // count sentinel, keep last
};
160
//////////////////////////////////////////////////////////////////////////
/// SWR_OUTER_TESSFACTOR_ID
/// @brief Indices of the outer tessellation factors.
///        See the GetTessFactorOutputPosition code for documentation.
//////////////////////////////////////////////////////////////////////////
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_U_EQ1_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    SWR_NUM_OUTER_TESS_FACTORS, // count sentinel, keep last
};
173
174
175 /////////////////////////////////////////////////////////////////////////
176 /// simdvertex
177 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
178 /// Contains space for position, SGV, and 32 generic attributes
179 /////////////////////////////////////////////////////////////////////////
// Slot numbering inside a vertex. Every slot after the position is
// expressed relative to VERTEX_POSITION_END_SLOT, so the clip/cull and
// attribute slots move together if the position range changes.
enum SWR_VTX_SLOTS
{
    VERTEX_SGV_SLOT              = 0,
    VERTEX_SGV_RTAI_COMP         = 0,
    VERTEX_SGV_VAI_COMP          = 1,
    VERTEX_SGV_POINT_SIZE_COMP   = 2,
    VERTEX_POSITION_SLOT         = 1,
    VERTEX_POSITION_END_SLOT     = 1,
    VERTEX_CLIPCULL_DIST_LO_SLOT = (VERTEX_POSITION_END_SLOT + 1),  // VS writes lower 4 clip/cull dist
    VERTEX_CLIPCULL_DIST_HI_SLOT = (VERTEX_POSITION_END_SLOT + 2),  // VS writes upper 4 clip/cull dist
    VERTEX_ATTRIB_START_SLOT     = (VERTEX_POSITION_END_SLOT + 3),
    VERTEX_ATTRIB_END_SLOT       = (VERTEX_POSITION_END_SLOT + 34),
    SWR_VTX_NUM_SLOTS            = (VERTEX_ATTRIB_END_SLOT + 1)     // total slot count
};
194
195 // SoAoSoA
196 struct simdvertex
197 {
198 simdvector attrib[SWR_VTX_NUM_SLOTS];
199 };
200
#if ENABLE_AVX512_SIMD16
// simd16 counterpart of simdvertex: one simd16vector per vertex slot.
// Only compiled when ENABLE_AVX512_SIMD16 is set.
struct simd16vertex
{
    simd16vector attrib[SWR_VTX_NUM_SLOTS];
};

#endif
208 //////////////////////////////////////////////////////////////////////////
209 /// SWR_VS_CONTEXT
210 /// @brief Input to vertex shader
211 /////////////////////////////////////////////////////////////////////////
212 struct SWR_VS_CONTEXT
213 {
214 simdvertex* pVin; // IN: SIMD input vertex data store
215 simdvertex* pVout; // OUT: SIMD output vertex data store
216
217 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
218 simdscalari VertexID; // IN: Vertex ID
219 simdscalari mask; // IN: Active mask for shader
220 #if USE_SIMD16_FRONTEND
221 uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
222 #endif
223 };
224
225 /////////////////////////////////////////////////////////////////////////
226 /// ScalarCPoint
227 /// @brief defines a control point element as passed from the output
228 /// of the hull shader to the input of the domain shader
229 /////////////////////////////////////////////////////////////////////////
// A single scalar (non-SIMD) attribute: four float components x, y, z, w.
struct ScalarAttrib
{
    float x;
    float y;
    float z;
    float w;
};
237
238 struct ScalarCPoint
239 {
240 ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
241 };
242
243 //////////////////////////////////////////////////////////////////////////
244 /// SWR_TESSELLATION_FACTORS
245 /// @brief Tessellation factors structure (non-vector)
246 /////////////////////////////////////////////////////////////////////////
247 struct SWR_TESSELLATION_FACTORS
248 {
249 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
250 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
251 };
252
253 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
254 struct ScalarPatch
255 {
256 SWR_TESSELLATION_FACTORS tessFactors;
257 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
258 ScalarCPoint patchData;
259 };
260
261 //////////////////////////////////////////////////////////////////////////
262 /// SWR_HS_CONTEXT
263 /// @brief Input to hull shader
264 /////////////////////////////////////////////////////////////////////////
265 struct SWR_HS_CONTEXT
266 {
267 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
268 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
269 simdscalari mask; // IN: Active mask for shader
270 ScalarPatch* pCPout; // OUT: Output control point patch
271 // SIMD-sized-array of SCALAR patches
272 };
273
274 //////////////////////////////////////////////////////////////////////////
275 /// SWR_DS_CONTEXT
276 /// @brief Input to domain shader
277 /////////////////////////////////////////////////////////////////////////
278 struct SWR_DS_CONTEXT
279 {
280 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
281 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
282 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
283 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
284 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
285 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
286 simdscalari mask; // IN: Active mask for shader
287 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
288 };
289
290 //////////////////////////////////////////////////////////////////////////
291 /// SWR_GS_CONTEXT
292 /// @brief Input to geometry shader.
293 /////////////////////////////////////////////////////////////////////////
294 struct SWR_GS_CONTEXT
295 {
296 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims
297 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
298 uint32_t InstanceID; // IN: input instance ID
299 simdscalari mask; // IN: Active mask for shader
300 uint8_t* pStream; // OUT: output stream (contains vertices for all output streams)
301 uint8_t* pCutOrStreamIdBuffer; // OUT: cut or stream id buffer
302 simdscalari vertexCount; // OUT: num vertices emitted per SIMD lane
303 };
304
305 struct PixelPositions
306 {
307 simdscalar UL;
308 simdscalar center;
309 simdscalar sample;
310 simdscalar centroid;
311 };
312
// Upper bound on the number of multisample samples.
#define SWR_MAX_NUM_MULTISAMPLES 16
314
315 //////////////////////////////////////////////////////////////////////////
316 /// SWR_PS_CONTEXT
317 /// @brief Input to pixel shader.
318 /////////////////////////////////////////////////////////////////////////
319 struct SWR_PS_CONTEXT
320 {
321 PixelPositions vX; // IN: x location(s) of pixels
322 PixelPositions vY; // IN: x location(s) of pixels
323 simdscalar vZ; // INOUT: z location of pixels
324 simdscalari activeMask; // OUT: mask for kill
325 simdscalar inputMask; // IN: input coverage mask for all samples
326 simdscalari oMask; // OUT: mask for output coverage
327
328 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
329 PixelPositions vJ;
330 PixelPositions vOneOverW; // IN: 1/w
331
332 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
333 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
334 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
335 const float *I; // IN: Barycentric A, B, and C coefs used to compute I
336 const float *J; // IN: Barycentric A, B, and C coefs used to compute J
337 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
338 const float* pSamplePosX; // IN: array of sample positions
339 const float* pSamplePosY; // IN: array of sample positions
340 simdvector shaded[SWR_NUM_RENDERTARGETS];
341 // OUT: result color per rendertarget
342
343 uint32_t frontFace; // IN: front- 1, back- 0
344 uint32_t sampleIndex; // IN: sampleIndex
345 uint32_t renderTargetArrayIndex; // IN: render target array index from GS
346 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
347
348 uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
349 };
350
//////////////////////////////////////////////////////////////////////////
/// SWR_CS_CONTEXT
/// @brief Context passed to the compute shader.
//////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // System values accepted by a compute shader:
    //   o ThreadId                 - Current thread id relative to all other threads in dispatch.
    //   o ThreadGroupId            - Current thread group id relative to all other groups in dispatch.
    //   o ThreadIdInGroup          - Current thread relative to all threads in the current thread group.
    //   o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
    //
    // None of them is passed in directly; all can be computed in the shader
    // from the current tile counter. The ThreadGroupId is the current thread
    // group index relative to all thread groups in the Dispatch call, and
    // ThreadId, ThreadIdInGroup and ThreadIdInGroupFlattened can be derived
    // from ThreadGroupId.
    //
    // The tile counter is an atomic counter that resides in the draw context
    // and is initialized to the product of the dispatch dims:
    //
    //     tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    //
    // Each CPU worker thread atomically decrements this counter and passes
    // the current count into the shader. When the count reaches 0, all
    // thread groups in the dispatch call have been completed.

    uint32_t tileCounter;         // The tile counter value for this thread group.

    uint32_t dispatchDims[3];     // Dispatch dimensions used by shader to compute system values from the tile counter.

    uint8_t* pTGSM;               // Thread Group Shared Memory pointer.

    uint8_t* pSpillFillBuffer;    // Spill/fill buffer for barrier support

    // Pointer to scratch space buffer used by the shader; the shader is
    // responsible for subdividing scratch space per instance/simd.
    uint8_t* pScratchSpace;

    uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
};
391
392 // enums
// Surface tiling layouts. SWR_TILE_MODE_COUNT is the number of modes.
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0x0, // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 0x1, // W major tiling
    SWR_TILE_MODE_XMAJOR = 0x2, // X major tiling
    SWR_TILE_MODE_YMAJOR = 0x3, // Y major tiling
    SWR_TILE_SWRZ        = 0x4, // SWR-Z tiling

    SWR_TILE_MODE_COUNT         // count sentinel, keep last
};
403
// Surface kinds. The value 6 is not assigned; SURFACE_NULL is 7.
enum SWR_SURFACE_TYPE
{
    SURFACE_1D                = 0,
    SURFACE_2D                = 1,
    SURFACE_3D                = 2,
    SURFACE_CUBE              = 3,
    SURFACE_BUFFER            = 4,
    SURFACE_STRUCTURED_BUFFER = 5,
    SURFACE_NULL              = 7
};
414
// Depth comparison functions. NUM_ZFUNC is the number of functions.
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    NUM_ZFUNC        // count sentinel, keep last
};
427
// Stencil operations.
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
439
// Blend factors. Values 0-9 are the direct factors, 10 is ZERO and
// 11-18 are the INV_ factors.
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
462
// Blend operations.
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
471
// Logic operations (16 entries, values 0 through 15).
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0,
    LOGICOP_NOR           = 1,
    LOGICOP_AND_INVERTED  = 2,
    LOGICOP_COPY_INVERTED = 3,
    LOGICOP_AND_REVERSE   = 4,
    LOGICOP_INVERT        = 5,
    LOGICOP_XOR           = 6,
    LOGICOP_NAND          = 7,
    LOGICOP_AND           = 8,
    LOGICOP_EQUIV         = 9,
    LOGICOP_NOOP          = 10,
    LOGICOP_OR_INVERTED   = 11,
    LOGICOP_COPY          = 12,
    LOGICOP_OR_REVERSE    = 13,
    LOGICOP_OR            = 14,
    LOGICOP_SET           = 15,
};
491
//////////////////////////////////////////////////////////////////////////
/// SWR_AUX_MODE
/// @brief Specifies how the auxiliary buffer is used by the driver.
//////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE  = 0,
    AUX_MODE_COLOR = 1,
    AUX_MODE_UAV   = 2,
    AUX_MODE_DEPTH = 3,
};
503
504 //////////////////////////////////////////////////////////////////////////
505 /// SWR_SURFACE_STATE
506 //////////////////////////////////////////////////////////////////////////
507 struct SWR_SURFACE_STATE
508 {
509 uint8_t *pBaseAddress;
510 SWR_SURFACE_TYPE type; // @llvm_enum
511 SWR_FORMAT format; // @llvm_enum
512 uint32_t width;
513 uint32_t height;
514 uint32_t depth;
515 uint32_t numSamples;
516 uint32_t samplePattern;
517 uint32_t pitch;
518 uint32_t qpitch;
519 uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
520 uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
521 float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
522 uint32_t lod; // for render targets, the lod being rendered to
523 uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
524 SWR_TILE_MODE tileMode; // @llvm_enum
525 uint32_t halign;
526 uint32_t valign;
527 uint32_t xOffset;
528 uint32_t yOffset;
529
530 uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
531
532 uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc.
533 SWR_AUX_MODE auxMode; // @llvm_enum
534
535
536 bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
537 };
538
// Vertex fetch state.
// WARNING: any change to this struct needs to be reflected in the
// fetch shader jit.
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t index;
    uint32_t pitch;
    const uint8_t *pData;
    uint32_t size;
    uint32_t numaNode;
    // min vertex (for bounds checking)
    uint32_t minVertex;
    // size / pitch. precalculated value used by fetch shader for OOB checks
    uint32_t maxVertex;
    // size % pitch. precalculated value used by fetch shader for partially OOB vertices
    uint32_t partialInboundsSize;
};
553
554 struct SWR_INDEX_BUFFER_STATE
555 {
556 // Format type for indices (e.g. UINT16, UINT32, etc.)
557 SWR_FORMAT format; // @llvm_enum
558 const void *pIndices;
559 uint32_t size;
560 };
561
562
563 //////////////////////////////////////////////////////////////////////////
564 /// SWR_FETCH_CONTEXT
565 /// @brief Input to fetch shader.
566 /// @note WARNING - Changes to this struct need to be reflected in the
567 /// fetch shader jit.
568 /////////////////////////////////////////////////////////////////////////
569 struct SWR_FETCH_CONTEXT
570 {
571 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
572 const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
573 const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
574 uint32_t CurInstance; // IN: current instance
575 uint32_t BaseVertex; // IN: base vertex
576 uint32_t StartVertex; // IN: start vertex
577 uint32_t StartInstance; // IN: start instance
578 simdscalari VertexID; // OUT: vector of vertex IDs
579 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
580 };
581
582 //////////////////////////////////////////////////////////////////////////
583 /// SWR_STATS
584 ///
585 /// @brief All statistics generated by SWR go here. These are public
586 /// to driver.
587 /////////////////////////////////////////////////////////////////////////
588 OSALIGNLINE(struct) SWR_STATS
589 {
590 // Occlusion Query
591 uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
592
593 // Pipeline Stats
594 uint64_t PsInvocations; // Number of Pixel Shader invocations
595 uint64_t CsInvocations; // Number of Compute Shader invocations
596
597 };
598
599 //////////////////////////////////////////////////////////////////////////
600 /// SWR_STATS
601 ///
602 /// @brief All statistics generated by FE.
603 /////////////////////////////////////////////////////////////////////////
604 OSALIGNLINE(struct) SWR_STATS_FE
605 {
606 uint64_t IaVertices; // Number of Fetch Shader vertices
607 uint64_t IaPrimitives; // Number of PA primitives.
608 uint64_t VsInvocations; // Number of Vertex Shader invocations
609 uint64_t HsInvocations; // Number of Hull Shader invocations
610 uint64_t DsInvocations; // Number of Domain Shader invocations
611 uint64_t GsInvocations; // Number of Geometry Shader invocations
612 uint64_t GsPrimitives; // Number of prims GS outputs.
613 uint64_t CInvocations; // Number of clipper invocations
614 uint64_t CPrimitives; // Number of clipper primitives.
615
616 // Streamout Stats
617 uint64_t SoPrimStorageNeeded[4];
618 uint64_t SoNumPrimsWritten[4];
619 };
620
621 //////////////////////////////////////////////////////////////////////////
622 /// STREAMOUT_BUFFERS
623 /////////////////////////////////////////////////////////////////////////
624
// Stream output limits: number of streams, number of buffers, and
// number of attributes.
#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
#define MAX_ATTRIBUTES 32
628
// One stream output buffer binding. Sizes, pitches and offsets are in
// dwords.
struct SWR_STREAMOUT_BUFFER
{
    bool enable;
    bool soWriteEnable;

    uint32_t* pBuffer;      // Pointers to streamout buffers.

    uint32_t bufferSize;    // Size of buffer in dwords.

    uint32_t pitch;         // Vertex pitch of buffer in dwords.

    uint32_t streamOffset;  // Offset into buffer in dwords. SOS will increment this offset.

    uint32_t* pWriteOffset; // Offset to the SO write offset. If not null then we update offset here.

};
650
651 //////////////////////////////////////////////////////////////////////////
652 /// STREAMOUT_STATE
653 /////////////////////////////////////////////////////////////////////////
654 struct SWR_STREAMOUT_STATE
655 {
656 // This disables stream output.
657 bool soEnable;
658
659 // which streams are enabled for streamout
660 bool streamEnable[MAX_SO_STREAMS];
661
662 // If set then do not send any streams to the rasterizer.
663 bool rasterizerDisable;
664
665 // Specifies which stream to send to the rasterizer.
666 uint32_t streamToRasterizer;
667
668 // The stream masks specify which attributes are sent to which streams.
669 // These masks help the FE to setup the pPrimData buffer that is passed
670 // the Stream Output Shader (SOS) function.
671 uint32_t streamMasks[MAX_SO_STREAMS];
672
673 // Number of attributes, including position, per vertex that are streamed out.
674 // This should match number of bits in stream mask.
675 uint32_t streamNumEntries[MAX_SO_STREAMS];
676
677 // Offset to the start of the attributes of the input vertices, in simdvector units
678 uint32_t vertexAttribOffset[MAX_SO_STREAMS];
679 };
680
681 //////////////////////////////////////////////////////////////////////////
682 /// STREAMOUT_CONTEXT - Passed to SOS
683 /////////////////////////////////////////////////////////////////////////
684 struct SWR_STREAMOUT_CONTEXT
685 {
686 uint32_t* pPrimData;
687 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
688
689 // Num prims written for this stream
690 uint32_t numPrimsWritten;
691
692 // Num prims that should have been written if there were no overflow.
693 uint32_t numPrimStorageNeeded;
694 };
695
696 //////////////////////////////////////////////////////////////////////////
697 /// SWR_GS_STATE - Geometry shader state
698 /////////////////////////////////////////////////////////////////////////
699 struct SWR_GS_STATE
700 {
701 bool gsEnable;
702
703 // number of input attributes per vertex. used by the frontend to
704 // optimize assembling primitives for GS
705 uint32_t numInputAttribs;
706
707 // output topology - can be point, tristrip, or linestrip
708 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
709
710 // maximum number of verts that can be emitted by a single instance of the GS
711 uint32_t maxNumVerts;
712
713 // instance count
714 uint32_t instanceCount;
715
716 // if true, geometry shader emits a single stream, with separate cut buffer.
717 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
718 // to map vertices to streams
719 bool isSingleStream;
720
721 // when single stream is enabled, singleStreamID dictates which stream is being output.
722 // field ignored if isSingleStream is false
723 uint32_t singleStreamID;
724
725 // Offset to the start of the attributes of the input vertices, in simdvector units
726 uint32_t vertexAttribOffset;
727 };
728
729
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_OUTPUT_TOPOLOGY
/// @brief Defines data output by the tessellator / DS.
//////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT   = 0,
    SWR_TS_OUTPUT_LINE    = 1,
    SWR_TS_OUTPUT_TRI_CW  = 2,
    SWR_TS_OUTPUT_TRI_CCW = 3,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT // count sentinel, keep last
};
742
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_PARTITIONING
/// @brief Defines tessellation algorithm.
//////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER         = 0,
    SWR_TS_ODD_FRACTIONAL  = 1,
    SWR_TS_EVEN_FRACTIONAL = 2,

    SWR_TS_PARTITIONING_COUNT // count sentinel, keep last
};
754
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_DOMAIN
/// @brief Defines Tessellation Domain.
//////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD    = 0,
    SWR_TS_TRI     = 1,
    SWR_TS_ISOLINE = 2,

    SWR_TS_DOMAIN_COUNT // count sentinel, keep last
};
766
767 //////////////////////////////////////////////////////////////////////////
768 /// SWR_TS_STATE - Tessellation state
769 /////////////////////////////////////////////////////////////////////////
770 struct SWR_TS_STATE
771 {
772 bool tsEnable;
773 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
774 SWR_TS_PARTITIONING partitioning; // @llvm_enum
775 SWR_TS_DOMAIN domain; // @llvm_enum
776
777 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
778
779 uint32_t numHsInputAttribs;
780 uint32_t numHsOutputAttribs;
781 uint32_t numDsOutputAttribs;
782
783 // Offset to the start of the attributes of the input vertices, in simdvector units
784 uint32_t vertexAttribOffset;
785 };
786
// Output merger state: per render target write-disable flags, one bit
// per color channel. The struct must stay exactly one byte in size.
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed   : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue  : 1;
    uint8_t writeDisableAlpha : 1;
};
static_assert(sizeof(struct SWR_RENDER_TARGET_BLEND_STATE) == 1,
              "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
796
// Multisample modes. The enumerator value is an index, not the sample
// count itself; GetNumSamples() maps it to 1, 2, 4, 8 or 16.
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X  = 0,
    SWR_MULTISAMPLE_2X  = 1,
    SWR_MULTISAMPLE_4X  = 2,
    SWR_MULTISAMPLE_8X  = 3,
    SWR_MULTISAMPLE_16X = 4,
    SWR_MULTISAMPLE_TYPE_COUNT // count sentinel, keep last
};
806
807 INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
808 {
809 static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
810 assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
811 return sampleCountLUT[sampleCount];
812 } // @llvm_func_end
813
814 struct SWR_BLEND_STATE
815 {
816 // constant blend factor color in RGBA float
817 float constantColor[4];
818
819 // alpha test reference value in unorm8 or float32
820 uint32_t alphaTestReference;
821 uint32_t sampleMask;
822 // all RT's have the same sample count
823 ///@todo move this to Output Merger state when we refactor
824 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
825
826 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
827 };
828 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
829
830 //////////////////////////////////////////////////////////////////////////
831 /// FUNCTION POINTERS FOR SHADERS
832
833 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
834 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
835 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
836 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
837 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
838 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
839 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
840 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
841 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
842 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
843 simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
844 uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
845 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar);
846
847
848
//////////////////////////////////////////////////////////////////////////
/// SWR_FRONTEND_STATE
/// @brief Frontend state.
//////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // Skip clip test, perspective divide, and viewport transform.
    // Intended for verts in screen space.
    bool vpTransformDisable;
    bool bEnableCutIndex;

    // Provoking vertex selection, available either as individual bit
    // fields or as the raw 32-bit word `bits`.
    union
    {
        struct
        {
            uint32_t triFan        : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList  : 2;
        };
        uint32_t bits;
    } provokingVertex;

    // Provoking vertex for the draw topology.
    uint32_t topologyProvokingVertex;

    // Size of a vertex in simdvector units. Should be sized to the
    // maximum of the input/output of the vertex shader.
    uint32_t vsVertexSize;
};
874
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT_MATRIX
/// @brief Six viewport matrix entries, named by their matrix position:
///        m00, m11, m22, m30, m31 and m32.
//////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};
887
888 //////////////////////////////////////////////////////////////////////////
889 /// VIEWPORT_MATRIXES
890 /////////////////////////////////////////////////////////////////////////
891 struct SWR_VIEWPORT_MATRICES
892 {
893 float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
894 float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
895 float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
896 float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
897 float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
898 float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
899 };
900
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
/// @brief Viewport rectangle (x, y, width, height) and depth range
///        (minZ, maxZ), all as floats.
//////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    float x;
    float y;
    float width;
    float height;
    float minZ;
    float maxZ;
};
913
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
/// @brief Face-culling selector. The four values occupy 0..3, so they fit
///        in a 2-bit field.
/// NOTE(review): presumably the value stored in SWR_RASTSTATE::cullMode --
/// confirm.
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH  = 0,
    SWR_CULLMODE_NONE  = 1,
    SWR_CULLMODE_FRONT = 2,
    SWR_CULLMODE_BACK  = 3
};
924
/// Polygon fill selector: points, wireframe or solid. Values are 0..2.
/// NOTE(review): presumably the value stored in SWR_RASTSTATE::fillMode --
/// confirm.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT     = 0,
    SWR_FILLMODE_WIREFRAME = 1,
    SWR_FILLMODE_SOLID     = 2
};
931
/// Winding order selector with two values (CW = 0, CCW = 1).
/// NOTE(review): by its name this picks which winding counts as
/// front-facing, presumably via SWR_RASTSTATE::frontWinding -- confirm.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW  = 0,
    SWR_FRONTWINDING_CCW = 1
};
937
938
/// Pixel reference location selector: CENTER = 0, UL = 1.
/// NOTE(review): "UL" presumably stands for upper-left, and the value is
/// presumably what SWR_RASTSTATE::pixelLocation holds -- confirm.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER = 0,
    SWR_PIXEL_LOCATION_UL     = 1,
};
944
945 // fixed point screen space sample locations within a pixel
946 struct SWR_MULTISAMPLE_POS
947 {
948 public:
949 INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
950 INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
951 INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
952 INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
953 INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
954 INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
955 INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
956 INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
957 typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
958 INLINE sampleArrayT X() const { return _x; }; // @llvm_func
959 INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
960 INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
961 INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
962 INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
963 INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
964 INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
965 INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
966
967 INLINE void PrecalcSampleData(int numSamples); //@llvm_func
968
969 private:
970 template <typename MaskT>
971 INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
972 INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
973
974 // scalar sample values
975 uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
976 uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
977 float _x[SWR_MAX_NUM_MULTISAMPLES];
978 float _y[SWR_MAX_NUM_MULTISAMPLES];
979
980 // precalc'd / vectorized samples
981 __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
982 __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
983 simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
984 simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
985 __m128i tileSampleOffsetsX;
986 __m128i tileSampleOffsetsY;
987 };
988
989 //////////////////////////////////////////////////////////////////////////
990 /// SWR_RASTSTATE
991 //////////////////////////////////////////////////////////////////////////
992 struct SWR_RASTSTATE
993 {
994 uint32_t cullMode : 2;
995 uint32_t fillMode : 2;
996 uint32_t frontWinding : 1;
997 uint32_t scissorEnable : 1;
998 uint32_t depthClipEnable : 1;
999 uint32_t clipHalfZ : 1;
1000 uint32_t pointParam : 1;
1001 uint32_t pointSpriteEnable : 1;
1002 uint32_t pointSpriteTopOrigin : 1;
1003 uint32_t forcedSampleCount : 1;
1004 uint32_t pixelOffset : 1;
1005 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
1006 uint32_t conservativeRast : 1;
1007
1008 float pointSize;
1009 float lineWidth;
1010
1011 float depthBias;
1012 float slopeScaledDepthBias;
1013 float depthBiasClamp;
1014 SWR_FORMAT depthFormat; // @llvm_enum
1015
1016 // sample count the rasterizer is running at
1017 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
1018 uint32_t pixelLocation; // UL or Center
1019 SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
1020 bool bIsCenterPattern; // @llvm_enum
1021
1022 // user clip/cull distance enables
1023 uint8_t cullDistanceMask;
1024 uint8_t clipDistanceMask;
1025 };
1026
1027
/// Constant-source selector with four values (0..3), so it fits in a 2-bit
/// field.
/// NOTE(review): presumably the value stored in
/// SWR_ATTRIB_SWIZZLE::constantSource; the CONST_* names suggest the
/// (x, y, z, w) constant that is substituted -- confirm.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000       = 0,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT = 1,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT = 2,
    SWR_CONSTANT_SOURCE_PRIM_ID          = 3
};
1035
/// One attribute-swizzle entry packed into 11 bits of a uint16_t:
/// a 5-bit source attribute index (0..31), a 2-bit constant source and a
/// 4-bit per-component override mask.
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib : 5;          // source attribute
    uint16_t constantSource : 2;        // constant source to apply
    uint16_t componentOverrideMask : 4; // override component with constant source
};
1042
1043 // backend state
1044 struct SWR_BACKEND_STATE
1045 {
1046 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
1047 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
1048
1049 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
1050 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
1051
1052 bool swizzleEnable; // when enabled, core will parse the swizzle map when
1053 // setting up attributes for the backend, otherwise
1054 // all attributes up to numAttributes will be sent
1055 SWR_ATTRIB_SWIZZLE swizzleMap[32];
1056
1057 bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
1058 bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
1059
1060 // Offset to the start of the attributes of the input vertices, in simdvector units
1061 uint32_t vertexAttribOffset;
1062 };
1063
1064
/// Depth/stencil state. The named fields and the raw `value` array overlay
/// the same storage, so the whole state can also be read or written as
/// three 32-bit words.
/// NOTE(review): dword 2 names only two bytes; the remaining bytes covered
/// by value[2] belong to no field. Code that compares or hashes `value`
/// presumably relies on instances being zero-initialised first -- confirm.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0
        uint32_t depthWriteEnable : 1;
        uint32_t depthTestEnable : 1;
        uint32_t stencilWriteEnable : 1;
        uint32_t stencilTestEnable : 1;
        uint32_t doubleSidedStencilTestEnable : 1;

        uint32_t depthTestFunc : 3;
        uint32_t stencilTestFunc : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp : 3;
        uint32_t backfaceStencilTestFunc : 3;
        uint32_t stencilPassDepthPassOp : 3;
        uint32_t stencilPassDepthFailOp : 3;
        uint32_t stencilFailOp : 3;

        // dword 1
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];
};
1099
/// Shading-rate selector: per pixel (0) or per sample (1).
/// SWR_SHADING_RATE_COUNT comes last, so its value (2) equals the number of
/// selectable rates.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    SWR_SHADING_RATE_COUNT  = 2,
};
1106
/// Input-coverage selector: none (0), normal (1) or inner-conservative (2).
/// SWR_INPUT_COVERAGE_COUNT comes last, so its value (3) equals the number
/// of selectable modes.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE               = 0,
    SWR_INPUT_COVERAGE_NORMAL             = 1,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE = 2,
    SWR_INPUT_COVERAGE_COUNT              = 3,
};
1114
/// Pixel-position offset selector: none (0), sample offset (1) or centroid
/// offset (2). SWR_PS_POSITION_OFFSET_COUNT comes last, so its value (3)
/// equals the number of selectable modes.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    SWR_PS_POSITION_OFFSET_COUNT    = 3,
};
1122
/// Single-bit flags naming barycentric kinds (per pixel, centroid, per
/// sample). The three bits are disjoint, so any combination fits in a 3-bit
/// mask.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 0x1,
    SWR_BARYCENTRIC_CENTROID_MASK   = 0x2,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
};
1129
1130 // pixel shader state
1131 struct SWR_PS_STATE
1132 {
1133 // dword 0-1
1134 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1135
1136 // dword 2
1137 uint32_t killsPixel : 1; // pixel shader can kill pixels
1138 uint32_t inputCoverage : 2; // ps uses input coverage
1139 uint32_t writesODepth : 1; // pixel shader writes to depth
1140 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1141 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1142 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1143 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1144 uint32_t usesUAV : 1; // pixel shader accesses UAV
1145 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1146
1147 uint8_t renderTargetMask; // Mask of render targets written
1148 };
1149
// depth bounds state
//
// An enable flag plus the minimum and maximum values used by the depth
// bounds test.
// NOTE(review): whether the bounds are inclusive is decided by the code
// that performs the test -- confirm there.
struct SWR_DEPTH_BOUNDS_STATE
{
    bool depthBoundsTestEnable;
    float depthBoundsTestMinValue;
    float depthBoundsTestMaxValue;
};
1157