Merge remote-tracking branch 'public/master' into vulkan
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / state.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29
30 #include "common/formats.h"
31 #include "common/simdintrin.h"
32
// Clear flags. Each clear target owns a distinct bit, so the values can be
// combined with bitwise OR.
#define SWR_CLEAR_NONE    0x0
#define SWR_CLEAR_COLOR   0x1
#define SWR_CLEAR_DEPTH   0x2
#define SWR_CLEAR_STENCIL 0x4
38
// Driver type identifiers.
enum DRIVER_TYPE
{
    DX = 0,
    GL = 1
};
44
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY.
/// @brief Primitive topology identifiers.
///
/// Value 0x15 and the range 0x18-0x1E are not assigned.  The patch-list
/// values are laid out so that (TOP_PATCHLIST_n - TOP_PATCHLIST_BASE)
/// equals n, the number of vertices per patch.
//////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                 = 0x0,
    TOP_POINT_LIST              = 0x1,
    TOP_LINE_LIST               = 0x2,
    TOP_LINE_STRIP              = 0x3,
    TOP_TRIANGLE_LIST           = 0x4,
    TOP_TRIANGLE_STRIP          = 0x5,
    TOP_TRIANGLE_FAN            = 0x6,
    TOP_QUAD_LIST               = 0x7,
    TOP_QUAD_STRIP              = 0x8,
    TOP_LINE_LIST_ADJ           = 0x9,
    TOP_LISTSTRIP_ADJ           = 0xA,  // historical (misspelled) name, kept for existing users
    TOP_LINE_STRIP_ADJ          = TOP_LISTSTRIP_ADJ, // correctly spelled alias, same value
    TOP_TRI_LIST_ADJ            = 0xB,
    TOP_TRI_STRIP_ADJ           = 0xC,
    TOP_TRI_STRIP_REVERSE       = 0xD,
    TOP_POLYGON                 = 0xE,
    TOP_RECT_LIST               = 0xF,
    TOP_LINE_LOOP               = 0x10,
    TOP_POINT_LIST_BF           = 0x11,
    TOP_LINE_STRIP_CONT         = 0x12,
    TOP_LINE_STRIP_BF           = 0x13,
    TOP_LINE_STRIP_CONT_BF      = 0x14,
    TOP_TRIANGLE_FAN_NOSTIPPLE  = 0x16,
    TOP_TRIANGLE_DISC           = 0x17, /// @todo What is this??

    TOP_PATCHLIST_BASE          = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1             = 0x20, // List of 1-vertex patches
    TOP_PATCHLIST_2             = 0x21,
    TOP_PATCHLIST_3             = 0x22,
    TOP_PATCHLIST_4             = 0x23,
    TOP_PATCHLIST_5             = 0x24,
    TOP_PATCHLIST_6             = 0x25,
    TOP_PATCHLIST_7             = 0x26,
    TOP_PATCHLIST_8             = 0x27,
    TOP_PATCHLIST_9             = 0x28,
    TOP_PATCHLIST_10            = 0x29,
    TOP_PATCHLIST_11            = 0x2A,
    TOP_PATCHLIST_12            = 0x2B,
    TOP_PATCHLIST_13            = 0x2C,
    TOP_PATCHLIST_14            = 0x2D,
    TOP_PATCHLIST_15            = 0x2E,
    TOP_PATCHLIST_16            = 0x2F,
    TOP_PATCHLIST_17            = 0x30,
    TOP_PATCHLIST_18            = 0x31,
    TOP_PATCHLIST_19            = 0x32,
    TOP_PATCHLIST_20            = 0x33,
    TOP_PATCHLIST_21            = 0x34,
    TOP_PATCHLIST_22            = 0x35,
    TOP_PATCHLIST_23            = 0x36,
    TOP_PATCHLIST_24            = 0x37,
    TOP_PATCHLIST_25            = 0x38,
    TOP_PATCHLIST_26            = 0x39,
    TOP_PATCHLIST_27            = 0x3A,
    TOP_PATCHLIST_28            = 0x3B,
    TOP_PATCHLIST_29            = 0x3C,
    TOP_PATCHLIST_30            = 0x3D,
    TOP_PATCHLIST_31            = 0x3E,
    TOP_PATCHLIST_32            = 0x3F, // List of 32-vertex patches
};
108
//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_TYPE
/// @brief Shader stage identifiers; NUM_SHADER_TYPES is the stage count.
//////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX   = 0,
    SHADER_GEOMETRY = 1,
    SHADER_DOMAIN   = 2,
    SHADER_HULL     = 3,
    SHADER_PIXEL    = 4,
    SHADER_COMPUTE  = 5,

    NUM_SHADER_TYPES, // one past the last stage
};
123
//////////////////////////////////////////////////////////////////////////
/// SWR_RENDERTARGET_ATTACHMENT
/// @brief Attachment indices: eight color attachments followed by depth
///        and stencil.  SWR_NUM_ATTACHMENTS is the total count.
/// @todo It's not clear what an "attachment" means. It's not a common term.
//////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    SWR_NUM_ATTACHMENTS // one past the last attachment
};
143
// Number of color render targets.
#define SWR_NUM_RENDERTARGETS 8

// One bit per attachment.
#define SWR_ATTACHMENT_COLOR0_BIT  0x001
#define SWR_ATTACHMENT_COLOR1_BIT  0x002
#define SWR_ATTACHMENT_COLOR2_BIT  0x004
#define SWR_ATTACHMENT_COLOR3_BIT  0x008
#define SWR_ATTACHMENT_COLOR4_BIT  0x010
#define SWR_ATTACHMENT_COLOR5_BIT  0x020
#define SWR_ATTACHMENT_COLOR6_BIT  0x040
#define SWR_ATTACHMENT_COLOR7_BIT  0x080
#define SWR_ATTACHMENT_DEPTH_BIT   0x100
#define SWR_ATTACHMENT_STENCIL_BIT 0x200

// Every attachment bit (0x3ff).
#define SWR_ATTACHMENT_MASK_ALL \
    (SWR_ATTACHMENT_MASK_COLOR | SWR_ATTACHMENT_DEPTH_BIT | SWR_ATTACHMENT_STENCIL_BIT)

// All eight color attachment bits (0x0ff).
#define SWR_ATTACHMENT_MASK_COLOR \
    (SWR_ATTACHMENT_COLOR0_BIT | SWR_ATTACHMENT_COLOR1_BIT | \
     SWR_ATTACHMENT_COLOR2_BIT | SWR_ATTACHMENT_COLOR3_BIT | \
     SWR_ATTACHMENT_COLOR4_BIT | SWR_ATTACHMENT_COLOR5_BIT | \
     SWR_ATTACHMENT_COLOR6_BIT | SWR_ATTACHMENT_COLOR7_BIT)
158
159
//////////////////////////////////////////////////////////////////////////
/// @brief SWR inner tessellation factor IDs.
/// The original note refers to GetTessFactorOutputPosition for details;
/// that code is not part of the visible portion of this header.
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE = 0,
    SWR_QUAD_V_INSIDE     = 1,

    SWR_NUM_INNER_TESS_FACTORS, // number of inner factors
};
170
//////////////////////////////////////////////////////////////////////////
/// @brief SWR outer tessellation factor IDs.
/// The original note refers to GetTessFactorOutputPosition for details;
/// that code is not part of the visible portion of this header.
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_U_EQ1_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    SWR_NUM_OUTER_TESS_FACTORS, // number of outer factors
};
183
184
185 /////////////////////////////////////////////////////////////////////////
186 /// simdvertex
187 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
188 /// Contains position in clip space, hardcoded to attribute 0,
189 /// space for up to 32 attributes, as well as any SGV values generated
190 /// by the pipeline
191 /////////////////////////////////////////////////////////////////////////
192 #define VERTEX_POSITION_SLOT 0
193 #define VERTEX_ATTRIB_START_SLOT 1
194 #define VERTEX_ATTRIB_END_SLOT 32
195 #define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here
196 #define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here
197 #define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
198 #define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
199 #define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
200 static_assert(VERTEX_POINT_SIZE_SLOT < KNOB_NUM_ATTRIBUTES, "Mismatched attribute slot size");
201
202 // SoAoSoA
203 struct simdvertex
204 {
205 simdvector attrib[KNOB_NUM_ATTRIBUTES];
206 };
207
208 //////////////////////////////////////////////////////////////////////////
209 /// SWR_VS_CONTEXT
210 /// @brief Input to vertex shader
211 /////////////////////////////////////////////////////////////////////////
212 struct SWR_VS_CONTEXT
213 {
214 simdvertex* pVin; // IN: SIMD input vertex data store
215 simdvertex* pVout; // OUT: SIMD output vertex data store
216
217 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
218 simdscalari VertexID; // IN: Vertex ID
219 simdscalari mask; // IN: Active mask for shader
220 };
221
222 /////////////////////////////////////////////////////////////////////////
223 /// ScalarCPoint
224 /// @brief defines a control point element as passed from the output
225 /// of the hull shader to the input of the domain shader
226 /////////////////////////////////////////////////////////////////////////
227 struct ScalarAttrib
228 {
229 float x;
230 float y;
231 float z;
232 float w;
233 };
234
235 struct ScalarCPoint
236 {
237 ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES];
238 };
239
240 //////////////////////////////////////////////////////////////////////////
241 /// SWR_TESSELLATION_FACTORS
242 /// @brief Tessellation factors structure (non-vector)
243 /////////////////////////////////////////////////////////////////////////
244 struct SWR_TESSELLATION_FACTORS
245 {
246 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
247 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
248 };
249
250 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
251 struct ScalarPatch
252 {
253 SWR_TESSELLATION_FACTORS tessFactors;
254 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
255 ScalarCPoint patchData;
256 };
257
258 //////////////////////////////////////////////////////////////////////////
259 /// SWR_HS_CONTEXT
260 /// @brief Input to hull shader
261 /////////////////////////////////////////////////////////////////////////
262 struct SWR_HS_CONTEXT
263 {
264 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
265 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
266 simdscalari mask; // IN: Active mask for shader
267 ScalarPatch* pCPout; // OUT: Output control point patch
268 // SIMD-sized-array of SCALAR patches
269 };
270
271 //////////////////////////////////////////////////////////////////////////
272 /// SWR_DS_CONTEXT
273 /// @brief Input to domain shader
274 /////////////////////////////////////////////////////////////////////////
275 struct SWR_DS_CONTEXT
276 {
277 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
278 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
279 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
280 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
281 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
282 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
283 simdscalari mask; // IN: Active mask for shader
284 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
285 };
286
287 //////////////////////////////////////////////////////////////////////////
288 /// SWR_GS_CONTEXT
289 /// @brief Input to geometry shader.
290 /////////////////////////////////////////////////////////////////////////
291 struct SWR_GS_CONTEXT
292 {
293 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims
294 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
295 uint32_t InstanceID; // IN: input instance ID
296 simdscalari mask; // IN: Active mask for shader
297 uint8_t* pStream; // OUT: output stream (contains vertices for all output streams)
298 uint8_t* pCutOrStreamIdBuffer; // OUT: cut or stream id buffer
299 simdscalari vertexCount; // OUT: num vertices emitted per SIMD lane
300 };
301
302 struct PixelPositions
303 {
304 simdscalar UL;
305 simdscalar center;
306 simdscalar sample;
307 simdscalar centroid;
308 };
309
310 //////////////////////////////////////////////////////////////////////////
311 /// SWR_PS_CONTEXT
312 /// @brief Input to pixel shader.
313 /////////////////////////////////////////////////////////////////////////
314 struct SWR_PS_CONTEXT
315 {
316 PixelPositions vX; // IN: x location(s) of pixels
317 PixelPositions vY; // IN: x location(s) of pixels
318 simdscalar vZ; // INOUT: z location of pixels
319 simdscalari activeMask; // OUT: mask for kill
320 simdscalar inputMask; // IN: input coverage mask for all samples
321 simdscalari oMask; // OUT: mask for output coverage
322
323 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
324 PixelPositions vJ;
325 PixelPositions vOneOverW; // IN: 1/w
326
327 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
328 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
329 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
330 const float *I; // IN: Barycentric A, B, and C coefs used to compute I
331 const float *J; // IN: Barycentric A, B, and C coefs used to compute J
332 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
333 const float* pSamplePosX; // IN: array of sample positions
334 const float* pSamplePosY; // IN: array of sample positions
335 simdvector shaded[SWR_NUM_RENDERTARGETS];
336 // OUT: result color per rendertarget
337
338 uint32_t frontFace; // IN: front- 1, back- 0
339 uint32_t primID; // IN: primitive ID
340 uint32_t sampleIndex; // IN: sampleIndex
341 };
342
//////////////////////////////////////////////////////////////////////////
/// SWR_CS_CONTEXT
/// @brief Data passed to the compute shader.
///
/// The compute shader accepts these system values:
///   - ThreadId                 : current thread id relative to all other
///                                threads in the dispatch.
///   - ThreadGroupId            : current thread group id relative to all
///                                other groups in the dispatch.
///   - ThreadIdInGroup          : current thread relative to all threads in
///                                the current thread group.
///   - ThreadIdInGroupFlattened : flattened linear id derived from
///                                ThreadIdInGroup.
///
/// ThreadGroupId is the current thread group index relative to all thread
/// groups in the Dispatch call; ThreadId, ThreadIdInGroup and
/// ThreadIdInGroupFlattened can be derived from it in the shader.
///
/// All of these values can be computed in the shader from the current tile
/// counter.  The tile counter is an atomic counter that resides in the draw
/// context and is initialized to the product of the dispatch dims:
///
///     tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
///
/// Each CPU worker thread atomically decrements this counter and passes the
/// current count into the shader.  When the count reaches 0, all thread
/// groups in the dispatch call have been completed.
/////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // The tile counter value for this thread group.
    uint32_t tileCounter;

    // Dispatch dimensions used by shader to compute system values from the tile counter.
    uint32_t dispatchDims[3];

    // Thread Group Shared Memory pointer.
    uint8_t* pTGSM;

    // Spill/fill buffer for barrier support
    uint8_t* pSpillFillBuffer;
};
378
// enums

// Surface tiling modes; SWR_TILE_MODE_COUNT is the number of modes.
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0, // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 1, // W major tiling
    SWR_TILE_MODE_XMAJOR = 2, // X major tiling
    SWR_TILE_MODE_YMAJOR = 3, // Y major tiling
    SWR_TILE_SWRZ        = 4, // SWR-Z tiling

    SWR_TILE_MODE_COUNT
};
390
// Surface types.  Value 6 is not assigned; SURFACE_NULL is 7.
enum SWR_SURFACE_TYPE
{
    SURFACE_1D                = 0x0,
    SURFACE_2D                = 0x1,
    SURFACE_3D                = 0x2,
    SURFACE_CUBE              = 0x3,
    SURFACE_BUFFER            = 0x4,
    SURFACE_STRUCTURED_BUFFER = 0x5,
    SURFACE_NULL              = 0x7
};
401
// Comparison functions; NUM_ZFUNC is the number of functions.
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    NUM_ZFUNC
};
414
// Stencil operations.
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
426
// Blend factors.
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
449
// Blend operations.
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
458
// Logic operations (16 values, 0x0 through 0xF).
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
478
479 struct SWR_SURFACE_STATE
480 {
481 uint8_t *pBaseAddress;
482 SWR_SURFACE_TYPE type; // @llvm_enum
483 SWR_FORMAT format; // @llvm_enum
484 uint32_t width;
485 uint32_t height;
486 uint32_t depth;
487 uint32_t numSamples;
488 uint32_t samplePattern;
489 uint32_t pitch;
490 uint32_t qpitch;
491 uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
492 uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
493 float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
494 uint32_t lod; // for render targets, the lod being rendered to
495 uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
496 SWR_TILE_MODE tileMode; // @llvm_enum
497 bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
498 uint32_t halign;
499 uint32_t valign;
500 uint32_t xOffset;
501 uint32_t yOffset;
502
503 uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
504
505 uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc.
506 };
507
// Vertex fetch state.
// WARNING - any changes to this struct need to be reflected
// in the fetch shader jit.
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t index;
    uint32_t pitch;
    const uint8_t *pData;
    uint32_t size;
    uint32_t numaNode;
    // size / pitch.  precalculated value used by fetch shader for OOB checks
    uint32_t maxVertex;
    // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
    uint32_t partialInboundsSize;
};
521
522 struct SWR_INDEX_BUFFER_STATE
523 {
524 // Format type for indices (e.g. UINT16, UINT32, etc.)
525 SWR_FORMAT format; // @llvm_enum
526 const void *pIndices;
527 uint32_t size;
528 };
529
530
531 //////////////////////////////////////////////////////////////////////////
532 /// SWR_FETCH_CONTEXT
533 /// @brief Input to fetch shader.
534 /// @note WARNING - Changes to this struct need to be reflected in the
535 /// fetch shader jit.
536 /////////////////////////////////////////////////////////////////////////
537 struct SWR_FETCH_CONTEXT
538 {
539 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
540 const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
541 const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
542 uint32_t CurInstance; // IN: current instance
543 uint32_t BaseVertex; // IN: base vertex
544 uint32_t StartVertex; // IN: start vertex
545 uint32_t StartInstance; // IN: start instance
546 simdscalari VertexID; // OUT: vector of vertex IDs
547 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
548 };
549
//////////////////////////////////////////////////////////////////////////
/// SWR_STATS
///
/// @brief All statistics generated by SWR go here.  These are public
///        to the driver.
/////////////////////////////////////////////////////////////////////////
struct SWR_STATS
{
    // ---- Occlusion query ----
    uint64_t DepthPassCount;    // Number of passing depth tests.  Not exact.

    // ---- Pipeline stats ----
    uint64_t IaVertices;        // Number of Fetch Shader vertices
    uint64_t IaPrimitives;      // Number of PA primitives.
    uint64_t VsInvocations;     // Number of Vertex Shader invocations
    uint64_t HsInvocations;     // Number of Hull Shader invocations
    uint64_t DsInvocations;     // Number of Domain Shader invocations
    uint64_t GsInvocations;     // Number of Geometry Shader invocations
    uint64_t PsInvocations;     // Number of Pixel Shader invocations
    uint64_t CsInvocations;     // Number of Compute Shader invocations
    uint64_t CInvocations;      // Number of clipper invocations
    uint64_t CPrimitives;       // Number of clipper primitives.
    uint64_t GsPrimitives;      // Number of prims GS outputs.

    // ---- Streamout stats (four entries each) ----
    uint32_t SoWriteOffset[4];
    uint64_t SoPrimStorageNeeded[4];
    uint64_t SoNumPrimsWritten[4];
};
579
//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_BUFFERS
/// Limits used by the stream-output declarations that follow.
/////////////////////////////////////////////////////////////////////////

#define MAX_SO_STREAMS  4
#define MAX_ATTRIBUTES  32
586
// A single stream-output buffer binding.  Sizes, pitch and offsets are
// expressed in dwords.
struct SWR_STREAMOUT_BUFFER
{
    bool enable;

    // Pointers to streamout buffers.
    uint32_t* pBuffer;

    // Size of buffer in dwords.
    uint32_t bufferSize;

    // Vertex pitch of buffer in dwords.
    uint32_t pitch;

    // Offset into buffer in dwords.  SOS will increment this offset.
    uint32_t streamOffset;

    // Offset to the SO write offset.  If not null then we update offset here.
    uint32_t* pWriteOffset;
};
607
608 //////////////////////////////////////////////////////////////////////////
609 /// STREAMOUT_STATE
610 /////////////////////////////////////////////////////////////////////////
611 struct SWR_STREAMOUT_STATE
612 {
613 // This disables stream output.
614 bool soEnable;
615
616 // which streams are enabled for streamout
617 bool streamEnable[MAX_SO_STREAMS];
618
619 // If set then do not send any streams to the rasterizer.
620 bool rasterizerDisable;
621
622 // Specifies which stream to send to the rasterizer.
623 uint32_t streamToRasterizer;
624
625 // The stream masks specify which attributes are sent to which streams.
626 // These masks help the FE to setup the pPrimData buffer that is passed
627 // the the Stream Output Shader (SOS) function.
628 uint32_t streamMasks[MAX_SO_STREAMS];
629
630 // Number of attributes, including position, per vertex that are streamed out.
631 // This should match number of bits in stream mask.
632 uint32_t streamNumEntries[MAX_SO_STREAMS];
633 };
634
635 //////////////////////////////////////////////////////////////////////////
636 /// STREAMOUT_CONTEXT - Passed to SOS
637 /////////////////////////////////////////////////////////////////////////
638 struct SWR_STREAMOUT_CONTEXT
639 {
640 uint32_t* pPrimData;
641 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
642
643 // Num prims written for this stream
644 uint32_t numPrimsWritten;
645
646 // Num prims that should have been written if there were no overflow.
647 uint32_t numPrimStorageNeeded;
648 };
649
650 //////////////////////////////////////////////////////////////////////////
651 /// SWR_GS_STATE - Geometry shader state
652 /////////////////////////////////////////////////////////////////////////
653 struct SWR_GS_STATE
654 {
655 bool gsEnable;
656
657 // number of input attributes per vertex. used by the frontend to
658 // optimize assembling primitives for GS
659 uint32_t numInputAttribs;
660
661 // output topology - can be point, tristrip, or linestrip
662 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
663
664 // maximum number of verts that can be emitted by a single instance of the GS
665 uint32_t maxNumVerts;
666
667 // instance count
668 uint32_t instanceCount;
669
670 // geometry shader emits renderTargetArrayIndex
671 bool emitsRenderTargetArrayIndex;
672
673 // geometry shader emits PrimitiveID
674 bool emitsPrimitiveID;
675
676 // if true, geometry shader emits a single stream, with separate cut buffer.
677 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
678 // to map vertices to streams
679 bool isSingleStream;
680
681 // when single stream is enabled, singleStreamID dictates which stream is being output.
682 // field ignored if isSingleStream is false
683 uint32_t singleStreamID;
684 };
685
686
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT   = 0,
    SWR_TS_OUTPUT_LINE    = 1,
    SWR_TS_OUTPUT_TRI_CW  = 2,
    SWR_TS_OUTPUT_TRI_CCW = 3,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT
};
699
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_PARTITIONING - Defines tessellation algorithm
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER         = 0,
    SWR_TS_ODD_FRACTIONAL  = 1,
    SWR_TS_EVEN_FRACTIONAL = 2,

    SWR_TS_PARTITIONING_COUNT
};
711
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_DOMAIN - Defines Tessellation Domain
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD    = 0,
    SWR_TS_TRI     = 1,
    SWR_TS_ISOLINE = 2,

    SWR_TS_DOMAIN_COUNT
};
723
724 //////////////////////////////////////////////////////////////////////////
725 /// SWR_TS_STATE - Tessellation state
726 /////////////////////////////////////////////////////////////////////////
727 struct SWR_TS_STATE
728 {
729 bool tsEnable;
730 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
731 SWR_TS_PARTITIONING partitioning; // @llvm_enum
732 SWR_TS_DOMAIN domain; // @llvm_enum
733
734 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
735
736 uint32_t numHsInputAttribs;
737 uint32_t numHsOutputAttribs;
738 uint32_t numDsOutputAttribs;
739 };
740
// Output merger state: per-render-target write-disable bits, one bit per
// color channel.  The whole structure must occupy exactly one byte.
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed   : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue  : 1;
    uint8_t writeDisableAlpha : 1;
};
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
750
// Largest supported sample count.
#define SWR_MAX_NUM_MULTISAMPLES 16

// Multisample count identifiers; SWR_MULTISAMPLE_TYPE_MAX is the number of
// identifiers.
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X  = 0,
    SWR_MULTISAMPLE_2X  = 1,
    SWR_MULTISAMPLE_4X  = 2,
    SWR_MULTISAMPLE_8X  = 3,
    SWR_MULTISAMPLE_16X = 4,
    SWR_MULTISAMPLE_TYPE_MAX
};
761
762 struct SWR_BLEND_STATE
763 {
764 // constant blend factor color in RGBA float
765 float constantColor[4];
766
767 // alpha test reference value in unorm8 or float32
768 uint32_t alphaTestReference;
769 uint32_t sampleMask;
770 // all RT's have the same sample count
771 ///@todo move this to Output Merger state when we refactor
772 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
773
774 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
775 };
776 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
777
778 //////////////////////////////////////////////////////////////////////////
779 /// FUNCTION POINTERS FOR SHADERS
780
781 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
782 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
783 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
784 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
785 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
786 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
787 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
788 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
789 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
790
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
/////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // Skip clip test, perspective divide, and viewport transform.
    // Intended for verts in screen space.
    bool vpTransformDisable;

    // Provoking-vertex selectors, accessible as bit-fields or as the raw
    // 32-bit word `bits`.
    union
    {
        struct
        {
            uint32_t triFan        : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList  : 2;
        };
        uint32_t bits;
    }provokingVertex;

    // Provoking vertex for the draw topology.
    uint32_t topologyProvokingVertex;
};
811
//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRIX
/// @brief The six stored viewport matrix elements, named by their
///        row/column suffix (m00, m11, m22, m30, m31, m32).
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};
824
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
/// @brief Viewport rectangle (x, y, width, height) and depth range
///        (minZ, maxZ).
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    float x;
    float y;
    float width;
    float height;
    float minZ;
    float maxZ;
};
837
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH  = 0,
    SWR_CULLMODE_NONE  = 1,
    SWR_CULLMODE_FRONT = 2,
    SWR_CULLMODE_BACK  = 3
};
848
// Fill modes.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT     = 0,
    SWR_FILLMODE_WIREFRAME = 1,
    SWR_FILLMODE_SOLID     = 2
};
855
// Front-face winding order.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW  = 0,
    SWR_FRONTWINDING_CCW = 1
};
861
862
// MSAA sample patterns; SWR_MSAA_SAMPLE_PATTERN_MAX is the pattern count.
enum SWR_MSAA_SAMPLE_PATTERN
{
    SWR_MSAA_CENTER_PATTERN   = 0,
    SWR_MSAA_STANDARD_PATTERN = 1,
    SWR_MSAA_SAMPLE_PATTERN_MAX
};
869
// Pixel location selector: center or UL.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER = 0,
    SWR_PIXEL_LOCATION_UL     = 1,
};
875
// Fixed point screen space sample location within a pixel.
struct SWR_MULTISAMPLE_POS
{
    uint32_t x;
    uint32_t y;
};
882
// MSAA rasterization modes.
enum SWR_MSAA_RASTMODE
{
    SWR_MSAA_RASTMODE_OFF_PIXEL   = 0,
    SWR_MSAA_RASTMODE_OFF_PATTERN = 1,
    SWR_MSAA_RASTMODE_ON_PIXEL    = 2,
    SWR_MSAA_RASTMODE_ON_PATTERN  = 3
};
890
891 //////////////////////////////////////////////////////////////////////////
892 /// SWR_RASTSTATE
893 //////////////////////////////////////////////////////////////////////////
894 struct SWR_RASTSTATE
895 {
896 uint32_t cullMode : 2;
897 uint32_t fillMode : 2;
898 uint32_t frontWinding : 1;
899 uint32_t scissorEnable : 1;
900 uint32_t depthClipEnable : 1;
901 float pointSize;
902 float lineWidth;
903
904 // point size output from the VS
905 bool pointParam;
906
907 // point sprite
908 bool pointSpriteEnable;
909 bool pointSpriteTopOrigin;
910
911 // depth bias
912 float depthBias;
913 float slopeScaledDepthBias;
914 float depthBiasClamp;
915 SWR_FORMAT depthFormat; // @llvm_enum
916
917 ///@todo: MSAA lines
918 // multisample state for MSAA lines
919 bool msaaRastEnable;
920 SWR_MSAA_RASTMODE rastMode; // @llvm_enum
921
922 // sample count the rasterizer is running at
923 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
924 bool bForcedSampleCount;
925 uint32_t pixelLocation; // UL or Center
926 bool pixelOffset; // offset pixel positions by .5 in both the horizontal and vertical direction
927 SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];
928 SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum
929
930 // user clip/cull distance enables
931 uint8_t cullDistanceMask;
932 uint8_t clipDistanceMask;
933 };
934
935 // backend state
936 struct SWR_BACKEND_STATE
937 {
938 uint32_t constantInterpolationMask;
939 uint32_t pointSpriteTexCoordMask;
940 uint8_t numAttributes;
941 uint8_t numComponents[KNOB_NUM_ATTRIBUTES];
942 };
943
// Depth/stencil state, viewable either as named fields or as three raw
// dwords through `value`.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // ---- dword 0: enables, test functions and stencil ops (32 bits) ----
        uint32_t depthWriteEnable               : 1;
        uint32_t depthTestEnable                : 1;
        uint32_t stencilWriteEnable             : 1;
        uint32_t stencilTestEnable              : 1;
        uint32_t doubleSidedStencilTestEnable   : 1;

        uint32_t depthTestFunc                  : 3;
        uint32_t stencilTestFunc                : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp          : 3;
        uint32_t backfaceStencilTestFunc        : 3;
        uint32_t stencilPassDepthPassOp         : 3;
        uint32_t stencilPassDepthFailOp         : 3;
        uint32_t stencilFailOp                  : 3;

        // ---- dword 1: stencil masks ----
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // ---- dword 2: stencil reference values ----
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];
};
978
// Shading rates; SWR_SHADING_RATE_MAX is the number of rates.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    SWR_SHADING_RATE_COARSE = 2,
    SWR_SHADING_RATE_MAX,
};
986
// Input coverage types; SWR_INPUT_COVERAGE_MAX is the number of types.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE   = 0,
    SWR_INPUT_COVERAGE_NORMAL = 1,
    SWR_INPUT_COVERAGE_MAX,
};
993
// Pixel shader position offset types; SWR_PS_POSITION_OFFSET_MAX is the
// number of types.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    SWR_PS_POSITION_OFFSET_MAX,
};
1001
// Barycentric type bits; each type owns one bit so they can be combined.
// SWR_BARYCENTRICS_MASK_MAX is the first bit past the defined ones.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = (1 << 0),
    SWR_BARYCENTRIC_CENTROID_MASK   = (1 << 1),
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = (1 << 2),
    SWR_BARYCENTRICS_MASK_MAX       = (1 << 3)
};
1009
1010 // pixel shader state
1011 struct SWR_PS_STATE
1012 {
1013 // dword 0-1
1014 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1015
1016 // dword 2
1017 uint32_t killsPixel : 1; // pixel shader can kill pixels
1018 uint32_t inputCoverage : 1; // type of input coverage PS uses
1019 uint32_t writesODepth : 1; // pixel shader writes to depth
1020 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1021 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1022 uint32_t numRenderTargets : 4; // number of render target outputs in use (0-8)
1023 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1024 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1025 uint32_t usesUAV : 1; // pixel shader accesses UAV
1026 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1027 };