swr: [rasterizer core] implement depth bounds test
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / state.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file state.h
24 *
25 * @brief Definitions for API state.
26 *
27 ******************************************************************************/
28 #pragma once
29
30 #include "common/formats.h"
31 #include "common/simdintrin.h"
32
// Clear flags: one bit each for color, depth and stencil.
// The bits are distinct, so the flags can be combined with bitwise OR.
#define SWR_CLEAR_NONE      0
#define SWR_CLEAR_COLOR     (1 << 0)
#define SWR_CLEAR_DEPTH     (1 << 1)
#define SWR_CLEAR_STENCIL   (1 << 2)
38
// Driver type selector.
enum DRIVER_TYPE
{
    DX = 0,
    GL = 1
};
44
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY
/// @brief Primitive topology identifiers. All values are assigned
///        explicitly; 0x15 and 0x18..0x1E are not used by any enumerator.
//////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                 = 0x00,
    TOP_POINT_LIST              = 0x01,
    TOP_LINE_LIST               = 0x02,
    TOP_LINE_STRIP              = 0x03,
    TOP_TRIANGLE_LIST           = 0x04,
    TOP_TRIANGLE_STRIP          = 0x05,
    TOP_TRIANGLE_FAN            = 0x06,
    TOP_QUAD_LIST               = 0x07,
    TOP_QUAD_STRIP              = 0x08,
    TOP_LINE_LIST_ADJ           = 0x09,
    TOP_LISTSTRIP_ADJ           = 0x0A,
    TOP_TRI_LIST_ADJ            = 0x0B,
    TOP_TRI_STRIP_ADJ           = 0x0C,
    TOP_TRI_STRIP_REVERSE       = 0x0D,
    TOP_POLYGON                 = 0x0E,
    TOP_RECT_LIST               = 0x0F,
    TOP_LINE_LOOP               = 0x10,
    TOP_POINT_LIST_BF           = 0x11,
    TOP_LINE_STRIP_CONT         = 0x12,
    TOP_LINE_STRIP_BF           = 0x13,
    TOP_LINE_STRIP_CONT_BF      = 0x14,
    TOP_TRIANGLE_FAN_NOSTIPPLE  = 0x16,
    TOP_TRIANGLE_DISC           = 0x17, /// @todo What is this??

    // Patch lists. TOP_PATCHLIST_n == TOP_PATCHLIST_BASE + n, where n is the
    // number of vertices per patch (1..32).
    TOP_PATCHLIST_BASE          = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1             = TOP_PATCHLIST_BASE + 1,  // 0x20, list of 1-vertex patches
    TOP_PATCHLIST_2             = TOP_PATCHLIST_BASE + 2,  // 0x21
    TOP_PATCHLIST_3             = TOP_PATCHLIST_BASE + 3,  // 0x22
    TOP_PATCHLIST_4             = TOP_PATCHLIST_BASE + 4,  // 0x23
    TOP_PATCHLIST_5             = TOP_PATCHLIST_BASE + 5,  // 0x24
    TOP_PATCHLIST_6             = TOP_PATCHLIST_BASE + 6,  // 0x25
    TOP_PATCHLIST_7             = TOP_PATCHLIST_BASE + 7,  // 0x26
    TOP_PATCHLIST_8             = TOP_PATCHLIST_BASE + 8,  // 0x27
    TOP_PATCHLIST_9             = TOP_PATCHLIST_BASE + 9,  // 0x28
    TOP_PATCHLIST_10            = TOP_PATCHLIST_BASE + 10, // 0x29
    TOP_PATCHLIST_11            = TOP_PATCHLIST_BASE + 11, // 0x2A
    TOP_PATCHLIST_12            = TOP_PATCHLIST_BASE + 12, // 0x2B
    TOP_PATCHLIST_13            = TOP_PATCHLIST_BASE + 13, // 0x2C
    TOP_PATCHLIST_14            = TOP_PATCHLIST_BASE + 14, // 0x2D
    TOP_PATCHLIST_15            = TOP_PATCHLIST_BASE + 15, // 0x2E
    TOP_PATCHLIST_16            = TOP_PATCHLIST_BASE + 16, // 0x2F
    TOP_PATCHLIST_17            = TOP_PATCHLIST_BASE + 17, // 0x30
    TOP_PATCHLIST_18            = TOP_PATCHLIST_BASE + 18, // 0x31
    TOP_PATCHLIST_19            = TOP_PATCHLIST_BASE + 19, // 0x32
    TOP_PATCHLIST_20            = TOP_PATCHLIST_BASE + 20, // 0x33
    TOP_PATCHLIST_21            = TOP_PATCHLIST_BASE + 21, // 0x34
    TOP_PATCHLIST_22            = TOP_PATCHLIST_BASE + 22, // 0x35
    TOP_PATCHLIST_23            = TOP_PATCHLIST_BASE + 23, // 0x36
    TOP_PATCHLIST_24            = TOP_PATCHLIST_BASE + 24, // 0x37
    TOP_PATCHLIST_25            = TOP_PATCHLIST_BASE + 25, // 0x38
    TOP_PATCHLIST_26            = TOP_PATCHLIST_BASE + 26, // 0x39
    TOP_PATCHLIST_27            = TOP_PATCHLIST_BASE + 27, // 0x3A
    TOP_PATCHLIST_28            = TOP_PATCHLIST_BASE + 28, // 0x3B
    TOP_PATCHLIST_29            = TOP_PATCHLIST_BASE + 29, // 0x3C
    TOP_PATCHLIST_30            = TOP_PATCHLIST_BASE + 30, // 0x3D
    TOP_PATCHLIST_31            = TOP_PATCHLIST_BASE + 31, // 0x3E
    TOP_PATCHLIST_32            = TOP_PATCHLIST_BASE + 32, // 0x3F, list of 32-vertex patches
};
108
//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_TYPE
/// @brief Shader stage identifiers. NUM_SHADER_TYPES is the number of
///        stages listed before it.
//////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX    = 0,
    SHADER_GEOMETRY  = 1,
    SHADER_DOMAIN    = 2,
    SHADER_HULL      = 3,
    SHADER_PIXEL     = 4,
    SHADER_COMPUTE   = 5,

    NUM_SHADER_TYPES = 6,
};
123
//////////////////////////////////////////////////////////////////////////
/// SWR_RENDERTARGET_ATTACHMENT
/// @brief Indices of the eight color attachments followed by depth and
///        stencil. SWR_NUM_ATTACHMENTS is the total count.
/// @todo It's not clear what an "attachment" means. It's not a common term.
//////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    SWR_NUM_ATTACHMENTS    = 10
};
143
// Number of color render targets (COLOR0..COLOR7).
#define SWR_NUM_RENDERTARGETS       8

// One mask bit per attachment: eight color bits, then depth, then stencil.
#define SWR_ATTACHMENT_COLOR0_BIT   0x001
#define SWR_ATTACHMENT_COLOR1_BIT   0x002
#define SWR_ATTACHMENT_COLOR2_BIT   0x004
#define SWR_ATTACHMENT_COLOR3_BIT   0x008
#define SWR_ATTACHMENT_COLOR4_BIT   0x010
#define SWR_ATTACHMENT_COLOR5_BIT   0x020
#define SWR_ATTACHMENT_COLOR6_BIT   0x040
#define SWR_ATTACHMENT_COLOR7_BIT   0x080
#define SWR_ATTACHMENT_DEPTH_BIT    0x100
#define SWR_ATTACHMENT_STENCIL_BIT  0x200
// Combined masks: every attachment bit, and the color bits only.
#define SWR_ATTACHMENT_MASK_ALL     0x3ff
#define SWR_ATTACHMENT_MASK_COLOR   0x0ff
158
159
//////////////////////////////////////////////////////////////////////////
/// @brief SWR Inner Tessellation factor ID
/// See the GetTessFactorOutputPosition code for documentation.
/// SWR_NUM_INNER_TESS_FACTORS is the number of inner factors.
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE      = 0,
    SWR_QUAD_V_INSIDE          = 1,

    SWR_NUM_INNER_TESS_FACTORS = 2,
};
170
//////////////////////////////////////////////////////////////////////////
/// @brief SWR Outer Tessellation factor ID
/// See the GetTessFactorOutputPosition code for documentation.
/// SWR_NUM_OUTER_TESS_FACTORS is the number of outer factors.
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_U_EQ1_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    SWR_NUM_OUTER_TESS_FACTORS        = 4,
};
183
184
185 /////////////////////////////////////////////////////////////////////////
186 /// simdvertex
187 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
188 /// Contains position in clip space, hardcoded to attribute 0,
189 /// space for up to 32 attributes, as well as any SGV values generated
190 /// by the pipeline
191 /////////////////////////////////////////////////////////////////////////
192 #define VERTEX_POSITION_SLOT 0
193 #define VERTEX_ATTRIB_START_SLOT 1
194 #define VERTEX_ATTRIB_END_SLOT 32
195 #define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here
196 #define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here
197 #define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
198 #define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
199 #define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
200 #define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT 38
201 // SoAoSoA
202 struct simdvertex
203 {
204 simdvector attrib[KNOB_NUM_ATTRIBUTES];
205 };
206
207 //////////////////////////////////////////////////////////////////////////
208 /// SWR_VS_CONTEXT
209 /// @brief Input to vertex shader
210 /////////////////////////////////////////////////////////////////////////
211 struct SWR_VS_CONTEXT
212 {
213 simdvertex* pVin; // IN: SIMD input vertex data store
214 simdvertex* pVout; // OUT: SIMD output vertex data store
215
216 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
217 simdscalari VertexID; // IN: Vertex ID
218 simdscalari mask; // IN: Active mask for shader
219 };
220
221 /////////////////////////////////////////////////////////////////////////
222 /// ScalarCPoint
223 /// @brief defines a control point element as passed from the output
224 /// of the hull shader to the input of the domain shader
225 /////////////////////////////////////////////////////////////////////////
226 struct ScalarAttrib
227 {
228 float x;
229 float y;
230 float z;
231 float w;
232 };
233
234 struct ScalarCPoint
235 {
236 ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES];
237 };
238
239 //////////////////////////////////////////////////////////////////////////
240 /// SWR_TESSELLATION_FACTORS
241 /// @brief Tessellation factors structure (non-vector)
242 /////////////////////////////////////////////////////////////////////////
243 struct SWR_TESSELLATION_FACTORS
244 {
245 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
246 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
247 };
248
249 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
250 struct ScalarPatch
251 {
252 SWR_TESSELLATION_FACTORS tessFactors;
253 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
254 ScalarCPoint patchData;
255 };
256
257 //////////////////////////////////////////////////////////////////////////
258 /// SWR_HS_CONTEXT
259 /// @brief Input to hull shader
260 /////////////////////////////////////////////////////////////////////////
261 struct SWR_HS_CONTEXT
262 {
263 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
264 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
265 simdscalari mask; // IN: Active mask for shader
266 ScalarPatch* pCPout; // OUT: Output control point patch
267 // SIMD-sized-array of SCALAR patches
268 };
269
270 //////////////////////////////////////////////////////////////////////////
271 /// SWR_DS_CONTEXT
272 /// @brief Input to domain shader
273 /////////////////////////////////////////////////////////////////////////
274 struct SWR_DS_CONTEXT
275 {
276 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
277 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
278 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
279 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
280 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
281 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
282 simdscalari mask; // IN: Active mask for shader
283 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
284 };
285
286 //////////////////////////////////////////////////////////////////////////
287 /// SWR_GS_CONTEXT
288 /// @brief Input to geometry shader.
289 /////////////////////////////////////////////////////////////////////////
290 struct SWR_GS_CONTEXT
291 {
292 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims
293 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
294 uint32_t InstanceID; // IN: input instance ID
295 simdscalari mask; // IN: Active mask for shader
296 uint8_t* pStream; // OUT: output stream (contains vertices for all output streams)
297 uint8_t* pCutOrStreamIdBuffer; // OUT: cut or stream id buffer
298 simdscalari vertexCount; // OUT: num vertices emitted per SIMD lane
299 };
300
301 struct PixelPositions
302 {
303 simdscalar UL;
304 simdscalar center;
305 simdscalar sample;
306 simdscalar centroid;
307 };
308
// Maximum number of multisamples; sizes per-sample arrays in this header.
#define SWR_MAX_NUM_MULTISAMPLES    16
310
311 //////////////////////////////////////////////////////////////////////////
312 /// SWR_PS_CONTEXT
313 /// @brief Input to pixel shader.
314 /////////////////////////////////////////////////////////////////////////
315 struct SWR_PS_CONTEXT
316 {
317 PixelPositions vX; // IN: x location(s) of pixels
318 PixelPositions vY; // IN: x location(s) of pixels
319 simdscalar vZ; // INOUT: z location of pixels
320 simdscalari activeMask; // OUT: mask for kill
321 simdscalar inputMask; // IN: input coverage mask for all samples
322 simdscalari oMask; // OUT: mask for output coverage
323
324 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
325 PixelPositions vJ;
326 PixelPositions vOneOverW; // IN: 1/w
327
328 const float* pAttribs; // IN: pointer to attribute barycentric coefficients
329 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
330 const float* pRecipW; // IN: pointer to 1/w coord for each vertex
331 const float *I; // IN: Barycentric A, B, and C coefs used to compute I
332 const float *J; // IN: Barycentric A, B, and C coefs used to compute J
333 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
334 const float* pSamplePosX; // IN: array of sample positions
335 const float* pSamplePosY; // IN: array of sample positions
336 simdvector shaded[SWR_NUM_RENDERTARGETS];
337 // OUT: result color per rendertarget
338
339 uint32_t frontFace; // IN: front- 1, back- 0
340 uint32_t primID; // IN: primitive ID
341 uint32_t sampleIndex; // IN: sampleIndex
342
343 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
344
345 };
346
//////////////////////////////////////////////////////////////////////////
/// SWR_CS_CONTEXT
/// @brief Input to compute shader.
/////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // The ThreadGroupId is the index of the current thread group relative to
    // all thread groups in the Dispatch call. ThreadId, ThreadIdInGroup and
    // ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
    //
    // System values accepted by the compute shader:
    //   o ThreadId                 - Current thread id relative to all other threads in dispatch.
    //   o ThreadGroupId            - Current thread group id relative to all other groups in dispatch.
    //   o ThreadIdInGroup          - Current thread relative to all threads in the current thread group.
    //   o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
    //
    // All of them can be computed in the shader from the current tile counter.
    // The tile counter is an atomic counter that resides in the draw context
    // and is initialized to the product of the dispatch dims:
    //
    //     tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    //
    // Each CPU worker thread atomically decrements the counter and passes the
    // current count into the shader. Once the count reaches 0, all thread
    // groups in the dispatch call have been completed.

    // The tile counter value for this thread group.
    uint32_t tileCounter;

    // Dispatch dimensions used by shader to compute system values from the tile counter.
    uint32_t dispatchDims[3];

    // Thread Group Shared Memory pointer.
    uint8_t* pTGSM;

    // Spill/fill buffer for barrier support
    uint8_t* pSpillFillBuffer;
};
382
// enums

// Surface tiling modes. SWR_TILE_MODE_COUNT is the number of modes.
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0x0, // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 0x1, // W major tiling
    SWR_TILE_MODE_XMAJOR = 0x2, // X major tiling
    SWR_TILE_MODE_YMAJOR = 0x3, // Y major tiling
    SWR_TILE_SWRZ        = 0x4, // SWR-Z tiling

    SWR_TILE_MODE_COUNT  = 0x5
};
394
// Surface types. Values are explicit; 6 is not assigned to any enumerator.
enum SWR_SURFACE_TYPE
{
    SURFACE_1D                = 0,
    SURFACE_2D                = 1,
    SURFACE_3D                = 2,
    SURFACE_CUBE              = 3,
    SURFACE_BUFFER            = 4,
    SURFACE_STRUCTURED_BUFFER = 5,
    SURFACE_NULL              = 7
};
405
// Comparison function identifiers. NUM_ZFUNC is the number of functions.
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    NUM_ZFUNC    = 8
};
418
// Stencil operation identifiers.
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
430
// Blend factor identifiers.
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
453
// Blend operation identifiers.
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
462
// Logic operation identifiers; the sixteen values span 0x0..0xF.
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
482
483 struct SWR_SURFACE_STATE
484 {
485 uint8_t *pBaseAddress;
486 SWR_SURFACE_TYPE type; // @llvm_enum
487 SWR_FORMAT format; // @llvm_enum
488 uint32_t width;
489 uint32_t height;
490 uint32_t depth;
491 uint32_t numSamples;
492 uint32_t samplePattern;
493 uint32_t pitch;
494 uint32_t qpitch;
495 uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
496 uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
497 float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
498 uint32_t lod; // for render targets, the lod being rendered to
499 uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
500 SWR_TILE_MODE tileMode; // @llvm_enum
501 uint32_t halign;
502 uint32_t valign;
503 uint32_t xOffset;
504 uint32_t yOffset;
505
506 uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
507
508 uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc.
509
510 bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
511 };
512
// Vertex fetch state for one vertex buffer.
// WARNING - any change to this struct must also be reflected
// in the fetch shader jit.
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t index;
    uint32_t pitch;
    const uint8_t *pData;
    uint32_t size;
    uint32_t numaNode;
    // size / pitch. precalculated value used by fetch shader for OOB checks
    uint32_t maxVertex;
    // size % pitch. precalculated value used by fetch shader for partially OOB vertices
    uint32_t partialInboundsSize;
};
526
527 struct SWR_INDEX_BUFFER_STATE
528 {
529 // Format type for indices (e.g. UINT16, UINT32, etc.)
530 SWR_FORMAT format; // @llvm_enum
531 const void *pIndices;
532 uint32_t size;
533 };
534
535
536 //////////////////////////////////////////////////////////////////////////
537 /// SWR_FETCH_CONTEXT
538 /// @brief Input to fetch shader.
539 /// @note WARNING - Changes to this struct need to be reflected in the
540 /// fetch shader jit.
541 /////////////////////////////////////////////////////////////////////////
542 struct SWR_FETCH_CONTEXT
543 {
544 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
545 const int32_t* pIndices; // IN: pointer to index buffer for indexed draws
546 const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking
547 uint32_t CurInstance; // IN: current instance
548 uint32_t BaseVertex; // IN: base vertex
549 uint32_t StartVertex; // IN: start vertex
550 uint32_t StartInstance; // IN: start instance
551 simdscalari VertexID; // OUT: vector of vertex IDs
552 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
553 };
554
555 //////////////////////////////////////////////////////////////////////////
556 /// SWR_STATS
557 ///
558 /// @brief All statistics generated by SWR go here. These are public
559 /// to driver.
560 /////////////////////////////////////////////////////////////////////////
561 OSALIGNLINE(struct) SWR_STATS
562 {
563 // Occlusion Query
564 uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
565
566 // Pipeline Stats
567 uint64_t PsInvocations; // Number of Pixel Shader invocations
568 uint64_t CsInvocations; // Number of Compute Shader invocations
569
570 };
571
572 //////////////////////////////////////////////////////////////////////////
573 /// SWR_STATS
574 ///
575 /// @brief All statistics generated by FE.
576 /////////////////////////////////////////////////////////////////////////
577 OSALIGNLINE(struct) SWR_STATS_FE
578 {
579 uint64_t IaVertices; // Number of Fetch Shader vertices
580 uint64_t IaPrimitives; // Number of PA primitives.
581 uint64_t VsInvocations; // Number of Vertex Shader invocations
582 uint64_t HsInvocations; // Number of Hull Shader invocations
583 uint64_t DsInvocations; // Number of Domain Shader invocations
584 uint64_t GsInvocations; // Number of Geometry Shader invocations
585 uint64_t GsPrimitives; // Number of prims GS outputs.
586 uint64_t CInvocations; // Number of clipper invocations
587 uint64_t CPrimitives; // Number of clipper primitives.
588
589 // Streamout Stats
590 uint64_t SoPrimStorageNeeded[4];
591 uint64_t SoNumPrimsWritten[4];
592 };
593
//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_BUFFERS
/// Limits used to size stream-output arrays.
/////////////////////////////////////////////////////////////////////////

#define MAX_SO_STREAMS  4
#define MAX_SO_BUFFERS  4
#define MAX_ATTRIBUTES  32
601
// State of a single stream-output buffer. Sizes, pitch and offsets are
// expressed in dwords.
struct SWR_STREAMOUT_BUFFER
{
    bool enable;
    bool soWriteEnable;

    uint32_t* pBuffer;          // Pointers to streamout buffers.
    uint32_t bufferSize;        // Size of buffer in dwords.
    uint32_t pitch;             // Vertex pitch of buffer in dwords.
    uint32_t streamOffset;      // Offset into buffer in dwords. SOS will increment this offset.

    // Offset to the SO write offset. If not null then we update offset here.
    uint32_t* pWriteOffset;

};
623
624 //////////////////////////////////////////////////////////////////////////
625 /// STREAMOUT_STATE
626 /////////////////////////////////////////////////////////////////////////
627 struct SWR_STREAMOUT_STATE
628 {
629 // This disables stream output.
630 bool soEnable;
631
632 // which streams are enabled for streamout
633 bool streamEnable[MAX_SO_STREAMS];
634
635 // If set then do not send any streams to the rasterizer.
636 bool rasterizerDisable;
637
638 // Specifies which stream to send to the rasterizer.
639 uint32_t streamToRasterizer;
640
641 // The stream masks specify which attributes are sent to which streams.
642 // These masks help the FE to setup the pPrimData buffer that is passed
643 // the Stream Output Shader (SOS) function.
644 uint32_t streamMasks[MAX_SO_STREAMS];
645
646 // Number of attributes, including position, per vertex that are streamed out.
647 // This should match number of bits in stream mask.
648 uint32_t streamNumEntries[MAX_SO_STREAMS];
649 };
650
651 //////////////////////////////////////////////////////////////////////////
652 /// STREAMOUT_CONTEXT - Passed to SOS
653 /////////////////////////////////////////////////////////////////////////
654 struct SWR_STREAMOUT_CONTEXT
655 {
656 uint32_t* pPrimData;
657 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
658
659 // Num prims written for this stream
660 uint32_t numPrimsWritten;
661
662 // Num prims that should have been written if there were no overflow.
663 uint32_t numPrimStorageNeeded;
664 };
665
666 //////////////////////////////////////////////////////////////////////////
667 /// SWR_GS_STATE - Geometry shader state
668 /////////////////////////////////////////////////////////////////////////
669 struct SWR_GS_STATE
670 {
671 bool gsEnable;
672
673 // number of input attributes per vertex. used by the frontend to
674 // optimize assembling primitives for GS
675 uint32_t numInputAttribs;
676
677 // output topology - can be point, tristrip, or linestrip
678 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
679
680 // maximum number of verts that can be emitted by a single instance of the GS
681 uint32_t maxNumVerts;
682
683 // instance count
684 uint32_t instanceCount;
685
686 // geometry shader emits renderTargetArrayIndex
687 bool emitsRenderTargetArrayIndex;
688
689 // geometry shader emits PrimitiveID
690 bool emitsPrimitiveID;
691
692 // geometry shader emits ViewportArrayIndex
693 bool emitsViewportArrayIndex;
694
695 // if true, geometry shader emits a single stream, with separate cut buffer.
696 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
697 // to map vertices to streams
698 bool isSingleStream;
699
700 // when single stream is enabled, singleStreamID dictates which stream is being output.
701 // field ignored if isSingleStream is false
702 uint32_t singleStreamID;
703 };
704
705
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT          = 0,
    SWR_TS_OUTPUT_LINE           = 1,
    SWR_TS_OUTPUT_TRI_CW         = 2,
    SWR_TS_OUTPUT_TRI_CCW        = 3,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT = 4
};
718
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_PARTITIONING - Defines tessellation algorithm
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER            = 0,
    SWR_TS_ODD_FRACTIONAL     = 1,
    SWR_TS_EVEN_FRACTIONAL    = 2,

    SWR_TS_PARTITIONING_COUNT = 3
};
730
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_DOMAIN - Defines Tessellation Domain
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD         = 0,
    SWR_TS_TRI          = 1,
    SWR_TS_ISOLINE      = 2,

    SWR_TS_DOMAIN_COUNT = 3
};
742
743 //////////////////////////////////////////////////////////////////////////
744 /// SWR_TS_STATE - Tessellation state
745 /////////////////////////////////////////////////////////////////////////
746 struct SWR_TS_STATE
747 {
748 bool tsEnable;
749 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
750 SWR_TS_PARTITIONING partitioning; // @llvm_enum
751 SWR_TS_DOMAIN domain; // @llvm_enum
752
753 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
754
755 uint32_t numHsInputAttribs;
756 uint32_t numHsOutputAttribs;
757 uint32_t numDsOutputAttribs;
758 };
759
// output merger state

// Per-render-target write-disable flags, one bit per color channel.
// The static_assert below pins the struct to a single byte.
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed   : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue  : 1;
    uint8_t writeDisableAlpha : 1;
};
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
769
// Multisample count identifiers (1x through 16x).
// SWR_MULTISAMPLE_TYPE_COUNT is the number of identifiers.
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X         = 0,
    SWR_MULTISAMPLE_2X         = 1,
    SWR_MULTISAMPLE_4X         = 2,
    SWR_MULTISAMPLE_8X         = 3,
    SWR_MULTISAMPLE_16X        = 4,
    SWR_MULTISAMPLE_TYPE_COUNT = 5
};
779
780 struct SWR_BLEND_STATE
781 {
782 // constant blend factor color in RGBA float
783 float constantColor[4];
784
785 // alpha test reference value in unorm8 or float32
786 uint32_t alphaTestReference;
787 uint32_t sampleMask;
788 // all RT's have the same sample count
789 ///@todo move this to Output Merger state when we refactor
790 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
791
792 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
793 };
794 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
795
796 //////////////////////////////////////////////////////////////////////////
797 /// FUNCTION POINTERS FOR SHADERS
798
799 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
800 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
801 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
802 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
803 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
804 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
805 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
806 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
807 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
808 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, uint8_t*, simdvector&, simdscalari*, simdscalari*);
809 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar);
810
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
/////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // Skip clip test, perspective divide, and viewport transform;
    // intended for verts in screen space.
    bool vpTransformDisable;
    bool bEnableCutIndex;

    // Provoking vertex selection per topology family, packed into bit-fields;
    // `bits` aliases all of them as one 32-bit word.
    union
    {
        struct
        {
            uint32_t triFan        : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList  : 2;
        };
        uint32_t bits;
    } provokingVertex;

    // provoking vertex for the draw topology
    uint32_t topologyProvokingVertex;
};
832
//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRIX
/// Six float matrix entries, named by their row/column indices.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};
845
846 //////////////////////////////////////////////////////////////////////////
847 /// VIEWPORT_MATRIXES
848 /////////////////////////////////////////////////////////////////////////
849 struct SWR_VIEWPORT_MATRICES
850 {
851 float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
852 float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
853 float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
854 float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
855 float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
856 float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
857 };
858
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
/// Viewport rectangle (x, y, width, height) and Z range (minZ, maxZ).
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    // Rectangle
    float x;
    float y;
    float width;
    float height;

    // Z range
    float minZ;
    float maxZ;
};
871
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH  = 0,
    SWR_CULLMODE_NONE  = 1,
    SWR_CULLMODE_FRONT = 2,
    SWR_CULLMODE_BACK  = 3
};
882
// Fill mode identifiers.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT     = 0,
    SWR_FILLMODE_WIREFRAME = 1,
    SWR_FILLMODE_SOLID     = 2
};
889
// Front-face winding identifiers.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW  = 0,
    SWR_FRONTWINDING_CCW = 1
};
895
896
// MSAA sample pattern identifiers.
// SWR_MSAA_SAMPLE_PATTERN_COUNT is the number of patterns.
enum SWR_MSAA_SAMPLE_PATTERN
{
    SWR_MSAA_CENTER_PATTERN       = 0,
    SWR_MSAA_STANDARD_PATTERN     = 1,
    SWR_MSAA_SAMPLE_PATTERN_COUNT = 2
};
903
// Pixel location identifiers: center or UL.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER = 0,
    SWR_PIXEL_LOCATION_UL     = 1,
};
909
// Fixed point screen space sample location within a pixel.
struct SWR_MULTISAMPLE_POS
{
    uint32_t x;
    uint32_t y;
};
916
// MSAA rasterization mode identifiers.
enum SWR_MSAA_RASTMODE
{
    SWR_MSAA_RASTMODE_OFF_PIXEL   = 0,
    SWR_MSAA_RASTMODE_OFF_PATTERN = 1,
    SWR_MSAA_RASTMODE_ON_PIXEL    = 2,
    SWR_MSAA_RASTMODE_ON_PATTERN  = 3
};
924
925 //////////////////////////////////////////////////////////////////////////
926 /// SWR_RASTSTATE
927 //////////////////////////////////////////////////////////////////////////
928 struct SWR_RASTSTATE
929 {
930 uint32_t cullMode : 2;
931 uint32_t fillMode : 2;
932 uint32_t frontWinding : 1;
933 uint32_t scissorEnable : 1;
934 uint32_t depthClipEnable : 1;
935 uint32_t pointParam : 1;
936 uint32_t pointSpriteEnable : 1;
937 uint32_t pointSpriteTopOrigin : 1;
938 uint32_t msaaRastEnable : 1;
939 uint32_t forcedSampleCount : 1;
940 uint32_t pixelOffset : 1;
941 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
942 uint32_t conservativeRast : 1;
943
944 float pointSize;
945 float lineWidth;
946
947 float depthBias;
948 float slopeScaledDepthBias;
949 float depthBiasClamp;
950 SWR_FORMAT depthFormat; // @llvm_enum
951
952 ///@todo: MSAA lines
953 // multisample state for MSAA lines
954 SWR_MSAA_RASTMODE rastMode; // @llvm_enum
955
956 // sample count the rasterizer is running at
957 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
958 uint32_t pixelLocation; // UL or Center
959 SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];
960 SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum
961
962 // user clip/cull distance enables
963 uint8_t cullDistanceMask;
964 uint8_t clipDistanceMask;
965 };
966
/// Constant that can stand in for an attribute's components. Four values
/// (0..3), so it fits the 2-bit SWR_ATTRIB_SWIZZLE::constantSource field.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000       = 0,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT = 1,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT = 2,
    SWR_CONSTANT_SOURCE_PRIM_ID          = 3
};
974
/// One swizzle-map entry, packed into 11 bits of a 16-bit unit. It names the
/// source attribute and, optionally, a constant to substitute for selected
/// components.
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib           : 5;    // source attribute (5 bits: 0..31)
    uint16_t constantSource         : 2;    // constant source to apply (wide enough for SWR_CONSTANT_SOURCE)
    uint16_t componentOverrideMask  : 4;    // override component with constant source (4 mask bits)
};
981
982 // backend state
983 struct SWR_BACKEND_STATE
984 {
985 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
986 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
987
988 uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
989 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
990
991 bool swizzleEnable; // when enabled, core will parse the swizzle map when
992 // setting up attributes for the backend, otherwise
993 // all attributes up to numAttributes will be sent
994 SWR_ATTRIB_SWIZZLE swizzleMap[32];
995 };
996
997
/// Depth/stencil state. The named fields (anonymous struct) and the raw
/// value[3] array overlay the same storage, so the whole state can also be
/// handled as three 32-bit words.
/// NOTE(review): only two bytes of dword 2 are named; the remaining bytes of
///               value[2] are padding, so zero the union before relying on
///               value[] contents.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0: enables, compare functions and stencil ops (5 + 6 + 21 = 32 bits)
        uint32_t depthWriteEnable               : 1;
        uint32_t depthTestEnable                : 1;
        uint32_t stencilWriteEnable             : 1;
        uint32_t stencilTestEnable              : 1;
        uint32_t doubleSidedStencilTestEnable   : 1;

        uint32_t depthTestFunc                  : 3;
        uint32_t stencilTestFunc                : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp          : 3;
        uint32_t backfaceStencilTestFunc        : 3;
        uint32_t stencilPassDepthPassOp         : 3;
        uint32_t stencilPassDepthFailOp         : 3;
        uint32_t stencilFailOp                  : 3;

        // dword 1: stencil masks
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2: stencil reference values
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];      ///< raw view of the same storage
};
1032
/// Pixel shader invocation rate: once per pixel or once per sample.
/// SWR_SHADING_RATE_COUNT is left implicit so it tracks the number of rates.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL  = 0,
    SWR_SHADING_RATE_SAMPLE = 1,
    SWR_SHADING_RATE_COUNT,
};
1039
/// Kind of input coverage a pixel shader consumes. The real values (0..2)
/// fit the 2-bit SWR_PS_STATE::inputCoverage field.
/// SWR_INPUT_COVERAGE_COUNT is left implicit so it tracks the number of kinds.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE               = 0,
    SWR_INPUT_COVERAGE_NORMAL             = 1,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE = 2,
    SWR_INPUT_COVERAGE_COUNT,
};
1047
/// Type of offset added to the pixel position seen by the pixel shader
/// (none, sample or centroid). The real values (0..2) fit the 2-bit
/// SWR_PS_STATE::posOffset field.
/// SWR_PS_POSITION_OFFSET_COUNT is left implicit so it tracks the list.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE     = 0,
    SWR_PS_POSITION_SAMPLE_OFFSET   = 1,
    SWR_PS_POSITION_CENTROID_OFFSET = 2,
    SWR_PS_POSITION_OFFSET_COUNT,
};
1055
/// Bit flags naming the kinds of barycentric coordinates. Each flag is a
/// distinct single bit so they can be OR-ed together; all three fit the
/// 3-bit SWR_PS_STATE::barycentricsMask field.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 1 << 0,
    SWR_BARYCENTRIC_CENTROID_MASK   = 1 << 1,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 1 << 2,
};
1062
1063 // pixel shader state
1064 struct SWR_PS_STATE
1065 {
1066 // dword 0-1
1067 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1068
1069 // dword 2
1070 uint32_t killsPixel : 1; // pixel shader can kill pixels
1071 uint32_t inputCoverage : 2; // ps uses input coverage
1072 uint32_t writesODepth : 1; // pixel shader writes to depth
1073 uint32_t usesSourceDepth : 1; // pixel shader reads depth
1074 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
1075 uint32_t numRenderTargets : 4; // number of render target outputs in use (0-8)
1076 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1077 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
1078 uint32_t usesUAV : 1; // pixel shader accesses UAV
1079 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
1080
1081 };
1082
// depth bounds state
//
// Enable flag plus the [min, max] pair used by the depth bounds test.
// NOTE(review): whether the bounds are inclusive, and what range the values
// are expected to lie in, is decided by the code that consumes this state.
struct SWR_DEPTH_BOUNDS_STATE
{
    bool    depthBoundsTestEnable;      ///< depth bounds test on/off
    float   depthBoundsTestMinValue;    ///< lower bound
    float   depthBoundsTestMaxValue;    ///< upper bound
};
1090