// Vertex Buffers
SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
- // Index Buffer
- SWR_INDEX_BUFFER_STATE indexBuffer;
+ // GS - Geometry Shader State
+ SWR_GS_STATE gsState;
+ PFN_GS_FUNC pfnGsFunc;
// FS - Fetch Shader State
PFN_FETCH_FUNC pfnFetchFunc;
// VS - Vertex Shader State
PFN_VERTEX_FUNC pfnVertexFunc;
- // GS - Geometry Shader State
- PFN_GS_FUNC pfnGsFunc;
- SWR_GS_STATE gsState;
+ // Index Buffer
+ SWR_INDEX_BUFFER_STATE indexBuffer;
// CS - Compute Shader
PFN_CS_FUNC pfnCsFunc;
// Number of attributes used by the frontend (vs, so, gs)
uint32_t feNumAttributes;
- PRIMITIVE_TOPOLOGY topology;
- bool forceFront;
// RS - Rasterizer State
SWR_RASTSTATE rastState;
SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
bool scissorsTileAligned;
+ bool forceFront;
+ PRIMITIVE_TOPOLOGY topology;
+
+
// Backend state
- SWR_BACKEND_STATE backendState;
+ OSALIGNLINE(SWR_BACKEND_STATE) backendState;
SWR_DEPTH_BOUNDS_STATE depthBoundsState;
DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
};
DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread.
- DRAW_DYNAMIC_STATE dynState;
-
CachingArena* pArena;
uint32_t drawId;
FE_WORK FeWork;
+ SYNC_DESC retireCallback; // Call this func when this DC is retired.
+
+ DRAW_DYNAMIC_STATE dynState;
+
volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
volatile OSALIGNLINE(uint32_t) FeLock;
volatile OSALIGNLINE(uint32_t) threadsDone;
-
- SYNC_DESC retireCallback; // Call this func when this DC is retired.
};
static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
#if USE_SIMD16_FRONTEND
uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
#if USE_SIMD16_VS
- simd16scalari mask16; // IN: Active mask for shader (16-wide)
- simd16scalari VertexID16; // IN: Vertex ID (16-wide)
+ simd16scalari mask16; // IN: Active mask for shader (16-wide)
+ simd16scalari VertexID16; // IN: Vertex ID (16-wide)
#endif
#endif
};
// in the fetch shader jit
struct SWR_VERTEX_BUFFER_STATE
{
+ gfxptr_t xpData;
uint32_t index;
uint32_t pitch;
- gfxptr_t xpData;
uint32_t size;
- uint32_t numaNode;
uint32_t minVertex; // min vertex (for bounds checking)
uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
struct SWR_INDEX_BUFFER_STATE
{
+ const void *pIndices;
// Format type for indices (e.g. UINT16, UINT32, etc.)
SWR_FORMAT format; // @llvm_enum
- const void *pIndices;
uint32_t size;
};
struct SWR_STREAMOUT_BUFFER
{
- bool enable;
- bool soWriteEnable;
-
// Pointers to streamout buffers.
uint32_t* pBuffer;
+ // Offset to the SO write offset. If not null then we update offset here.
+ uint32_t* pWriteOffset;
+
+ bool enable;
+ bool soWriteEnable;
+
// Size of buffer in dwords.
uint32_t bufferSize;
// Offset into buffer in dwords. SOS will increment this offset.
uint32_t streamOffset;
-
- // Offset to the SO write offset. If not null then we update offset here.
- uint32_t* pWriteOffset;
-
};
//////////////////////////////////////////////////////////////////////////
{
bool gsEnable;
+ // If true, geometry shader emits a single stream, with separate cut buffer.
+ // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
+ // to map vertices to streams
+ bool isSingleStream;
+
// Number of input attributes per vertex. Used by the frontend to
// optimize assembling primitives for GS
uint32_t numInputAttribs;
// Maximum number of verts that can be emitted by a single instance of the GS
uint32_t maxNumVerts;
-
+
// Instance count
uint32_t instanceCount;
- // If true, geometry shader emits a single stream, with separate cut buffer.
- // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
- // to map vertices to streams
- bool isSingleStream;
-
// When single stream is enabled, singleStreamID dictates which stream is being output.
// field ignored if isSingleStream is false
uint32_t singleStreamID;
// Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
// expected to store the final vertex count in the first dword of the gs output stream.
uint32_t staticVertexCount;
+
+ uint32_t pad;
};
+static_assert(sizeof(SWR_GS_STATE) == 64,
+ "Adjust padding to keep size (or remove this assert)");
//////////////////////////////////////////////////////////////////////////
struct SWR_TS_STATE
{
bool tsEnable;
+
SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
SWR_TS_PARTITIONING partitioning; // @llvm_enum
SWR_TS_DOMAIN domain; // @llvm_enum
float constantColor[4];
// alpha test reference value in unorm8 or float32
- uint32_t alphaTestReference;
+ uint32_t alphaTestReference;
uint32_t sampleMask;
// all RT's have the same sample count
///@todo move this to Output Merger state when we refactor
- SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
+ SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
};
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
-typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
- simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
+typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
+ simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
} provokingVertex;
uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
- // Size of a vertex in simdvector units. Should be sized to the
+ // Size of a vertex in simdvector units. Should be sized to the
// maximum of the input/output of the vertex shader.
uint32_t vsVertexSize;
};
INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
-
+
INLINE void PrecalcSampleData(int numSamples); //@llvm_func
private:
struct SWR_ATTRIB_SWIZZLE
{
- uint16_t sourceAttrib : 5; // source attribute
+ uint16_t sourceAttrib : 5; // source attribute
uint16_t constantSource : 2; // constant source to apply
uint16_t componentOverrideMask : 4; // override component with constant source
};
uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
- uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
- uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
-
- bool swizzleEnable; // when enabled, core will parse the swizzle map when
+ bool swizzleEnable; // when enabled, core will parse the swizzle map when
// setting up attributes for the backend, otherwise
// all attributes up to numAttributes will be sent
- SWR_ATTRIB_SWIZZLE swizzleMap[32];
+ uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
+ uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
-
- // Offset to the start of the attributes of the input vertices, in simdvector units
- uint32_t vertexAttribOffset;
// User clip/cull distance enables
uint8_t cullDistanceMask;
uint8_t clipDistanceMask;
+ // padding to ensure swizzleMap starts 64B offset from start of the struct
+ // and that the next fields are dword aligned.
+ uint8_t pad[10];
+
+ // Offset to the start of the attributes of the input vertices, in simdvector units
+ uint32_t vertexAttribOffset;
+
// Offset to clip/cull attrib section of the vertex, in simdvector units
uint32_t vertexClipCullOffset;
+
+ SWR_ATTRIB_SWIZZLE swizzleMap[32];
};
+static_assert(sizeof(SWR_BACKEND_STATE) == 128,
+ "Adjust padding to keep size (or remove this assert)");
union SWR_DEPTH_STENCIL_STATE
enum SWR_PS_POSITION_OFFSET
{
- SWR_PS_POSITION_SAMPLE_NONE,
- SWR_PS_POSITION_SAMPLE_OFFSET,
+ SWR_PS_POSITION_SAMPLE_NONE,
+ SWR_PS_POSITION_SAMPLE_OFFSET,
SWR_PS_POSITION_CENTROID_OFFSET,
SWR_PS_POSITION_OFFSET_COUNT,
};
uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
- uint32_t usesUAV : 1; // pixel shader accesses UAV
+ uint32_t usesUAV : 1; // pixel shader accesses UAV
uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
uint8_t renderTargetMask; // Mask of render targets written