uint32_t soVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
// The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each vertex.
- uint32_t primDataDwordVertexStride = (KNOB_NUM_ATTRIBUTES * sizeof(float) * 4) / sizeof(uint32_t);
+ uint32_t primDataDwordVertexStride = (SWR_VTX_NUM_SLOTS * sizeof(float) * 4) / sizeof(uint32_t);
SWR_STREAMOUT_CONTEXT soContext = { 0 };
///
/// attribCount will limit the vector copies to those attribs specified
///
-/// note: the stride between vertexes is determinded by KNOB_NUM_ATTRIBUTES
+/// note: the stride between vertices is determined by SWR_VTX_NUM_SLOTS
///
void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex *vertex_simd16, const simdvertex *vertex, uint32_t vertexCount, uint32_t attribCount)
{
SWR_ASSERT(vertex);
SWR_ASSERT(vertex_simd16);
- SWR_ASSERT(attribCount <= KNOB_NUM_ATTRIBUTES);
+ SWR_ASSERT(attribCount <= SWR_VTX_NUM_SLOTS);
simd16vertex temp;
}
curInputByte >>= 2;
}
-
+
*pCutBuffer++ = outByte;
}
}
tlsGsContext.vert[i].attrib[attribSlot] = attrib[i];
}
}
-
+
// assemble position
pa.Assemble(VERTEX_POSITION_SLOT, attrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
uint8_t* pBase = pInstanceBase + instance * bufferInfo.vertexInstanceStride;
uint8_t* pCutBase = pCutBufferBase + instance * bufferInfo.cutInstanceStride;
-
+
uint32_t numAttribs = state.feNumAttributes;
for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
tempVertex_simd16,
reinterpret_cast<const simdvertex *>(pBase),
numEmittedVerts,
- KNOB_NUM_ATTRIBUTES);
+ SWR_VTX_NUM_SLOTS);
#endif
#if USE_SIMD16_FRONTEND
// TODO: this belongs in state.h alongside the simdvector definition, but there is a llvm codegen issue
struct simd16vertex
{
- simd16vector attrib[KNOB_NUM_ATTRIBUTES];
+ simd16vector attrib[SWR_VTX_NUM_SLOTS];
};
#endif
// Maximum supported number of active vertex buffer streams
#define KNOB_NUM_STREAMS 32
-// Maximum supported number of attributes per vertex
-#define KNOB_NUM_ATTRIBUTES 39
-
// Maximum supported active viewports and scissors
#define KNOB_NUM_VIEWPORTS_SCISSORS 16
static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4;
// try to avoid _chkstk insertions; make this thread local
-static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * KNOB_NUM_ATTRIBUTES * componentsPerAttrib];
+static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib];
INLINE
void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
newWorkDesc.pTriBuffer = &newTriBuffer[0];
// create a copy of the attrib buffer to write our adjusted attribs to
- OSALIGNSIMD(float) newAttribBuffer[4 * 3 * KNOB_NUM_ATTRIBUTES];
+ OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
newWorkDesc.pAttribs = &newAttribBuffer[0];
newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
newWorkDesc.pTriBuffer = &newTriBuffer[0];
// create a copy of the attrib buffer to write our adjusted attribs to
- OSALIGNSIMD(float) newAttribBuffer[4 * 3 * KNOB_NUM_ATTRIBUTES];
+ OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
newWorkDesc.pAttribs = &newAttribBuffer[0];
const __m128 vBloat0 = _mm_set_ps(0.5f, -0.5f, -0.5f, 0.5f);
/// space for up to 32 attributes, as well as any SGV values generated
/// by the pipeline
/////////////////////////////////////////////////////////////////////////
-#define VERTEX_POSITION_SLOT 0
-#define VERTEX_ATTRIB_START_SLOT 1
-#define VERTEX_ATTRIB_END_SLOT 32
-#define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here
-#define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here
-#define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist
-#define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist
-#define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here
-#define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT 38
+enum SWR_VTX_SLOTS
+{
+ VERTEX_POSITION_SLOT = 0,
+ VERTEX_POSITION_END_SLOT = 0, // every slot below is expressed as an offset from this slot
+ VERTEX_ATTRIB_START_SLOT = ( 1 + VERTEX_POSITION_END_SLOT),
+ VERTEX_ATTRIB_END_SLOT = (32 + VERTEX_POSITION_END_SLOT),
+ VERTEX_RTAI_SLOT = (33 + VERTEX_POSITION_END_SLOT), // GS writes RenderTargetArrayIndex here
+ VERTEX_PRIMID_SLOT = (34 + VERTEX_POSITION_END_SLOT), // GS writes PrimId here
+ VERTEX_CLIPCULL_DIST_LO_SLOT = (35 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
+ VERTEX_CLIPCULL_DIST_HI_SLOT = (36 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
+ VERTEX_POINT_SIZE_SLOT = (37 + VERTEX_POSITION_END_SLOT), // VS writes point size here
+ VERTEX_VIEWPORT_ARRAY_INDEX_SLOT = (38 + VERTEX_POSITION_END_SLOT),
+ SWR_VTX_NUM_SLOTS = (1 + VERTEX_VIEWPORT_ARRAY_INDEX_SLOT), // slot COUNT (last slot index + 1), used as the attrib[] array size; keeps the 39 of the removed KNOB_NUM_ATTRIBUTES
+};
+
// SoAoSoA
struct simdvertex
{
- simdvector attrib[KNOB_NUM_ATTRIBUTES];
+ simdvector attrib[SWR_VTX_NUM_SLOTS];
};
//////////////////////////////////////////////////////////////////////////
struct ScalarCPoint
{
- ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES];
+ ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
};
//////////////////////////////////////////////////////////////////////////
struct FETCH_COMPILE_STATE
{
uint32_t numAttribs{ 0 };
- INPUT_ELEMENT_DESC layout[KNOB_NUM_ATTRIBUTES];
+ INPUT_ELEMENT_DESC layout[SWR_VTX_NUM_SLOTS];
SWR_FORMAT indexType;
uint32_t cutIndex{ 0xffffffff };
// increment stream and output buffer pointers
// stream verts are always 32*4 dwords apart
- pStreamData = GEP(pStreamData, C(KNOB_NUM_ATTRIBUTES * 4));
+ pStreamData = GEP(pStreamData, C(SWR_VTX_NUM_SLOTS * 4));
// output buffers offset using pitch in buffer state
for (uint32_t b : activeSOBuffers)