#include <limits>
#include <iostream>
-//////////////////////////////////////////////////////////////////////////
-/// @brief Helper macro to generate a bitmask
-static INLINE uint32_t GenMask(uint32_t numBits)
-{
- SWR_ASSERT(
- numBits <= (sizeof(uint32_t) * 8), "Too many bits (%d) for %s", numBits, __FUNCTION__);
- return ((1U << numBits) - 1);
-}
-
//////////////////////////////////////////////////////////////////////////
/// @brief Return number of verts per primitive.
/// @param topology - primitive topology to query
/// @param includeAdjVerts - include adjacent verts in primitive vertices
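+/// e.g. TOP_TRIANGLE_LIST yields 3 and TOP_POINT_LIST yields 1; TOP_LINE_LIST_ADJ
+/// yields 4 when includeAdjVerts is true and 2 otherwise.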
-INLINE uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts)
+uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts)
{
uint32_t numVerts = 0;
switch (topology)
uint32_t* (&in_ppIndices)[3],
uint32_t in_numPrims,
PRIMITIVE_TOPOLOGY in_binTopology,
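+ // SOA selects the layout of in_pVertData: true keeps the existing
+ // structure-of-arrays SIMD layout; false reads packed array-of-structures
+ // vertices. Defaulted so existing callers are unaffected.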
- uint32_t numVertsPerPrim) :
+ uint32_t numVertsPerPrim,
+ bool SOA = true) :
PA_STATE(in_pDC, nullptr, 0, in_vertexStride, numVertsPerPrim),
m_pVertexData(in_pVertData), m_attributeStrideInVectors(in_attributeStrideInVectors),
- m_numAttributes(in_numAttributes), m_numPrims(in_numPrims)
+ m_numAttributes(in_numAttributes), m_numPrims(in_numPrims), m_SOA(SOA)
{
#if USE_SIMD16_FRONTEND
m_vPrimId = _simd16_setzero_si();
SIMDSCALARI mask = GenPrimMask(numPrimsToAssemble);
- const float* pBaseAttrib =
- (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ const float* pBaseAttrib;
+ if (m_SOA)
+ {
+ pBaseAttrib = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ }
+ else
+ {
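+ // AoS layout: each vertex is one packed record of 4-float attributes,
+ // so attribute 'slot' begins slot * 4 floats into the record.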
+ const float* pVertData = (const float*)m_pVertexData;
+ pBaseAttrib = pVertData + slot * 4;
+ }
+
for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
{
#if USE_SIMD16_FRONTEND
_simd_castsi_ps(mask),
4); // gcc doesn't like sizeof(float)
#endif
- pBase += m_attributeStrideInVectors * SIMD_WIDTH;
+ if (m_SOA)
+ {
+ pBase += m_attributeStrideInVectors * SIMD_WIDTH;
+ }
+ else
+ {
+ // pBase is a const float*, so step one element (one component) at a time.
+ // pBase += sizeof(float) would skip four floats and read the wrong
+ // components; the other Assemble below already advances with pBase++.
+ pBase++;
+ }
}
}
SIMDSCALARI mask = GenPrimMask(numPrimsToAssemble);
- const float* pBaseAttrib =
- (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ const float* pBaseAttrib;
+ if (m_SOA)
+ {
+ pBaseAttrib = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ }
+ else
+ {
+ const float* pVertData = (const float*)m_pVertexData;
+ pBaseAttrib = pVertData + slot * 4;
+ }
+
for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
{
#if USE_SIMD16_FRONTEND
SIMDSCALARI indices = _simd16_load_si((const SIMDSCALARI*)m_ppIndices[i]);
+ if (!m_SOA)
+ {
+ // For AoS, scale vertex indices to float offsets (vertexStride is in
+ // bytes; the gather uses a 4-byte scale). mullo gives the per-lane
+ // 32-bit product; mul_epi32 widens even lanes to 64 bits and would
+ // corrupt the indices.
+ indices = _simd16_mullo_epi32(indices, _simd16_set1_epi32(vertexStride / 4));
+ }
#else
SIMDSCALARI indices = _simd_load_si((const SIMDSCALARI*)m_ppIndices[i]);
+ if (!m_SOA)
+ {
+ // Mirror the SIMD16 path above so non-SIMD16 builds also convert AoS
+ // vertex indices into float offsets.
+ indices = _simd_mullo_epi32(indices, _simd_set1_epi32(vertexStride / 4));
+ }
#endif
4 /* gcc doesn't like sizeof(float) */);
verts[i].v[c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
#endif
- pBase += m_attributeStrideInVectors * SIMD_WIDTH;
+ if (m_SOA)
+ {
+ pBase += m_attributeStrideInVectors * SIMD_WIDTH;
+ }
+ else
+ {
+ pBase++;
+ }
}
}
SWR_ASSERT(primIndex < PA_TESS::NumPrims());
- const float* pVertDataBase =
- (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ const float* pVertDataBase;
+ if (m_SOA)
+ {
+ pVertDataBase = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ }
+ else
+ {
+ const float* pVertData = (const float*)m_pVertexData;
+ pVertDataBase = pVertData + slot * 4;
+ }
for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
{
#if USE_SIMD16_FRONTEND
uint32_t index = useAlternateOffset ? m_ppIndices[i][primIndex + SIMD_WIDTH_DIV2]
: m_ppIndices[i][primIndex];
+ if (!m_SOA)
+ {
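+ // Convert the vertex index into a float offset: vertexStride is in
+ // bytes, so vertexStride / 4 is the per-vertex stride in floats.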
+ index *= (vertexStride / 4);
+ }
#else
uint32_t index = m_ppIndices[i][primIndex];
+ if (!m_SOA)
+ {
+ // Mirror the SIMD16 path above: convert the vertex index to a float offset.
+ index *= (vertexStride / 4);
+ }
#endif
for (uint32_t c = 0; c < 4; ++c)
{
pVert[c] = pVertData[index];
- pVertData += m_attributeStrideInVectors * SIMD_WIDTH;
+ if (m_SOA)
+ {
+ pVertData += m_attributeStrideInVectors * SIMD_WIDTH;
+ }
+ else
+ {
+ pVertData++;
+ }
}
}
}
#endif
SIMDVERTEX junkVertex; // junk SIMDVERTEX for unimplemented API
SIMDMASK junkIndices; // temporary index store for unused virtual function
+
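+ // Vertex data layout flag: true = structure of arrays (SIMD vectors per
+ // attribute component), false = array of structures (packed per-vertex floats).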
+ bool m_SOA;
};
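+// A minimal sketch of constructing PA_TESS in AoS mode (argument names are
+// illustrative, not from this patch; SoA callers are unchanged by the default):
+//
+//   PA_TESS pa(pDC, (const SIMDSCALAR*)pPackedVerts, // flat AoS float data
+//              0,                  // attribute stride in vectors: unused on the AoS paths
+//              vertexStrideBytes,  // per-vertex stride in bytes
+//              numAttribs, ppIndices, numPrims, TOP_TRIANGLE_LIST,
+//              3 /*verts per prim*/, /*SOA=*/false);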
// Primitive Assembler factory class, responsible for creating and initializing the correct