#include "core/pa.h"
#include "rdtsc_core.h"
+// Temp storage used by the clipper
+extern THREAD simdvertex tlsTempVertices[7];
+
enum SWR_CLIPCODES
{
// Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
simdscalari ClipPrims(float* pVertices, const simdscalar& vPrimMask, const simdscalar& vClipMask, int numAttribs)
{
// temp storage
- simdvertex tempVertices[7];
- float* pTempVerts = (float*)&tempVertices[0];
+ float* pTempVerts = (float*)&tlsTempVertices[0];
// zero out num input verts for non-active lanes
simdscalari vNumInPts = _simd_set1_epi32(NumVertsPerPrim);
}
}
+THREAD SWR_GS_CONTEXT tlsGsContext;
+
//////////////////////////////////////////////////////////////////////////
/// @brief Implements GS stage.
/// @param pDC - pointer to draw context.
{
RDTSC_START(FEGeometryShader);
- SWR_GS_CONTEXT gsContext;
SWR_CONTEXT* pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
SWR_ASSERT(pGsOut != nullptr, "GS output buffer should be initialized");
SWR_ASSERT(pCutBuffer != nullptr, "GS output cut buffer should be initialized");
- gsContext.pStream = (uint8_t*)pGsOut;
- gsContext.pCutOrStreamIdBuffer = (uint8_t*)pCutBuffer;
- gsContext.PrimitiveID = primID;
+ tlsGsContext.pStream = (uint8_t*)pGsOut;
+ tlsGsContext.pCutOrStreamIdBuffer = (uint8_t*)pCutBuffer;
+ tlsGsContext.PrimitiveID = primID;
uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
simdvector attrib[MAX_ATTRIBUTES];
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
- gsContext.vert[i].attrib[attribSlot] = attrib[i];
+ tlsGsContext.vert[i].attrib[attribSlot] = attrib[i];
}
}
pa.Assemble(VERTEX_POSITION_SLOT, attrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
- gsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i];
+ tlsGsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i];
}
const uint32_t vertexStride = sizeof(simdvertex);
for (uint32_t instance = 0; instance < pState->instanceCount; ++instance)
{
- gsContext.InstanceID = instance;
- gsContext.mask = GenerateMask(numInputPrims);
+ tlsGsContext.InstanceID = instance;
+ tlsGsContext.mask = GenerateMask(numInputPrims);
// execute the geometry shader
- state.pfnGsFunc(GetPrivateState(pDC), &gsContext);
+ state.pfnGsFunc(GetPrivateState(pDC), &tlsGsContext);
- gsContext.pStream += instanceStride;
- gsContext.pCutOrStreamIdBuffer += cutInstanceStride;
+ tlsGsContext.pStream += instanceStride;
+ tlsGsContext.pCutOrStreamIdBuffer += cutInstanceStride;
}
// set up new binner and state for the GS output topology
// foreach input prim:
// - setup a new PA based on the emitted verts for that prim
// - loop over the new verts, calling PA to assemble each prim
- uint32_t* pVertexCount = (uint32_t*)&gsContext.vertexCount;
+ uint32_t* pVertexCount = (uint32_t*)&tlsGsContext.vertexCount;
uint32_t* pPrimitiveId = (uint32_t*)&primID;
uint32_t totalPrimsGenerated = 0;