// Write all entries into primitive data buffer for SOS.
// Each iteration handles the attribute slot index that _BitScanForward stores
// into `slot` from soMask.
// NOTE(review): the end of the loop body (and any update of soMask) is outside
// this hunk — presumably the handled bit is cleared per iteration; confirm.
while (_BitScanForward(&slot, soMask))
{
// Change in this hunk: the scratch array type goes from __m128 to simd4scalar,
// and the PA slot is offset by the per-stream soState.vertexAttribOffset[streamIndex]
// instead of the fixed VERTEX_ATTRIB_START_SLOT.
- __m128 attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
- uint32_t paSlot = slot + VERTEX_ATTRIB_START_SLOT;
+ simd4scalar attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
+ uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
// Called with the PA slot, the primitive index, and the scratch array
// (presumably the output; confirm against AssembleSingle's declaration).
pa.AssembleSingle(paSlot, primIndex, attrib);
// Attribute offset is relative offset from start of vertex.
// Set up the GS context for this input primitive and copy its attributes in.
tlsGsContext.PrimitiveID = primID;
uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
// attrib[] is indexed by vertex (attrib[i], i < numVertsPerPrim) in the loop
// below, so the new declaration sizes it by MAX_NUM_VERTS_PER_PRIM rather than
// MAX_ATTRIBUTES.
- simdvector attrib[MAX_ATTRIBUTES];
+ simdvector attrib[MAX_NUM_VERTS_PER_PRIM];
// assemble all attributes for the input primitive
for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot)
{
// Source slot in the PA: now based on pState->vertexAttribOffset instead of
// the fixed VERTEX_ATTRIB_START_SLOT.
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot;
+ uint32_t attribSlot = pState->vertexAttribOffset + slot;
pa.Assemble(attribSlot, attrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
// Destination slot in the GS context stays VERTEX_ATTRIB_START_SLOT + slot,
// so source and destination indices are no longer the same variable.
- tlsGsContext.vert[i].attrib[attribSlot] = attrib[i];
+ tlsGsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = attrib[i];
}
}
// Stream-out of the GS output primitives, taken only when HasStreamOutT::value is set.
if (HasStreamOutT::value)
{
// The useAlternateOffset reset is now compiled only under ENABLE_AVX512_SIMD16.
// NOTE(review): presumably the member only exists in that configuration — confirm.
+#if ENABLE_AVX512_SIMD16
gsPa.useAlternateOffset = false;
+#endif
StreamOut(pDC, gsPa, workerId, pSoPrimData, stream);
}
// Clip dispatch for the GS output primitives, in simd16 and non-simd16 variants.
// In both variants this hunk removes the local computation of vViewPortIdx
// (read from VERTEX_SGV_SLOT / VERTEX_SGV_VAI_COMP, forced to zero when not less
// than KNOB_NUM_VIEWPORTS_SCISSORS) and drops it as the last pfnClipFunc argument.
// NOTE(review): presumably the viewport array index is now obtained elsewhere —
// confirm against the pfnClipFunc implementation.
#if USE_SIMD16_FRONTEND
simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
- // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
- simd16scalari vViewPortIdx;
- if (state.gsState.emitsViewportArrayIndex)
- {
- simd16vector vpiAttrib[3];
- gsPa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
-
- // OOB indices => forced to zero.
- simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
- vViewPortIdx = _simd16_and_si(vClearMask, vpai);
- }
- else
- {
- vViewPortIdx = _simd16_set1_epi32(0);
- }
-
gsPa.useAlternateOffset = false;
- pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
#else
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
-
- // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
- simdscalari vViewPortIdx;
- if (state.gsState.emitsViewportArrayIndex)
- {
- simdvector vpiAttrib[3];
- gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
-
- // OOB indices => forced to zero.
- simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
- vViewPortIdx = _simd_and_si(vClearMask, vpai);
- }
- else
- {
- vViewPortIdx = _simd_set1_epi32(0);
- }
-
- pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
#endif
}
}
// assemble all attributes for the input primitives
// Copies tsState.numHsInputAttribs attributes into hsContext, one slot per pass.
for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot)
{
// Source slot in the PA: now based on tsState.vertexAttribOffset instead of
// the fixed VERTEX_ATTRIB_START_SLOT.
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot;
+ uint32_t attribSlot = tsState.vertexAttribOffset + slot;
pa.Assemble(attribSlot, simdattrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
// Destination slot in the HS context stays VERTEX_ATTRIB_START_SLOT + slot.
- hsContext.vert[i].attrib[attribSlot] = simdattrib[i];
+ hsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = simdattrib[i];
}
}
{
// Stream-out of the tessellation output primitives; the stream argument is the literal 0.
if (HasStreamOutT::value)
{
// The useAlternateOffset reset is now compiled only under ENABLE_AVX512_SIMD16.
// NOTE(review): presumably the member only exists in that configuration — confirm.
+#if ENABLE_AVX512_SIMD16
tessPa.useAlternateOffset = false;
+#endif
StreamOut(pDC, tessPa, workerId, pSoPrimData, 0);
}
// Clip dispatch for the tessellation output primitives. Both build variants
// drop the trailing viewport-index argument, which was a constant zero vector
// (_simd16_set1_epi32(0) / _simd_set1_epi32(0)) before this change.
SWR_ASSERT(pfnClipFunc);
#if USE_SIMD16_FRONTEND
tessPa.useAlternateOffset = false;
- pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_set1_epi32(0));
+ pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID);
#else
pfnClipFunc(pDC, tessPa, workerId, prim,
- GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
+ GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
#endif
}
}
// simd16 primitive processing call. The trailing viewport-index argument
// (previously _simd16_setzero_si()) is removed from the call.
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_setzero_si());
+ pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
}
}
}
}
#else
// Non-simd16 fetch / vertex-shader setup. This change removes the separate
// `vin` simdvertex: the fetch function writes into `vout`, and vsContext.pVin
// and vsContext.pVout both point at that same `vout`.
// NOTE(review): the vertex shader therefore reads and writes the same buffer —
// confirm the shader tolerates aliased input and output.
- simdvertex vin;
SWR_VS_CONTEXT vsContext;
-
- vsContext.pVin = &vin;
-
SWR_FETCH_CONTEXT fetchInfo = { 0 };
fetchInfo.pStreams = &state.vertexBuffers[0];
}
// vout is the reference returned by pa.GetNextVsOutput(); both context pointers
// are set to it below.
simdvertex& vout = pa.GetNextVsOutput();
+ vsContext.pVin = &vout;
vsContext.pVout = &vout;
if (i < endVertex)
// 1. Execute FS/VS for a single SIMD.
AR_BEGIN(FEFetchShader, pDC->drawId);
- state.pfnFetchFunc(fetchInfo, vin);
+ state.pfnFetchFunc(fetchInfo, vout);
AR_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
// Non-simd16 primitive processing call. The trailing viewport-index argument
// (previously _simd_set1_epi32(0)) is removed from the call.
SWR_ASSERT(pDC->pState->pfnProcessPrims);
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
- GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), _simd_set1_epi32(0));
+ GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
}
}
}