Binner/clipper read the viewport array index from the vertex header as needed.
Move the viewport array index flag from the GS state to BACKEND_STATE.
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
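
For reference, a minimal scalar sketch of the per-lane selection the SIMD paths below perform; the helper name and the std::max call are illustrative assumptions, not part of this patch. Out-of-range indices (including negatives, where the zero clamp is present) fall back to viewport 0.

    #include <algorithm>
    #include <cstdint>

    // Illustrative scalar equivalent of the SIMD viewport-index selection
    // (hypothetical helper, not part of this patch).
    static uint32_t SelectViewportIndex(bool readViewportArrayIndex,
                                        int32_t vertexHeaderVai,
                                        int32_t numViewports) // KNOB_NUM_VIEWPORTS_SCISSORS
    {
        if (!readViewportArrayIndex)
            return 0;                                   // FE stage does not write a viewport index

        int32_t vpai = std::max<int32_t>(0, vertexHeaderVai);  // clamp negative indices
        return (vpai < numViewports) ? uint32_t(vpai) : 0u;    // OOB indices => viewport 0
    }

The SIMD blocks express the same logic with _simd_max_epi32 / _simd_cmplt_epi32 and a mask-and.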
// update guardband multipliers for the viewport
void updateGuardbands(API_STATE *pState)
{
- uint32_t numGbs = pState->backendState.readRenderTargetArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
+ uint32_t numGbs = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
for(uint32_t i = 0; i < numGbs; ++i)
{
void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
{
API_STATE *pState = &pDC->pState->state;
- uint32_t numScissors = pState->gsState.emitsViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
+ uint32_t numScissors = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
pState->scissorsTileAligned = true;
for (uint32_t index = 0; index < numScissors; ++index)
uint32_t workerId,
simdvector tri[3],
uint32_t triMask,
- simdscalari primID,
- simdscalari viewportIdx)
+ simdscalari primID)
{
SWR_CONTEXT *pContext = pDC->pContext;
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
simdscalar vRecipW2 = _simd_set1_ps(1.0f);
+ // Read viewport array index if needed
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[3];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
+
if (feState.vpTransformDisable)
{
// RHW is passed in directly when VP transform is disabled
tri[2].v[2] = _simd_mul_ps(tri[2].v[2], vRecipW2);
// Viewport transform to screen space coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
}
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
uint32_t workerId,
simd16vector tri[3],
uint32_t triMask,
- simd16scalari primID,
- simd16scalari viewportIdx)
+ simd16scalari primID)
{
SWR_CONTEXT *pContext = pDC->pContext;
simd16scalar vRecipW0 = _simd16_set1_ps(1.0f);
simd16scalar vRecipW1 = _simd16_set1_ps(1.0f);
simd16scalar vRecipW2 = _simd16_set1_ps(1.0f);
+
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[3];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
if (feState.vpTransformDisable)
{
tri[2].v[2] = _simd16_mul_ps(tri[2].v[2], vRecipW2);
// Viewport transform to screen space coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
}
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
uint32_t workerId,
simdvector prim[3],
uint32_t primMask,
- simdscalari primID,
- simdscalari viewportIdx)
+ simdscalari primID)
{
simdvector& primVerts = prim[0];
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_RASTSTATE& rastState = state.rastState;
+ // Read back viewport index if required
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[1];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+
+ // OOB indices => forced to zero.
+ vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
primVerts.z = _simd_mul_ps(primVerts.z, vRecipW0);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
}
// Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
uint32_t workerId,
simd16vector prim[3],
uint32_t primMask,
- simd16scalari primID,
- simd16scalari viewportIdx)
+ simd16scalari primID)
{
simd16vector& primVerts = prim[0];
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_RASTSTATE& rastState = state.rastState;
+ // Read back viewport index if required
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[1];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
primVerts.z = _simd16_mul_ps(primVerts.z, vRecipW0);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
}
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
simd16scalari scisXmin, scisYmin, scisXmax, scisYmax;
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
GatherScissors_simd16<KNOB_SIMD16_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
scisXmin, scisYmin, scisXmax, scisYmax);
uint32_t workerId,
simdvector prim[],
uint32_t primMask,
- simdscalari primID,
- simdscalari viewportIdx)
+ simdscalari primID)
{
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
simdscalar vRecipW[2] = { _simd_set1_ps(1.0f), _simd_set1_ps(1.0f) };
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[2];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
+
+ // OOB indices => forced to zero.
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
prim[1].v[2] = _simd_mul_ps(prim[1].v[2], vRecipW[1]);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
}
uint32_t workerId,
simd16vector prim[3],
uint32_t primMask,
- simd16scalari primID,
- simd16scalari viewportIdx)
+ simd16scalari primID)
{
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
simd16scalar vRecipW[2] = { _simd16_set1_ps(1.0f), _simd16_set1_ps(1.0f) };
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[2];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
+
if (!feState.vpTransformDisable)
{
// perspective divide
prim[1].v[2] = _simd16_mul_ps(prim[1].v[2], vRecipW[1]);
// viewport transform to screen coords
- if (state.gsState.emitsViewportArrayIndex)
+ if (state.backendState.readViewportArrayIndex)
{
viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
}
return i;
}
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<3> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipTriangles, 1);
}
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
Clipper<2> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipLines, 1);
}
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
Clipper<1> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipPoints, 1);
}
#if USE_SIMD16_FRONTEND
-void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipTriangles, 1);
}
-void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipLines, 1);
}
-void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
Clipper<VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId);
AR_END(FEClipPoints, 1);
}
#endif
// clip SIMD primitives
- void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
+ void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId)
{
// input/output vertex store for clipper
simdvertex vertices[7]; // maximum 7 verts generated per triangle
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
- uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
const simdscalari vOffsets = _mm256_set_epi32(
0 * sizeof(simdvertex), // unused lane
}
clipPa.useAlternateOffset = false;
- pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
+ pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
}
#else
simdvector attrib[NumVertsPerPrim];
if (assemble)
{
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
- pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
+ pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
}
#endif
} while (clipPa.NextPrim());
}
#if USE_SIMD16_FRONTEND
- void ClipSimd(const simd16scalar& vPrimMask, const simd16scalar& vClipMask, PA_STATE& pa, const simd16scalari& vPrimId, const simd16scalari& vViewportIdx)
+ void ClipSimd(const simd16scalar& vPrimMask, const simd16scalar& vClipMask, PA_STATE& pa, const simd16scalari& vPrimId)
{
// input/output vertex store for clipper
simd16vertex vertices[7]; // maximum 7 verts generated per triangle
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
- uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
const simdscalari vOffsets = _simd_set_epi32(
0 * sizeof(simd16vertex), // unused lane
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff };
clipPa.useAlternateOffset = false;
- pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd16_set1_epi32(pPrimitiveId[inputPrim]), _simd16_set1_epi32(pViewportIdx[inputPrim]));
+ pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd16_set1_epi32(pPrimitiveId[inputPrim]));
}
} while (clipPa.NextPrim());
#endif
// execute the clipper stage
- void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
+ void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId)
{
SWR_ASSERT(this->pDC != nullptr);
SWR_CONTEXT* pContext = this->pDC->pContext;
// update clipper invocations pipeline stat
uint32_t numInvoc = _mm_popcnt_u32(primMask);
UPDATE_STAT_FE(CInvocations, numInvoc);
+
+ // Read back viewport index if required
+ simdscalari viewportIdx = _simd_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simdvector vpiAttrib[NumVertsPerPrim];
+ pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+
+ // OOB indices => forced to zero.
+ vpai = _simd_max_epi32(_simd_setzero_si(), vpai);
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd_and_si(vClearMask, vpai);
+ }
ComputeClipCodes(prim, viewportIdx);
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
+ ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
- pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
+ pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
}
}
#if USE_SIMD16_FRONTEND
- void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx)
+ void ExecuteStage(PA_STATE& pa, simd16vector prim[], uint32_t primMask, simd16scalari primId)
{
SWR_ASSERT(pa.pDC != nullptr);
SWR_CONTEXT* pContext = pa.pDC->pContext;
uint32_t numInvoc = _mm_popcnt_u32(primMask);
UPDATE_STAT_FE(CInvocations, numInvoc);
+ // Read back viewport index if required
+ simd16scalari viewportIdx = _simd16_set1_epi32(0);
+ if (state.backendState.readViewportArrayIndex)
+ {
+ simd16vector vpiAttrib[NumVertsPerPrim];
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
+ simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ viewportIdx = _simd16_and_si(vClearMask, vpai);
+ }
ComputeClipCodes(prim, viewportIdx);
// cull prims with NAN coords
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(vMask16(primMask), vMask16(clipMask), pa, primId, viewportIdx);
+ ClipSimd(vMask16(primMask), vMask16(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
- pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
+ pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
}
}
// pipeline stage functions
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
#if USE_SIMD16_FRONTEND
-void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
-void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
-void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId, simd16scalari viewportIdx);
+void SIMDAPI ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
+void SIMDAPI ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
+void SIMDAPI ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari primId);
#endif
// function signature for pipeline stages that execute after primitive assembly
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
- uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
+ uint32_t primMask, simdscalari primID);
#if ENABLE_AVX512_SIMD16
// function signature for pipeline stages that execute after primitive assembly
typedef void(SIMDAPI *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
- uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
+ uint32_t primMask, simd16scalari primID);
#endif
OSALIGNLINE(struct) API_STATE
#if USE_SIMD16_FRONTEND
simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
- // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
- simd16scalari vViewPortIdx;
- if (state.gsState.emitsViewportArrayIndex)
- {
- simd16vector vpiAttrib[3];
- gsPa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
-
- // OOB indices => forced to zero.
- simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
- vViewPortIdx = _simd16_and_si(vClearMask, vpai);
- }
- else
- {
- vViewPortIdx = _simd16_set1_epi32(0);
- }
-
gsPa.useAlternateOffset = false;
- pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId);
#else
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
-
- // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
- simdscalari vViewPortIdx;
- if (state.gsState.emitsViewportArrayIndex)
- {
- simdvector vpiAttrib[3];
- gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
-
- // OOB indices => forced to zero.
- simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
- vViewPortIdx = _simd_and_si(vClearMask, vpai);
- }
- else
- {
- vViewPortIdx = _simd_set1_epi32(0);
- }
-
- pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
#endif
}
}
SWR_ASSERT(pfnClipFunc);
#if USE_SIMD16_FRONTEND
tessPa.useAlternateOffset = false;
- pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_set1_epi32(0));
+ pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID);
#else
pfnClipFunc(pDC, tessPa, workerId, prim,
- GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
+ GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
#endif
}
}
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, _simd16_setzero_si());
+ pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID);
}
}
}
SWR_ASSERT(pDC->pState->pfnProcessPrims);
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
- GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), _simd_set1_epi32(0));
+ GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
}
}
}
#endif
struct PA_STATE_BASE; // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
+void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
+void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
#if USE_SIMD16_FRONTEND
-void SIMDAPI BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
-void SIMDAPI BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
+void SIMDAPI BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID);
+void SIMDAPI BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID);
#endif
// instance count
uint32_t instanceCount;
- // geometry shader emits ViewportArrayIndex
- bool emitsViewportArrayIndex;
-
// if true, geometry shader emits a single stream, with separate cut buffer.
// if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
// to map vertices to streams
SWR_ATTRIB_SWIZZLE swizzleMap[32];
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
+ bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
};
pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
- pGS->emitsViewportArrayIndex = info->writes_viewport_index;
-
// XXX: single stream for now...
pGS->isSingleStream = true;
pGS->singleStreamID = 0;
(ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
- if (ctx->gs)
- backendState.readRenderTargetArrayIndex =
- ctx->gs->info.base.writes_layer;
- else
- backendState.readRenderTargetArrayIndex =
- ctx->vs->info.base.writes_layer;
+ struct tgsi_shader_info *pLastFE =
+ ctx->gs ?
+ &ctx->gs->info.base :
+ &ctx->vs->info.base;
+ backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
+ backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
SwrSetBackendState(ctx->swrContext, &backendState);