From 8b069207965b8cbfcb9de0e06ff03dadc8dbd291 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Thu, 7 Dec 2017 11:59:45 -0600 Subject: [PATCH] swr/rast: Pull most of the VPAI manipulation out of the binner/clipper Move out of binner/clipper; hand them down from the frontend code instead. Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/binner.cpp | 124 +++++------------- .../drivers/swr/rasterizer/core/clip.cpp | 25 ++-- .../drivers/swr/rasterizer/core/clip.h | 58 +++----- .../drivers/swr/rasterizer/core/context.h | 4 +- .../drivers/swr/rasterizer/core/frontend.cpp | 112 +++++++++++++++- .../drivers/swr/rasterizer/core/frontend.h | 8 +- src/gallium/drivers/swr/rasterizer/core/pa.h | 4 +- 7 files changed, 177 insertions(+), 158 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 22996c5a5d1..a664ed812fe 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -307,7 +307,8 @@ void SIMDCALL BinTrianglesImpl( uint32_t workerId, typename SIMD_T::Vec4 tri[3], uint32_t triMask, - typename SIMD_T::Integer const &primID) + typename SIMD_T::Integer const &primID, + typename SIMD_T::Integer const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; @@ -323,31 +324,6 @@ void SIMDCALL BinTrianglesImpl( typename SIMD_T::Float vRecipW1 = SIMD_T::set1_ps(1.0f); typename SIMD_T::Float vRecipW2 = SIMD_T::set1_ps(1.0f); - typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); - typename SIMD_T::Vec4 vpiAttrib[3]; - typename SIMD_T::Integer vpai = SIMD_T::setzero_si(); - - if (state.backendState.readViewportArrayIndex) - { - pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - - vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); - } - - - if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 - { - // OOB indices => forced to zero. 
- vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); - typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); - typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); - viewportIdx = SIMD_T::and_si(vClearMask, vpai); - } - else - { - viewportIdx = vpai; - } - if (feState.vpTransformDisable) { // RHW is passed in directly when VP transform is disabled @@ -375,7 +351,7 @@ void SIMDCALL BinTrianglesImpl( tri[2].v[2] = SIMD_T::mul_ps(tri[2].v[2], vRecipW2); // Viewport transform to screen space coords - if (state.backendState.readViewportArrayIndex) + if (pa.viewportArrayActive) { viewportTransform<3>(tri, state.vpMatrices, viewportIdx); } @@ -568,8 +544,8 @@ void SIMDCALL BinTrianglesImpl( /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer. { typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax; + if (pa.viewportArrayActive) - if (state.backendState.readViewportArrayIndex) { GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax); } @@ -786,9 +762,10 @@ void BinTriangles( uint32_t workerId, simdvector tri[3], uint32_t triMask, - simdscalari const &primID) + simdscalari const &primID, + simdscalari const &viewportIdx) { - BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID); + BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID, viewportIdx); } #if USE_SIMD16_FRONTEND @@ -799,9 +776,10 @@ void SIMDCALL BinTriangles_simd16( uint32_t workerId, simd16vector tri[3], uint32_t triMask, - simd16scalari const &primID) + simd16scalari const &primID, + simd16scalari const &viewportIdx) { - BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID); + BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID, viewportIdx); } #endif @@ -1026,7 +1004,7 @@ void BinPostSetupPointsImpl( { typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax; - if (state.backendState.readViewportArrayIndex) + if 
(pa.viewportArrayActive) { GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax); } @@ -1176,38 +1154,13 @@ void BinPointsImpl( uint32_t workerId, typename SIMD_T::Vec4 prim[3], uint32_t primMask, - typename SIMD_T::Integer const &primID) + typename SIMD_T::Integer const &primID, + typename SIMD_T::Integer const &viewportIdx) { const API_STATE& state = GetApiState(pDC); const SWR_FRONTEND_STATE& feState = state.frontendState; const SWR_RASTSTATE& rastState = state.rastState; - // Read back viewport index if required - typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); - typename SIMD_T::Vec4 vpiAttrib[1]; - typename SIMD_T::Integer vpai = SIMD_T::setzero_si(); - - if (state.backendState.readViewportArrayIndex) - { - pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - - vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); - } - - - if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 - { - // OOB indices => forced to zero. 
- vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); - typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); - typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); - viewportIdx = SIMD_T::and_si(vClearMask, vpai); - } - else - { - viewportIdx = vpai; - } - if (!feState.vpTransformDisable) { // perspective divide @@ -1218,7 +1171,7 @@ void BinPointsImpl( prim[0].z = SIMD_T::mul_ps(prim[0].z, vRecipW0); // viewport transform to screen coords - if (state.backendState.readViewportArrayIndex) + if (pa.viewportArrayActive) { viewportTransform<1>(prim, state.vpMatrices, viewportIdx); } @@ -1249,7 +1202,8 @@ void BinPoints( uint32_t workerId, simdvector prim[3], uint32_t primMask, - simdscalari const &primID) + simdscalari const &primID, + simdscalari const &viewportIdx) { BinPointsImpl( pDC, @@ -1257,7 +1211,8 @@ void BinPoints( workerId, prim, primMask, - primID); + primID, + viewportIdx); } #if USE_SIMD16_FRONTEND @@ -1267,7 +1222,8 @@ void SIMDCALL BinPoints_simd16( uint32_t workerId, simd16vector prim[3], uint32_t primMask, - simd16scalari const &primID) + simd16scalari const &primID, + simd16scalari const &viewportIdx) { BinPointsImpl( pDC, @@ -1275,7 +1231,8 @@ void SIMDCALL BinPoints_simd16( workerId, prim, primMask, - primID); + primID, + viewportIdx); } #endif @@ -1362,7 +1319,7 @@ void BinPostSetupLinesImpl( { typename SIMD_T::Integer scisXmin, scisYmin, scisXmax, scisYmax; - if (state.backendState.readViewportArrayIndex) + if (pa.viewportArrayActive) { GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax); } @@ -1513,7 +1470,8 @@ void SIMDCALL BinLinesImpl( uint32_t workerId, typename SIMD_T::Vec4 prim[3], uint32_t primMask, - typename SIMD_T::Integer const &primID) + typename SIMD_T::Integer const &primID, + typename SIMD_T::Integer const &viewportIdx) { const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; 
@@ -1521,26 +1479,6 @@ void SIMDCALL BinLinesImpl( typename SIMD_T::Float vRecipW[2] = { SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f) }; - typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); - typename SIMD_T::Vec4 vpiAttrib[2]; - typename SIMD_T::Integer vpai = SIMD_T::setzero_si(); - - if (state.backendState.readViewportArrayIndex) - { - pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); - } - - - if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 - { - // OOB indices => forced to zero. - vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); - typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); - typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); - viewportIdx = SIMD_T::and_si(vClearMask, vpai); - } - if (!feState.vpTransformDisable) { // perspective divide @@ -1557,7 +1495,7 @@ void SIMDCALL BinLinesImpl( prim[1].v[2] = SIMD_T::mul_ps(prim[1].v[2], vRecipW[1]); // viewport transform to screen coords - if (state.backendState.readViewportArrayIndex) + if (pa.viewportArrayActive) { viewportTransform<2>(prim, state.vpMatrices, viewportIdx); } @@ -1593,9 +1531,10 @@ void BinLines( uint32_t workerId, simdvector prim[], uint32_t primMask, - simdscalari const &primID) + simdscalari const &primID, + simdscalari const &viewportIdx) { - BinLinesImpl(pDC, pa, workerId, prim, primMask, primID); + BinLinesImpl(pDC, pa, workerId, prim, primMask, primID, viewportIdx); } #if USE_SIMD16_FRONTEND @@ -1605,9 +1544,10 @@ void SIMDCALL BinLines_simd16( uint32_t workerId, simd16vector prim[3], uint32_t primMask, - simd16scalari const &primID) + simd16scalari const &primID, + simd16scalari const &viewportIdx) { - BinLinesImpl(pDC, pa, workerId, prim, primMask, primID); + BinLinesImpl(pDC, pa, workerId, prim, primMask, primID, viewportIdx); } #endif diff --git 
a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index a40f077beab..d4da2c3badf 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -160,35 +160,35 @@ int ClipTriToPlane( const float *pInPts, int numInPts, return i; } -void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId) +void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipTriangles, pDC->drawId); Clipper clipper(workerId, pDC); - clipper.ExecuteStage(pa, prims, primMask, primId); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); AR_END(FEClipTriangles, 1); } -void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId) +void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipLines, pDC->drawId); Clipper clipper(workerId, pDC); - clipper.ExecuteStage(pa, prims, primMask, primId); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); AR_END(FEClipLines, 1); } -void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId) +void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipPoints, pDC->drawId); Clipper clipper(workerId, pDC); - clipper.ExecuteStage(pa, prims, primMask, primId); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); 
AR_END(FEClipPoints, 1); } #if USE_SIMD16_FRONTEND -void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId) +void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipTriangles, pDC->drawId); @@ -198,12 +198,12 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor Clipper clipper(workerId, pDC); pa.useAlternateOffset = false; - clipper.ExecuteStage(pa, prims, primMask, primId); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); AR_END(FEClipTriangles, 1); } -void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId) +void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipLines, pDC->drawId); @@ -213,12 +213,12 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI Clipper clipper(workerId, pDC); pa.useAlternateOffset = false; - clipper.ExecuteStage(pa, prims, primMask, primId); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); AR_END(FEClipLines, 1); } -void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId) +void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipPoints, pDC->drawId); @@ -228,10 +228,9 @@ void SIMDCALL 
ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker Clipper clipper(workerId, pDC); pa.useAlternateOffset = false; - clipper.ExecuteStage(pa, prims, primMask, primId); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); AR_END(FEClipPoints, 1); } #endif - diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 9d8bbc19e63..148f661ab42 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -178,11 +178,11 @@ struct BinnerChooser }; } - void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID) + void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx) { SWR_ASSERT(pfnBinFunc != nullptr); - pfnBinFunc(pDC, pa, workerId, prims, primMask, primID); + pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx); } }; @@ -231,11 +231,11 @@ struct BinnerChooser }; } - void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID) + void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx) { SWR_ASSERT(pfnBinFunc != nullptr); - pfnBinFunc(pDC, pa, workerId, prims, primMask, primID); + pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx); } }; @@ -437,7 +437,7 @@ public: return SIMD_T::movemask_ps(vClipCullMask); } - void ClipSimd(const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId) + void ClipSimd(const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId, const typename 
SIMD_T::Integer &vViewportIdx) { // input/output vertex store for clipper SIMDVERTEX_T vertices[7]; // maximum 7 verts generated per triangle @@ -538,6 +538,7 @@ public: const uint32_t *pVertexCount = reinterpret_cast(&vNumClippedVerts); const uint32_t *pPrimitiveId = reinterpret_cast(&vPrimId); + const uint32_t *pViewportIdx = reinterpret_cast(&vViewportIdx); const SIMD256::Integer vOffsets = SIMD256::set_epi32( 0 * sizeof(SIMDVERTEX_T), // unused lane @@ -642,12 +643,14 @@ public: } PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast(&transposedPrims[0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, NumVertsPerPrim, clipTopology); + clipPA.viewportArrayActive = pa.viewportArrayActive; static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f }; const uint32_t primMask = primMaskMap[numEmittedPrims]; const typename SIMD_T::Integer primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]); + const typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]); while (clipPA.GetNextStreamOutput()) { @@ -659,7 +662,7 @@ public: if (assemble) { - binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID); + binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx); } } while (clipPA.NextPrim()); @@ -674,7 +677,7 @@ public: UPDATE_STAT_FE(CPrimitives, numClippedPrims); } - void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId) + void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx) { SWR_ASSERT(pa.pDC != nullptr); @@ -686,31 +689,6 @@ public: uint32_t numInvoc = _mm_popcnt_u32(primMask); UPDATE_STAT_FE(CInvocations, numInvoc); - // Read back viewport index if required - typename SIMD_T::Integer viewportIdx = SIMD_T::setzero_si(); - typename SIMD_T::Vec4 vpiAttrib[NumVertsPerPrim]; - typename SIMD_T::Integer vpai = 
SIMD_T::setzero_si(); - - if (state.backendState.readViewportArrayIndex) - { - pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vpai = SIMD_T::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); - } - - - if (state.backendState.readViewportArrayIndex) // VPAIOffsets are guaranteed 0-15 -- no OOB issues if they are offsets from 0 - { - // OOB indices => forced to zero. - vpai = SIMD_T::max_epi32(vpai, SIMD_T::setzero_si()); - typename SIMD_T::Integer vNumViewports = SIMD_T::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); - typename SIMD_T::Integer vClearMask = SIMD_T::cmplt_epi32(vpai, vNumViewports); - viewportIdx = SIMD_T::and_si(vClearMask, vpai); - } - else - { - viewportIdx = vpai; - } - ComputeClipCodes(prim, viewportIdx); // cull prims with NAN coords @@ -738,7 +716,7 @@ public: AR_BEGIN(FEGuardbandClip, pa.pDC->drawId); // we have to clip tris, execute the clipper, which will also // call the binner - ClipSimd(SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId); + ClipSimd(SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx); AR_END(FEGuardbandClip, 1); } else if (validMask) @@ -747,7 +725,7 @@ public: UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask)); // forward valid prims directly to binner - binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId); + binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx); } } @@ -1157,12 +1135,12 @@ private: // pipeline stage functions -void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId); -void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId); -void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId); +void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t 
primMask, simdscalari const &primId, simdscalari const &viewportIdx); +void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx); +void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx); #if USE_SIMD16_FRONTEND -void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId); -void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId); -void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId); +void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx); +void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx); +void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx); #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index ae942f182d7..ef6719384ff 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -214,12 +214,12 @@ struct PA_STATE; // function signature for pipeline stages that execute after primitive assembly typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], - uint32_t primMask, simdscalari 
const &primID); + uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); #if ENABLE_AVX512_SIMD16 // function signature for pipeline stages that execute after primitive assembly typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], - uint32_t primMask, simd16scalari const &primID); + uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); #endif OSALIGNLINE(struct) API_STATE diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 5a61dc33a07..3de79d600f6 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -988,13 +988,48 @@ static void GeometryShaderStage( { #if USE_SIMD16_FRONTEND simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]); + + // Gather the VPAI from the SGV if provided. + SIMD16::Vec4 vpiAttrib[3]; + SIMD16::Integer vViewportIdx = SIMD16::setzero_si(); + if (state.backendState.readViewportArrayIndex) + { + gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); + vViewportIdx = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + gsPa.viewportArrayActive = true; + } + { + // OOB VPAI indices => forced to zero. 
+ vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si()); + simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); + simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports); + vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx); + gsPa.useAlternateOffset = false; - pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId); + pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx); } #else simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]); - pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId); + + // Gather the VPAI from the SGV if provided. + SIMD8::Vec4 vpiAttrib[3]; + SIMD8::Integer vViewportIdx = SIMD8::setzero_si(); + if (state.backendState.readViewportArrayIndex) + { + gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); + vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + + // OOB VPAI indices => forced to zero. + vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si()); + simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); + simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports); + vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx); + + gsPa.viewportArrayActive = true; + } + + pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx); #endif } } @@ -1337,14 +1372,46 @@ static void TessellationStages( SWR_ASSERT(pfnClipFunc); #if USE_SIMD16_FRONTEND + // Gather the VPAI from the SGV if provided. + simd16scalari vpai = SIMD16::setzero_si(); + if (state.backendState.readViewportArrayIndex) + { + simd16vector vpiAttrib[4]; + tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); + vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + tessPa.viewportArrayActive = true; + } + { + // OOB VPAI indices => forced to zero. 
+ vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si()); + simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); + simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports); + vpai = SIMD16::and_si(vClearMask, vpai); + tessPa.useAlternateOffset = false; - pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID); + pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vpai); } #else + // Gather the VPAI from the SGV if provided. + SIMD8::Vec4 vpiAttrib[3]; + SIMD8::Integer vViewportIdx = SIMD8::setzero_si(); + if (state.backendState.readViewportArrayIndex) + { + tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); + vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + + // OOB VPAI indices => forced to zero. + vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si()); + simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); + simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports); + vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx); + + tessPa.viewportArrayActive = true; + } pfnClipFunc(pDC, tessPa, workerId, prim, - GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID)); + GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx); #endif } } @@ -1736,9 +1803,25 @@ void ProcessDraw( if (HasRastT::value) { SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16); + // Gather the VPAI from the SGV if provided. + simd16scalari vpai = SIMD16::setzero_si(); + if (state.backendState.readViewportArrayIndex) + { + simd16vector vpiAttrib[4]; + pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); + vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + pa.viewportArrayActive = true; + } + { + // OOB VPAI indices => forced to zero. 
+ vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si()); + simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); + simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports); + vpai = SIMD16::and_si(vClearMask, vpai); + pa.useAlternateOffset = false; - pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID); + pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai); } } } @@ -1900,8 +1983,25 @@ void ProcessDraw( { SWR_ASSERT(pDC->pState->pfnProcessPrims); + // Gather the VPAI from the SGV if provided. + SIMD8::Vec4 vpiAttrib[3]; + SIMD8::Integer vViewportIdx = SIMD8::setzero_si(); + if (state.backendState.readViewportArrayIndex) + { + pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); + vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + + // OOB VPAI indices => forced to zero. + vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si()); + simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); + simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports); + vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx); + + pa.viewportArrayActive = true; + } + pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, - GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID)); + GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx); } } } diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index 11099d6449d..e2ca1274c51 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -389,10 +389,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative); #endif struct PA_STATE_BASE; // forward decl -void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID); -void 
BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID); +void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); +void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); #if USE_SIMD16_FRONTEND -void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID); -void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID); +void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); +void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index 13f99cb5461..a0160d4a40c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -77,9 +77,11 @@ struct PA_STATE #if ENABLE_AVX512_SIMD16 bool useAlternateOffset{ false }; +#endif + + bool viewportArrayActive{ false }; uint32_t numVertsPerPrim{ 0 }; -#endif PA_STATE(){} PA_STATE(DRAW_CONTEXT *in_pDC, uint8_t* in_pStreamBase, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, uint32_t in_numVertsPerPrim) : pDC(in_pDC), pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts), vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim) {} -- 2.30.2