From 182cc51a50492926ebf72d4cd38f1e574c768e72 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Mon, 11 Dec 2017 15:51:46 -0600 Subject: [PATCH] swr/rast: Pull of RTAI gather & offset out of clip/bin code Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/binner.cpp | 118 ++++++-------- .../drivers/swr/rasterizer/core/clip.cpp | 30 ++-- .../drivers/swr/rasterizer/core/clip.h | 35 ++-- .../drivers/swr/rasterizer/core/context.h | 4 +- .../drivers/swr/rasterizer/core/frontend.cpp | 153 +++++++++++++----- .../drivers/swr/rasterizer/core/frontend.h | 8 +- src/gallium/drivers/swr/rasterizer/core/pa.h | 1 + 7 files changed, 203 insertions(+), 146 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index a664ed812fe..7ef87c4443d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -45,7 +45,8 @@ void BinPostSetupLinesImpl( typename SIMD_T::Float recipW[], uint32_t primMask, typename SIMD_T::Integer const &primID, - typename SIMD_T::Integer const &viewportIdx); + typename SIMD_T::Integer const &viewportIdx, + typename SIMD_T::Integer const &rtIdx); template void BinPostSetupPointsImpl( @@ -55,7 +56,8 @@ void BinPostSetupPointsImpl( typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primID, - typename SIMD_T::Integer const &viewportIdx); + typename SIMD_T::Integer const &viewportIdx, + typename SIMD_T::Integer const &rtIdx); ////////////////////////////////////////////////////////////////////////// /// @brief Processes attributes for the backend based on linkage mask and @@ -308,9 +310,11 @@ void SIMDCALL BinTrianglesImpl( typename SIMD_T::Vec4 tri[3], uint32_t triMask, typename SIMD_T::Integer const &primID, - typename SIMD_T::Integer const &viewportIdx) + typename SIMD_T::Integer const &viewportIdx, + typename SIMD_T::Integer const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; + const uint32_t *aRTAI = reinterpret_cast(&rtIdx); AR_BEGIN(FEBinTriangles, pDC->drawId); @@ -604,21 +608,21 @@ endBinTriangles: recipW[0] = vRecipW0; recipW[1] = vRecipW1; - BinPostSetupLinesImpl(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx); + BinPostSetupLinesImpl(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); line[0] = tri[1]; line[1] = tri[2]; recipW[0] = vRecipW1; recipW[1] = vRecipW2; - BinPostSetupLinesImpl(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx); + BinPostSetupLinesImpl(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); line[0] = tri[2]; line[1] = tri[0]; recipW[0] = vRecipW2; recipW[1] = vRecipW0; - BinPostSetupLinesImpl(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx); + BinPostSetupLinesImpl(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); AR_END(FEBinTriangles, 1); return; @@ -626,9 +630,9 @@ endBinTriangles: else if (rastState.fillMode == SWR_FILLMODE_POINT) { // Bin 3 points - BinPostSetupPointsImpl(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx); - BinPostSetupPointsImpl(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx); - BinPostSetupPointsImpl(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx); + BinPostSetupPointsImpl(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx); + BinPostSetupPointsImpl(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx); + BinPostSetupPointsImpl(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx); AR_END(FEBinTriangles, 1); return; @@ -659,22 +663,6 @@ endBinTriangles: TransposeVertices(vHorizZ, tri[0].z, tri[1].z, tri[2].z); TransposeVertices(vHorizW, vRecipW0, vRecipW1, vRecipW2); - // store render target array index - OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH]; - if (state.backendState.readRenderTargetArrayIndex) - { - typename SIMD_T::Vec4 vRtai[3]; - pa.Assemble(VERTEX_SGV_SLOT, vRtai); - typename SIMD_T::Integer vRtaii; - vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); - SIMD_T::store_si(reinterpret_cast(aRTAI), vRtaii); - } - else - { - SIMD_T::store_si(reinterpret_cast(aRTAI), SIMD_T::setzero_si()); - } - - // scan remaining valid triangles and bin each separately while (_BitScanForward(&triIndex, triMask)) { @@ -763,9 +751,10 @@ void BinTriangles( simdvector tri[3], uint32_t triMask, simdscalari const &primID, - simdscalari const &viewportIdx) + simdscalari const &viewportIdx, + simdscalari const &rtIdx) { - BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID, viewportIdx); + BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx); } #if USE_SIMD16_FRONTEND @@ -777,9 +766,10 @@ void SIMDCALL BinTriangles_simd16( simd16vector tri[3], uint32_t triMask, simd16scalari const &primID, - simd16scalari const &viewportIdx) + simd16scalari const &viewportIdx, + simd16scalari const &rtIdx) { - BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID, viewportIdx); + BinTrianglesImpl(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx); } #endif @@ -828,7 +818,8 @@ void BinPostSetupPointsImpl( typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primID, - typename SIMD_T::Integer const &viewportIdx) + typename SIMD_T::Integer const &viewportIdx, + typename SIMD_T::Integer const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; @@ -896,19 +887,8 @@ void BinPostSetupPointsImpl( SIMD_T::store_ps(reinterpret_cast(aZ), primVerts.z); // store render target array index - OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH]; - if (state.backendState.readRenderTargetArrayIndex) - { - typename SIMD_T::Vec4 vRtai; - pa.Assemble(VERTEX_SGV_SLOT, &vRtai); - typename SIMD_T::Integer vRtaii = SIMD_T::castps_si(vRtai[VERTEX_SGV_RTAI_COMP]); - SIMD_T::store_si(reinterpret_cast(aRTAI), vRtaii); - } - else - { - SIMD_T::store_si(reinterpret_cast(aRTAI), SIMD_T::setzero_si()); - } - + const uint32_t *aRTAI = reinterpret_cast(&rtIdx); + uint32_t *pPrimID = (uint32_t *)&primID; DWORD primIndex = 0; @@ -1155,7 +1135,8 @@ void BinPointsImpl( typename SIMD_T::Vec4 prim[3], uint32_t primMask, typename SIMD_T::Integer const &primID, - typename SIMD_T::Integer const &viewportIdx) + typename SIMD_T::Integer const &viewportIdx, + typename SIMD_T::Integer const &rtIdx) { const API_STATE& state = GetApiState(pDC); const SWR_FRONTEND_STATE& feState = state.frontendState; @@ -1193,7 +1174,8 @@ void BinPointsImpl( prim, primMask, primID, - viewportIdx); + viewportIdx, + rtIdx); } void BinPoints( @@ -1203,7 +1185,8 @@ void BinPoints( simdvector prim[3], uint32_t primMask, simdscalari const &primID, - simdscalari const &viewportIdx) + simdscalari const &viewportIdx, + simdscalari const &rtIdx) { BinPointsImpl( pDC, @@ -1212,7 +1195,8 @@ void BinPoints( prim, primMask, primID, - viewportIdx); + viewportIdx, + rtIdx); } #if USE_SIMD16_FRONTEND @@ -1223,7 +1207,8 @@ void SIMDCALL BinPoints_simd16( simd16vector prim[3], uint32_t primMask, simd16scalari const &primID, - simd16scalari const &viewportIdx) + simd16scalari const &viewportIdx, + simd16scalari const & rtIdx) { BinPointsImpl( pDC, @@ -1232,7 +1217,8 @@ void SIMDCALL BinPoints_simd16( prim, primMask, primID, - viewportIdx); + viewportIdx, + rtIdx); } #endif @@ -1253,9 +1239,11 @@ void BinPostSetupLinesImpl( typename SIMD_T::Float recipW[], uint32_t primMask, typename SIMD_T::Integer const &primID, - typename SIMD_T::Integer const &viewportIdx) + typename SIMD_T::Integer const &viewportIdx, + typename SIMD_T::Integer const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; + const uint32_t *aRTAI = reinterpret_cast(&rtIdx); AR_BEGIN(FEBinLines, pDC->drawId); @@ -1376,20 +1364,6 @@ void BinPostSetupLinesImpl( TransposeVertices(vHorizZ, prim[0].z, prim[1].z, SIMD_T::setzero_ps()); TransposeVertices(vHorizW, vRecipW0, vRecipW1, SIMD_T::setzero_ps()); - // store render target array index - OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH]; - if (state.backendState.readRenderTargetArrayIndex) - { - typename SIMD_T::Vec4 vRtai[2]; - pa.Assemble(VERTEX_SGV_SLOT, vRtai); - typename SIMD_T::Integer vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); - SIMD_T::store_si(reinterpret_cast(aRTAI), vRtaii); - } - else - { - SIMD_T::store_si(reinterpret_cast(aRTAI), SIMD_T::setzero_si()); - } - // scan remaining valid prims and bin each separately DWORD primIndex; while (_BitScanForward(&primIndex, primMask)) @@ -1471,7 +1445,8 @@ void SIMDCALL BinLinesImpl( typename SIMD_T::Vec4 prim[3], uint32_t primMask, typename SIMD_T::Integer const &primID, - typename SIMD_T::Integer const &viewportIdx) + typename SIMD_T::Integer const &viewportIdx, + typename SIMD_T::Integer const & rtIdx) { const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -1522,7 +1497,8 @@ void SIMDCALL BinLinesImpl( vRecipW, primMask, primID, - viewportIdx); + viewportIdx, + rtIdx); } void BinLines( @@ -1532,9 +1508,10 @@ void BinLines( simdvector prim[], uint32_t primMask, simdscalari const &primID, - simdscalari const &viewportIdx) + simdscalari const &viewportIdx, + simdscalari const &rtIdx) { - BinLinesImpl(pDC, pa, workerId, prim, primMask, primID, viewportIdx); + BinLinesImpl(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx); } #if USE_SIMD16_FRONTEND @@ -1545,9 +1522,10 @@ void SIMDCALL BinLines_simd16( simd16vector prim[3], uint32_t primMask, simd16scalari const &primID, - simd16scalari const &viewportIdx) + simd16scalari const &viewportIdx, + simd16scalari const &rtIdx) { - BinLinesImpl(pDC, pa, workerId, prim, primMask, primID, viewportIdx); + BinLinesImpl(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx); } #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index d4da2c3badf..72058029b03 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -160,35 +160,39 @@ int ClipTriToPlane( const float *pInPts, int numInPts, return i; } -void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx) +void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, + simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipTriangles, pDC->drawId); Clipper clipper(workerId, pDC); - clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); AR_END(FEClipTriangles, 1); } -void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx) +void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, + simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipLines, pDC->drawId); Clipper clipper(workerId, pDC); - clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); AR_END(FEClipLines, 1); } -void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx) +void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, + simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipPoints, pDC->drawId); Clipper clipper(workerId, pDC); - clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); AR_END(FEClipPoints, 1); } #if USE_SIMD16_FRONTEND -void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx) +void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, + simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipTriangles, pDC->drawId); @@ -198,12 +202,13 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor Clipper clipper(workerId, pDC); pa.useAlternateOffset = false; - clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); AR_END(FEClipTriangles, 1); } -void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx) +void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, + simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipLines, pDC->drawId); @@ -213,12 +218,13 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI Clipper clipper(workerId, pDC); pa.useAlternateOffset = false; - clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); AR_END(FEClipLines, 1); } -void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx) +void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, + simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; AR_BEGIN(FEClipPoints, pDC->drawId); @@ -228,7 +234,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker Clipper clipper(workerId, pDC); pa.useAlternateOffset = false; - clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); AR_END(FEClipPoints, 1); } diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 8b947668d3f..e5e00d49b02 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -178,11 +178,11 @@ struct BinnerChooser }; } - void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx) + void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx, SIMD256::Integer &rtIdx) { SWR_ASSERT(pfnBinFunc != nullptr); - pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx); + pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx); } }; @@ -231,11 +231,11 @@ struct BinnerChooser }; } - void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx) + void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx, SIMD512::Integer &rtIdx) { SWR_ASSERT(pfnBinFunc != nullptr); - pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx); + pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx); } }; @@ -437,7 +437,8 @@ public: return SIMD_T::movemask_ps(vClipCullMask); } - void ClipSimd(const typename SIMD_T::Vec4 prim[], const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx) + void ClipSimd(const typename SIMD_T::Vec4 prim[], const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, + const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx, const typename SIMD_T::Integer &vRtIdx) { // input/output vertex store for clipper SIMDVERTEX_T vertices[7]; // maximum 7 verts generated per triangle @@ -538,6 +539,7 @@ public: const uint32_t *pVertexCount = reinterpret_cast(&vNumClippedVerts); const uint32_t *pPrimitiveId = reinterpret_cast(&vPrimId); const uint32_t *pViewportIdx = reinterpret_cast(&vViewportIdx); + const uint32_t *pRtIdx = reinterpret_cast(&vRtIdx); const SIMD256::Integer vOffsets = SIMD256::set_epi32( 0 * sizeof(SIMDVERTEX_T), // unused lane @@ -651,6 +653,8 @@ public: const typename SIMD_T::Integer primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]); const typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]); + const typename SIMD_T::Integer rtIdx = SIMD_T::set1_epi32(pRtIdx[inputPrim]); + while (clipPA.GetNextStreamOutput()) { @@ -662,7 +666,7 @@ public: if (assemble) { - binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx); + binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx); } } while (clipPA.NextPrim()); @@ -677,7 +681,8 @@ public: UPDATE_STAT_FE(CPrimitives, numClippedPrims); } - void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx) + void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, + typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx, typename SIMD_T::Integer const &rtIdx) { SWR_ASSERT(pa.pDC != nullptr); @@ -716,7 +721,7 @@ public: AR_BEGIN(FEGuardbandClip, pa.pDC->drawId); // we have to clip tris, execute the clipper, which will also // call the binner - ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx); + ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx); AR_END(FEGuardbandClip, 1); } else if (validMask) @@ -725,7 +730,7 @@ public: UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask)); // forward valid prims directly to binner - binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx); + binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx); } } @@ -1135,12 +1140,12 @@ private: // pipeline stage functions -void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx); -void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx); -void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx); +void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx); +void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx); +void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx); #if USE_SIMD16_FRONTEND -void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx); -void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx); -void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx); +void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); +void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); +void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index ef6719384ff..cba8de999be 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -214,12 +214,12 @@ struct PA_STATE; // function signature for pipeline stages that execute after primitive assembly typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], - uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); + uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx); #if ENABLE_AVX512_SIMD16 // function signature for pipeline stages that execute after primitive assembly typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], - uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); + uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); #endif OSALIGNLINE(struct) API_STATE diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 3de79d600f6..ed8ce151c39 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -989,15 +989,27 @@ static void GeometryShaderStage( #if USE_SIMD16_FRONTEND simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]); - // Gather the VPAI from the SVG if provided. - SIMD16::Vec4 vpiAttrib[3]; - SIMD16::Integer vViewportIdx = SIMD16::setzero_si(); + // Gather data from the SVG if provided. + simd16scalari vViewportIdx = SIMD16::setzero_si(); + simd16scalari vRtIdx = SIMD16::setzero_si(); + SIMD16::Vec4 svgAttrib[4]; + + if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex) + { + gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib); + } + + if (state.backendState.readViewportArrayIndex) { - gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vViewportIdx = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]); gsPa.viewportArrayActive = true; } + if (state.backendState.readRenderTargetArrayIndex) + { + vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]); + gsPa.rtArrayActive = true; + } { // OOB VPAI indices => forced to zero. @@ -1007,29 +1019,40 @@ static void GeometryShaderStage( vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx); gsPa.useAlternateOffset = false; - pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx); + pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx); } #else simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]); - // Gather the VPAI from the SVG if provided. - SIMD8::Vec4 vpiAttrib[3]; - SIMD8::Integer vViewportIdx = SIMD8::setzero_si(); + // Gather data from the SVG if provided. + simdscalari vViewportIdx = SIMD16::setzero_si(); + simdscalari vRtIdx = SIMD16::setzero_si(); + SIMD8::Vec4 svgAttrib[4]; + + if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex) + { + tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib); + } + + if (state.backendState.readViewportArrayIndex) { - gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]); // OOB VPAI indices => forced to zero. vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si()); simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports); vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx); - - gsPa.viewportArrayActive = true; + tessPa.viewportArrayActive = true; + } + if (state.backendState.readRenderTargetArrayIndex) + { + vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]); + tessPa.rtArrayActive = true; } - pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx); + pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx); #endif } } @@ -1372,46 +1395,68 @@ static void TessellationStages( SWR_ASSERT(pfnClipFunc); #if USE_SIMD16_FRONTEND - // Gather the VPAI from the SVG if provided. - simd16scalari vpai = SIMD16::setzero_si(); + // Gather data from the SVG if provided. + simd16scalari vViewportIdx = SIMD16::setzero_si(); + simd16scalari vRtIdx = SIMD16::setzero_si(); + SIMD16::Vec4 svgAttrib[4]; + + if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex) + { + tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib); + } + + if (state.backendState.readViewportArrayIndex) { - simd16vector vpiAttrib[4]; - tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]); tessPa.viewportArrayActive = true; } + if (state.backendState.readRenderTargetArrayIndex) + { + vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]); + tessPa.rtArrayActive = true; + } { // OOB VPAI indices => forced to zero. - vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si()); + vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si()); simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); - simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports); - vpai = SIMD16::and_si(vClearMask, vpai); + simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports); + vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx); tessPa.useAlternateOffset = false; - pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vpai); + pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vViewportIdx, vRtIdx); } #else - // Gather the VPAI from the SVG if provided. - SIMD8::Vec4 vpiAttrib[3]; - SIMD8::Integer vViewportIdx = SIMD8::setzero_si(); + // Gather data from the SVG if provided. + simdscalari vViewportIdx = SIMD16::setzero_si(); + simdscalari vRtIdx = SIMD16::setzero_si(); + SIMD8::Vec4 svgAttrib[4]; + + if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex) + { + tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib); + } + if (state.backendState.readViewportArrayIndex) { - tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]); // OOB VPAI indices => forced to zero. vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si()); simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports); vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx); - tessPa.viewportArrayActive = true; } + if (state.backendState.readRenderTargetArrayIndex) + { + vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]); + tessPa.rtArrayActive = true; + } pfnClipFunc(pDC, tessPa, workerId, prim, - GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx); + GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx, vRtIdx); #endif } } @@ -1803,15 +1848,27 @@ void ProcessDraw( if (HasRastT::value) { SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16); - // Gather the VPAI from the SVG if provided. + // Gather data from the SVG if provided. simd16scalari vpai = SIMD16::setzero_si(); + simd16scalari rtai = SIMD16::setzero_si(); + SIMD16::Vec4 svgAttrib[4]; + + if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex) + { + pa.Assemble(VERTEX_SGV_SLOT, svgAttrib); + } + + if (state.backendState.readViewportArrayIndex) { - simd16vector vpiAttrib[4]; - pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + vpai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]); pa.viewportArrayActive = true; } + if (state.backendState.readRenderTargetArrayIndex) + { + rtai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]); + pa.rtArrayActive = true; + } { // OOB VPAI indices => forced to zero. @@ -1821,7 +1878,7 @@ void ProcessDraw( vpai = SIMD16::and_si(vClearMask, vpai); pa.useAlternateOffset = false; - pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai); + pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai, rtai); } } } @@ -1983,25 +2040,35 @@ void ProcessDraw( { SWR_ASSERT(pDC->pState->pfnProcessPrims); - // Gather the VPAI from the SVG if provided. - SIMD8::Vec4 vpiAttrib[3]; - SIMD8::Integer vViewportIdx = SIMD8::setzero_si(); + // Gather data from the SVG if provided. + simdscalari vViewportIdx = SIMD16::setzero_si(); + simdscalari vRtIdx = SIMD16::setzero_si(); + SIMD8::Vec4 svgAttrib[4]; + + if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex) + { + tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib); + } + if (state.backendState.readViewportArrayIndex) { - pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); - vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); + vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]); // OOB VPAI indices => forced to zero. vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si()); simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports); vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx); - - pa.viewportArrayActive = true; + tessPa.viewportArrayActive = true; + } + if (state.backendState.readRenderTargetArrayIndex) + { + vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]); + tessPa.rtArrayActive = true; } pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, - GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx); + GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx, vRtIdx); } } } diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index e2ca1274c51..6a2ec8474f1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -389,10 +389,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative); #endif struct PA_STATE_BASE; // forward decl -void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); -void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx); +void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx); +void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx); #if USE_SIMD16_FRONTEND -void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); -void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx); +void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); +void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); #endif diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index a0160d4a40c..c88b4bfd97d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -80,6 +80,7 @@ struct PA_STATE #endif bool viewportArrayActive{ false }; + bool rtArrayActive { false }; uint32_t numVertsPerPrim{ 0 }; PA_STATE(){} -- 2.30.2