From a0dddac1cb88b1d518d9875cec2e8133ec6cddfc Mon Sep 17 00:00:00 2001 From: Alok Hota Date: Wed, 16 May 2018 11:14:18 -0500 Subject: [PATCH] swr/rast: Rectlist support for GS Add rectlist as an option for GS. Needed to support some driver optimizations. Reviewed-By: George Kyriazis --- .../drivers/swr/rasterizer/core/clip.cpp | 24 +++++++ .../drivers/swr/rasterizer/core/clip.h | 6 ++ .../drivers/swr/rasterizer/core/frontend.cpp | 2 + src/gallium/drivers/swr/rasterizer/core/pa.h | 68 +++++++++++++++++++ .../drivers/swr/rasterizer/core/rdtsc_core.h | 1 + .../drivers/swr/rasterizer/core/state.h | 2 +- 6 files changed, 102 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index 780ca15ce71..e6c22180683 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -160,6 +160,15 @@ int ClipTriToPlane( const float *pInPts, int numInPts, return i; } +void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, + simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) +{ + RDTSC_BEGIN(FEClipRectangles, pDC->drawId); + Clipper clipper(workerId, pDC); + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); + RDTSC_END(FEClipRectangles, 1); +} + void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) { @@ -188,6 +197,21 @@ void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector p } #if USE_SIMD16_FRONTEND +void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, + simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) +{ + RDTSC_BEGIN(FEClipRectangles, pDC->drawId); + + enum { VERTS_PER_PRIM = 3 }; + + Clipper clipper(workerId, pDC); + + pa.useAlternateOffset = false; + clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); + + RDTSC_END(FEClipRectangles, 1); +} + void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) { diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 0f8399c742c..90ae4263575 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -531,6 +531,10 @@ public: { clipTopology = TOP_POINT_LIST; } + else if (pa.binTopology == TOP_RECT_LIST) + { + clipTopology = TOP_RECT_LIST; + } } else if (NumVertsPerPrim == 2) { @@ -1149,10 +1153,12 @@ private: // pipeline stage functions +void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx); void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx); void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx); void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx); #if USE_SIMD16_FRONTEND +void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx); diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 1847c3e822d..47c0662e5ee 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -868,6 +868,7 @@ static void GeometryShaderStage( { switch (pState->outputTopology) { + case TOP_RECT_LIST: pfnClipFunc = ClipRectangles_simd16; break; case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles_simd16; break; case TOP_LINE_STRIP: pfnClipFunc = ClipLines_simd16; break; case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break; @@ -881,6 +882,7 @@ static void GeometryShaderStage( { switch (pState->outputTopology) { + case TOP_RECT_LIST: pfnClipFunc = ClipRectangles; break; case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles; break; case TOP_LINE_STRIP: pfnClipFunc = ClipLines; break; case TOP_POINT_LIST: pfnClipFunc = ClipPoints; break; diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index ed644c044c3..ab1d46de9d0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -481,6 +481,7 @@ struct PA_STATE_CUT : public PA_STATE case TOP_LINE_LIST_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj : &PA_STATE_CUT::ProcessVertLineListAdjNoGs; break; case TOP_LINE_STRIP: pfnPa = &PA_STATE_CUT::ProcessVertLineStrip; break; case TOP_LISTSTRIP_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs; break; + case TOP_RECT_LIST: pfnPa = &PA_STATE_CUT::ProcessVertRectList; break; default: assert(0 && "Unimplemented topology"); } } @@ -719,6 +720,20 @@ struct PA_STATE_CUT : public PA_STATE } } + // compute the implied 4th vertex, v3 + if (this->binTopology == TOP_RECT_LIST) + { + for (uint32_t c = 0; c < 4; ++c) + { + // v1, v3 = v1 + v2 - v0, v2 + // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2] + simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]); + temp = _simd16_sub_ps(temp, verts[1].v[c]); + temp = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010 + verts[1].v[c] = _simd16_extract_ps(temp, 0); + } + } + return true; } @@ -766,6 +781,19 @@ struct PA_STATE_CUT : public PA_STATE } } + // compute the implied 4th vertex, v3 + if (this->binTopology == TOP_RECT_LIST) + { + for (uint32_t c = 0; c < 4; ++c) + { + // v1, v3 = v1 + v2 - v0, v2 + // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2] + simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]); + temp = _simd16_sub_ps(temp, verts[1].v[c]); + verts[1].v[c] = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010 + } + } + return true; } @@ -790,6 +818,21 @@ struct PA_STATE_CUT : public PA_STATE offset += SIMD_WIDTH * sizeof(float); } } + + // compute the implied 4th vertex, v3 + if ((this->binTopology == TOP_RECT_LIST) && (triIndex % 2 == 1)) + { + // v1, v3 = v1 + v2 - v0, v2 + // v1 stored in tri[0], v0 stored in tri[1], v2 stored in tri[2] + float* pVert0 = (float*)&tri[1]; + float* pVert1 = (float*)&tri[0]; + float* pVert2 = (float*)&tri[2]; + float* pVert3 = (float*)&tri[1]; + for (uint32_t c = 0; c < 4; ++c) + { + pVert3[c] = pVert1[c] + pVert2[c] - pVert0[c]; + } + } } uint32_t NumPrims() @@ -1135,6 +1178,31 @@ struct PA_STATE_CUT : public PA_STATE this->curIndex = 0; } } + + void ProcessVertRectList(uint32_t index, bool finish) + { + this->vert[this->curIndex] = index; + this->curIndex++; + if (this->curIndex == 3) + { + // assembled enough verts for prim, add to gather indices + this->indices[0][this->numPrimsAssembled] = this->vert[0]; + this->indices[1][this->numPrimsAssembled] = this->vert[1]; + this->indices[2][this->numPrimsAssembled] = this->vert[2]; + + // second triangle in the rectangle + // v1, v3 = v1 + v2 - v0, v2 + this->indices[0][this->numPrimsAssembled+1] = this->vert[1]; + this->indices[1][this->numPrimsAssembled+1] = this->vert[0]; + this->indices[2][this->numPrimsAssembled+1] = this->vert[2]; + + // increment numPrimsAssembled + this->numPrimsAssembled += 2; + + // set up next prim state + this->curIndex = 0; + } + } }; // Primitive Assembly for data output from the DomainShader. diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h index 5ee8dec6dab..3a7ee4c3f02 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h +++ b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h @@ -59,6 +59,7 @@ enum CORE_BUCKETS FEClipPoints, FEClipLines, FEClipTriangles, + FEClipRectangles, FECullZeroAreaAndBackface, FECullBetweenCenters, FEEarlyRastEnter, diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index f160913a652..c26dabe8383 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -746,7 +746,7 @@ struct SWR_GS_STATE // Stride of incoming verts in attributes uint32_t inputVertStride; - // Output topology - can be point, tristrip, or linestrip + // Output topology - can be point, tristrip, linestrip, or rectlist PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum // Maximum number of verts that can be emitted by a single instance of the GS -- 2.30.2