swr/rast: Rectlist support for GS
author Alok Hota <alok.hota@intel.com>
Wed, 16 May 2018 16:14:18 +0000 (11:14 -0500)
committer George Kyriazis <george.kyriazis@intel.com>
Thu, 17 May 2018 15:53:01 +0000 (10:53 -0500)
Add rectlist as an option for GS.  Needed to support some driver
optimizations.

Reviewed-By: George Kyriazis <george.kyriazis@intel.com>
src/gallium/drivers/swr/rasterizer/core/clip.cpp
src/gallium/drivers/swr/rasterizer/core/clip.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/pa.h
src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
src/gallium/drivers/swr/rasterizer/core/state.h

index 780ca15ce712e1783d84e81849d348ab487e6e6c..e6c22180683dda70b610233359d852f4c98efc4a 100644 (file)
@@ -160,6 +160,15 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
     return i;
 }
 
+void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+    simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+{ // Clip stage entry point for rectlist prims: same parameter list as ClipTriangles; every argument is forwarded to the clipper unchanged.
+    RDTSC_BEGIN(FEClipRectangles, pDC->drawId); // open the FEClipRectangles bucket, passing this draw's id
+    Clipper<SIMD256, 3> clipper(workerId, pDC); // 3 == verts per prim (the simd16 variant names this VERTS_PER_PRIM)
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
+    RDTSC_END(FEClipRectangles, 1); // close the bucket; the trailing 1 is presumably an event count - confirm against the RDTSC_END macro
+}
+
 void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
                    simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
 {
@@ -188,6 +197,21 @@ void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector p
 }
 
 #if USE_SIMD16_FRONTEND
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+    simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+{ // simd16 counterpart of ClipRectangles (only built under USE_SIMD16_FRONTEND): runs a SIMD512 Clipper over rectlist prims.
+    RDTSC_BEGIN(FEClipRectangles, pDC->drawId); // same FEClipRectangles bucket as the non-simd16 variant
+    // Rectangles are handed to the clipper as 3-vertex prims.
+    enum { VERTS_PER_PRIM = 3 };
+
+    Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
+    // NOTE(review): the flag is cleared before clipping; presumably the full-width path never reads the alternate (upper-half) offset - confirm against PA_STATE.
+    pa.useAlternateOffset = false;
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
+
+    RDTSC_END(FEClipRectangles, 1); // close the bucket opened at the top of the function
+}
+
 void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
                                    simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
 {
index 0f8399c742c6169d637a9daa2df2c0b653ac06cd..90ae426357554c9643a974b62c1c1f9ee0cfb223 100644 (file)
@@ -531,6 +531,10 @@ public:
             {
                 clipTopology = TOP_POINT_LIST;
             }
+            else if (pa.binTopology == TOP_RECT_LIST)
+            {
+                clipTopology = TOP_RECT_LIST;
+            }
         }
         else if (NumVertsPerPrim == 2)
         {
@@ -1149,10 +1153,12 @@ private:
 
 
 // pipeline stage functions
+void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
 void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
 void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
 void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
 #if USE_SIMD16_FRONTEND
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
 void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
 void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
 void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
index 1847c3e822df4aaaf56cdaa398a6cec28aeaa35e..47c0662e5ee25ab6363a1b9920f9590883ea64ed 100644 (file)
@@ -868,6 +868,7 @@ static void GeometryShaderStage(
     {
         switch (pState->outputTopology)
         {
+        case TOP_RECT_LIST:         pfnClipFunc = ClipRectangles_simd16; break;
         case TOP_TRIANGLE_STRIP:    pfnClipFunc = ClipTriangles_simd16; break;
         case TOP_LINE_STRIP:        pfnClipFunc = ClipLines_simd16; break;
         case TOP_POINT_LIST:        pfnClipFunc = ClipPoints_simd16; break;
@@ -881,6 +882,7 @@ static void GeometryShaderStage(
     {
         switch (pState->outputTopology)
         {
+        case TOP_RECT_LIST:         pfnClipFunc = ClipRectangles; break;
         case TOP_TRIANGLE_STRIP:    pfnClipFunc = ClipTriangles; break;
         case TOP_LINE_STRIP:        pfnClipFunc = ClipLines; break;
         case TOP_POINT_LIST:        pfnClipFunc = ClipPoints; break;
index ed644c044c3b5f849b2b46e1e533a72afe93a301..ab1d46de9d0095ef657502cb60d6841b72d1915e 100644 (file)
@@ -481,6 +481,7 @@ struct PA_STATE_CUT : public PA_STATE
         case TOP_LINE_LIST_ADJ:     pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj : &PA_STATE_CUT::ProcessVertLineListAdjNoGs; break;
         case TOP_LINE_STRIP:        pfnPa = &PA_STATE_CUT::ProcessVertLineStrip; break;
         case TOP_LISTSTRIP_ADJ:     pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs; break;
+        case TOP_RECT_LIST:         pfnPa = &PA_STATE_CUT::ProcessVertRectList; break;
         default: assert(0 && "Unimplemented topology");
         }
     }
@@ -719,6 +720,20 @@ struct PA_STATE_CUT : public PA_STATE
             }
         }
 
+        // compute the implied 4th vertex, v3
+        if (this->binTopology == TOP_RECT_LIST)
+        {
+            for (uint32_t c = 0; c < 4; ++c)
+            {
+                // v1, v3 = v1 + v2 - v0, v2
+                // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
+                simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
+                temp = _simd16_sub_ps(temp, verts[1].v[c]);
+                temp = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+                verts[1].v[c] = _simd16_extract_ps(temp, 0);
+            }
+        }
+
         return true;
     }
 
@@ -766,6 +781,19 @@ struct PA_STATE_CUT : public PA_STATE
             }
         }
 
+        // compute the implied 4th vertex, v3
+        if (this->binTopology == TOP_RECT_LIST)
+        {
+            for (uint32_t c = 0; c < 4; ++c)
+            {
+                // v1, v3 = v1 + v2 - v0, v2
+                // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
+                simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
+                temp = _simd16_sub_ps(temp, verts[1].v[c]);
+                verts[1].v[c] = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+            }
+        }
+
         return true;
     }
 
@@ -790,6 +818,21 @@ struct PA_STATE_CUT : public PA_STATE
                 offset += SIMD_WIDTH * sizeof(float);
             }
         }
+
+        // compute the implied 4th vertex, v3
+        if ((this->binTopology == TOP_RECT_LIST) && (triIndex % 2 == 1))
+        {
+            // v1, v3 = v1 + v2 - v0, v2
+            // v1 stored in tri[0], v0 stored in tri[1], v2 stored in tri[2]
+            float* pVert0 = (float*)&tri[1];
+            float* pVert1 = (float*)&tri[0];
+            float* pVert2 = (float*)&tri[2];
+            float* pVert3 = (float*)&tri[1];
+            for (uint32_t c = 0; c < 4; ++c)
+            {
+                pVert3[c] = pVert1[c] + pVert2[c] - pVert0[c];
+            }
+        }
     }
 
     uint32_t NumPrims()
@@ -1135,6 +1178,31 @@ struct PA_STATE_CUT : public PA_STATE
             this->curIndex = 0;
         }
     }
+
+    void ProcessVertRectList(uint32_t index, bool finish) // per-vertex handler installed for TOP_RECT_LIST; 'finish' is not read in this body
+    {
+        this->vert[this->curIndex] = index; // buffer the incoming vertex index until three have been collected
+        this->curIndex++;
+        if (this->curIndex == 3)
+        {
+            // first triangle of the rectangle: gather indices in emitted order (v0, v1, v2)
+            this->indices[0][this->numPrimsAssembled] = this->vert[0];
+            this->indices[1][this->numPrimsAssembled] = this->vert[1];
+            this->indices[2][this->numPrimsAssembled] = this->vert[2];
+            // NOTE(review): slot numPrimsAssembled+1 is written without a visible bounds check; presumably safe because prims are always added in pairs - confirm.
+            // second triangle in the rectangle: gathered as (v1, v0, v2); slot 1 is a placeholder for the implied vertex
+            // v3 = v1 + v2 - v0, written over slot 1 by the Assemble paths (blend mask 0xAAAA) and by AssembleSingle (triIndex % 2 == 1)
+            this->indices[0][this->numPrimsAssembled+1] = this->vert[1];
+            this->indices[1][this->numPrimsAssembled+1] = this->vert[0];
+            this->indices[2][this->numPrimsAssembled+1] = this->vert[2];
+
+            // one rectangle contributes two prims, so rect prims always sit in even/odd pairs
+            this->numPrimsAssembled += 2;
+
+            // start collecting the three verts of the next rectangle
+            this->curIndex = 0;
+        }
+    }
 };
 
 // Primitive Assembly for data output from the DomainShader.
index 5ee8dec6dab427868df41961d7a2ffd857a150e5..3a7ee4c3f02a9e0a9c1c214a7775c57836118057 100644 (file)
@@ -59,6 +59,7 @@ enum CORE_BUCKETS
     FEClipPoints,
     FEClipLines,
     FEClipTriangles,
+    FEClipRectangles,
     FECullZeroAreaAndBackface,
     FECullBetweenCenters,
     FEEarlyRastEnter,
index f160913a6524f41de0edaf948eef91ab7efacd06..c26dabe8383669dd90be36b1303b3ea569bde1a2 100644 (file)
@@ -746,7 +746,7 @@ struct SWR_GS_STATE
     // Stride of incoming verts in attributes
     uint32_t inputVertStride;
 
-    // Output topology - can be point, tristrip, or linestrip
+    // Output topology - can be point, tristrip, linestrip, or rectlist
     PRIMITIVE_TOPOLOGY outputTopology;      // @llvm_enum
 
     // Maximum number of verts that can be emitted by a single instance of the GS