swr: [rasterizer core] routing of viewport indexes through frontend
authorTim Rowley <timothy.o.rowley@intel.com>
Mon, 8 Aug 2016 19:08:39 +0000 (13:08 -0600)
committerTim Rowley <timothy.o.rowley@intel.com>
Wed, 10 Aug 2016 16:09:00 +0000 (11:09 -0500)
Viewport transform performed based on per-prim viewport index if available.

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/clip.cpp
src/gallium/drivers/swr/rasterizer/core/clip.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.h

index d6aa80d678f615acbe157247920300ecbd8aa7af..15485012a08d6cce53bcb19ee816f849e842ea1e 100644 (file)
@@ -676,7 +676,6 @@ void SwrSetViewports(
 
     if (pMatrices != nullptr)
     {
-        //memcpy(&pState->vpMatrix[0], pMatrices, sizeof(SWR_VIEWPORT_MATRIX) * numViewports);
         // @todo Faster to copy portions of the SOA or just copy all of it?
         memcpy(&pState->vpMatrices, pMatrices, sizeof(SWR_VIEWPORT_MATRICES));
     }
index e624fd8f674bc5fd46f073ae949b6221312765ef..21cbb0a0629a5556305f9b418991a57d22fb31e6 100644 (file)
@@ -179,26 +179,26 @@ void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float *
     return;
 }
 
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
 {
     RDTSC_START(FEClipTriangles);
     Clipper<3> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
     RDTSC_STOP(FEClipTriangles, 1, 0);
 }
 
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
 {
     RDTSC_START(FEClipLines);
     Clipper<2> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
     RDTSC_STOP(FEClipLines, 1, 0);
 }
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
 {
     RDTSC_START(FEClipPoints);
     Clipper<1> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
     RDTSC_STOP(FEClipPoints, 1, 0);
 }
 
index a2ba76967fe7b335f6f81cf20d361690d6d9dc0b..b173ae59b4557f880d3481d43041590ced9e8a52 100644 (file)
@@ -302,7 +302,7 @@ public:
     }
 
     // clip SIMD primitives
-    void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId)
+    void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
     {
         // input/output vertex store for clipper
         simdvertex vertices[7]; // maximum 7 verts generated per triangle
@@ -402,6 +402,7 @@ public:
 
         uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
         uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
+        uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
 
         const simdscalari vOffsets = _mm256_set_epi32(
             0 * sizeof(simdvertex),  // unused lane
@@ -487,7 +488,7 @@ public:
                     if (assemble)
                     {
                         static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
-                        pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
+                        pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
                     }
                 } while (clipPa.NextPrim());
             }
@@ -499,7 +500,7 @@ public:
     }
     
     // execute the clipper stage
-    void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId)
+    void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
     {
         // set up binner based on PA state
         PFN_PROCESS_PRIMS pfnBinner;
@@ -552,7 +553,7 @@ public:
             RDTSC_START(FEGuardbandClip);
             // we have to clip tris, execute the clipper, which will also
             // call the binner
-            ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
+            ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
             RDTSC_STOP(FEGuardbandClip, 1, 0);
         }
         else if (validMask)
@@ -562,7 +563,7 @@ public:
             UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
 
             // forward valid prims directly to binner
-            pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
+            pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
         }
     }
 
@@ -948,6 +949,6 @@ private:
 
 
 // pipeline stage functions
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
index 144fcefb208555bd1b43b0b394fd09248a9fed1f..320aa924c4fb8718d92cea2409be39637300af77 100644 (file)
@@ -215,7 +215,7 @@ struct PA_STATE;
 
 // function signature for pipeline stages that execute after primitive assembly
 typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], 
-    uint32_t primMask, simdscalari primID);
+    uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
 
 OSALIGNLINE(struct) API_STATE
 {
index 3014c7defc8ce1ef840f8544cb5ac21fc95ce749..a62aa966c01f135daa39ffd9bbd79900fd8bc867 100644 (file)
@@ -833,7 +833,26 @@ static void GeometryShaderStage(
                                     vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
                                 }
 
-                                pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
+                                // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
+                                simdscalari vViewPortIdx;
+                                if (state.gsState.emitsViewportArrayIndex)
+                                {
+                                    simdvector vpiAttrib[3];
+                                    gsPa.Assemble(VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, vpiAttrib);
+
+                                    // OOB indices => forced to zero.
+                                    simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                    simdscalar vClearMask = _simd_cmplt_ps(vpiAttrib[0].x, _simd_castsi_ps(vNumViewports));
+                                    vpiAttrib[0].x = _simd_and_ps(vClearMask, vpiAttrib[0].x);
+
+                                    vViewPortIdx = _simd_castps_si(vpiAttrib[0].x);
+                                }
+                                else
+                                {
+                                    vViewPortIdx = _simd_set1_epi32(0);
+                                }
+
+                                pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
                             }
                         }
                     } while (gsPa.NextPrim());
@@ -1104,7 +1123,7 @@ static void TessellationStages(
 
                     SWR_ASSERT(pfnClipFunc);
                     pfnClipFunc(pDC, tessPa, workerId, prim,
-                        GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
+                        GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
                 }
             }
 
@@ -1359,7 +1378,7 @@ void ProcessDraw(
                                 {
                                     SWR_ASSERT(pDC->pState->pfnProcessPrims);
                                     pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
-                                        GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
+                                        GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), _simd_set1_epi32(0));
                                 }
                             }
                         }
@@ -1727,6 +1746,7 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
 /// @param workerId - thread's worker id. Even thread has a unique id.
 /// @param tri - Contains triangle position data for SIMDs worth of triangles.
 /// @param primID - Primitive ID for each triangle.
+/// @param viewportIdx - viewport array index for each triangle.
 /// @tparam CT - ConservativeRastFETraits
 template <typename CT>
 void BinTriangles(
@@ -1735,7 +1755,8 @@ void BinTriangles(
     uint32_t workerId,
     simdvector tri[3],
     uint32_t triMask,
-    simdscalari primID)
+    simdscalari primID,
+    simdscalari viewportIdx)
 {
     RDTSC_START(FEBinTriangles);
 
@@ -1770,7 +1791,14 @@ void BinTriangles(
         tri[2].v[2] = _simd_mul_ps(tri[2].v[2], vRecipW2);
 
         // viewport transform to screen coords
-        viewportTransform<3>(tri, state.vpMatrices);
+        if (state.gsState.emitsViewportArrayIndex)
+        {
+            viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
+        }
+        else
+        {
+            viewportTransform<3>(tri, state.vpMatrices);
+        }
     }
 
     // adjust for pixel center location
@@ -2119,7 +2147,8 @@ void BinPoints(
     uint32_t workerId,
     simdvector prim[3],
     uint32_t primMask,
-    simdscalari primID)
+    simdscalari primID,
+    simdscalari viewportIdx)
 {
     RDTSC_START(FEBinPoints);
 
@@ -2143,7 +2172,14 @@ void BinPoints(
         primVerts.z = _simd_mul_ps(primVerts.z, vRecipW0);
 
         // viewport transform to screen coords
-        viewportTransform<1>(&primVerts, state.vpMatrices);
+        if (state.gsState.emitsViewportArrayIndex)
+        {
+            viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
+        }
+        else
+        {
+            viewportTransform<1>(&primVerts, state.vpMatrices);
+        }
     }
 
     // adjust for pixel center location
@@ -2429,7 +2465,8 @@ void BinLines(
     uint32_t workerId,
     simdvector prim[],
     uint32_t primMask,
-    simdscalari primID)
+    simdscalari primID,
+    simdscalari viewportIdx)
 {
     RDTSC_START(FEBinLines);
 
@@ -2461,7 +2498,14 @@ void BinLines(
         prim[1].v[2] = _simd_mul_ps(prim[1].v[2], vRecipW1);
 
         // viewport transform to screen coords
-        viewportTransform<2>(prim, state.vpMatrices);
+        if (state.gsState.emitsViewportArrayIndex)
+        {
+            viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
+        }
+        else
+        {
+            viewportTransform<2>(prim, state.vpMatrices);
+        }
     }
 
     // adjust for pixel center location
index d47f17f42352f03edb820f61e15c54dfa002cab9..5e7762af2d50b2974fa1ecd29b78b1a5c6d4d592 100644 (file)
@@ -219,6 +219,26 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
     }
 }
 
+template<uint32_t NumVerts>
+INLINE
+void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simdscalari vViewportIdx)
+{
+    // perform a gather of each matrix element based on the viewport array indexes
+    simdscalar m00 = _simd_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 1);
+    simdscalar m30 = _simd_i32gather_ps(&vpMatrices.m30[0], vViewportIdx, 1);
+    simdscalar m11 = _simd_i32gather_ps(&vpMatrices.m11[0], vViewportIdx, 1);
+    simdscalar m31 = _simd_i32gather_ps(&vpMatrices.m31[0], vViewportIdx, 1);
+    simdscalar m22 = _simd_i32gather_ps(&vpMatrices.m22[0], vViewportIdx, 1);
+    simdscalar m32 = _simd_i32gather_ps(&vpMatrices.m32[0], vViewportIdx, 1);
+
+    for (uint32_t i = 0; i < NumVerts; ++i)
+    {
+        v[i].x = _simd_fmadd_ps(v[i].x, m00, m30);
+        v[i].y = _simd_fmadd_ps(v[i].y, m11, m31);
+        v[i].z = _simd_fmadd_ps(v[i].z, m22, m32);
+    }
+}
+
 INLINE
 void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
 {
@@ -288,6 +308,6 @@ void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, vo
 PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
 
 struct PA_STATE_BASE;  // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
+void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
+void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);