swr: [rasterizer core] viewport array support
author Tim Rowley <timothy.o.rowley@intel.com>
Fri, 5 Aug 2016 22:42:24 +0000 (16:42 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Wed, 10 Aug 2016 16:08:40 +0000 (11:08 -0500)
Change viewport matrix storage from AOS to SOA to support viewport arrays.

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.h
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/swr_state.cpp
src/gallium/drivers/swr/swr_state.h

index 5ebefabb75382df5207915135460efe02190237a..3e1bb336729ffe053ca52130e22426b975fc6d76 100644 (file)
@@ -663,7 +663,7 @@ void SwrSetViewports(
     HANDLE hContext,
     uint32_t numViewports,
     const SWR_VIEWPORT* pViewports,
-    const SWR_VIEWPORT_MATRIX* pMatrices)
+    const SWR_VIEWPORT_MATRICES* pMatrices)
 {
     SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS,
         "Invalid number of viewports.");
@@ -675,7 +675,9 @@ void SwrSetViewports(
 
     if (pMatrices != nullptr)
     {
-        memcpy(&pState->vpMatrix[0], pMatrices, sizeof(SWR_VIEWPORT_MATRIX) * numViewports);
+        //memcpy(&pState->vpMatrix[0], pMatrices, sizeof(SWR_VIEWPORT_MATRIX) * numViewports);
+        // @todo Faster to copy portions of the SOA or just copy all of it?
+        memcpy(&pState->vpMatrices, pMatrices, sizeof(SWR_VIEWPORT_MATRICES));
     }
     else
     {
@@ -684,22 +686,22 @@ void SwrSetViewports(
         {
             if (pContext->driverType == DX)
             {
-                pState->vpMatrix[i].m00 = pState->vp[i].width / 2.0f;
-                pState->vpMatrix[i].m11 = -pState->vp[i].height / 2.0f;
-                pState->vpMatrix[i].m22 = pState->vp[i].maxZ - pState->vp[i].minZ;
-                pState->vpMatrix[i].m30 = pState->vp[i].x + pState->vpMatrix[i].m00;
-                pState->vpMatrix[i].m31 = pState->vp[i].y - pState->vpMatrix[i].m11;
-                pState->vpMatrix[i].m32 = pState->vp[i].minZ;
+                pState->vpMatrices.m00[i] = pState->vp[i].width / 2.0f;
+                pState->vpMatrices.m11[i] = -pState->vp[i].height / 2.0f;
+                pState->vpMatrices.m22[i] = pState->vp[i].maxZ - pState->vp[i].minZ;
+                pState->vpMatrices.m30[i] = pState->vp[i].x + pState->vpMatrices.m00[i];
+                pState->vpMatrices.m31[i] = pState->vp[i].y - pState->vpMatrices.m11[i];
+                pState->vpMatrices.m32[i] = pState->vp[i].minZ;
             }
             else
             {
                 // Standard, with the exception that Y is inverted.
-                pState->vpMatrix[i].m00 = (pState->vp[i].width - pState->vp[i].x) / 2.0f;
-                pState->vpMatrix[i].m11 = (pState->vp[i].y - pState->vp[i].height) / 2.0f;
-                pState->vpMatrix[i].m22 = (pState->vp[i].maxZ - pState->vp[i].minZ) / 2.0f;
-                pState->vpMatrix[i].m30 = pState->vp[i].x + pState->vpMatrix[i].m00;
-                pState->vpMatrix[i].m31 = pState->vp[i].height + pState->vpMatrix[i].m11;
-                pState->vpMatrix[i].m32 = pState->vp[i].minZ + pState->vpMatrix[i].m22;
+                pState->vpMatrices.m00[i] = (pState->vp[i].width - pState->vp[i].x) / 2.0f;
+                pState->vpMatrices.m11[i] = (pState->vp[i].y - pState->vp[i].height) / 2.0f;
+                pState->vpMatrices.m22[i] = (pState->vp[i].maxZ - pState->vp[i].minZ) / 2.0f;
+                pState->vpMatrices.m30[i] = pState->vp[i].x + pState->vpMatrices.m00[i];
+                pState->vpMatrices.m31[i] = pState->vp[i].height + pState->vpMatrices.m11[i];
+                pState->vpMatrices.m32[i] = pState->vp[i].minZ + pState->vpMatrices.m22[i];
 
                 // Now that the matrix is calculated, clip the view coords to screen size.
                 // OpenGL allows for -ve x,y in the viewport.
index 9c80526b1e523a444d1f553b3987fab94461cb30..304169eda78728a444ba523efb6aa54cd0b9c45e 100644 (file)
@@ -495,7 +495,7 @@ void SWR_API SwrSetViewports(
     HANDLE hContext,
     uint32_t numViewports,
     const SWR_VIEWPORT* pViewports,
-    const SWR_VIEWPORT_MATRIX* pMatrices);
+    const SWR_VIEWPORT_MATRICES* pMatrices);
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief SwrSetScissorRects
index b38ec46ae285c41344dbfc9aaf6bdf624b6181b6..163ee5b1245fed72b3ec52ea6c7a79303a18a3a0 100644 (file)
@@ -277,7 +277,7 @@ OSALIGNLINE(struct) API_STATE
     GUARDBAND               gbState;
 
     SWR_VIEWPORT            vp[KNOB_NUM_VIEWPORTS_SCISSORS];
-    SWR_VIEWPORT_MATRIX     vpMatrix[KNOB_NUM_VIEWPORTS_SCISSORS];
+    SWR_VIEWPORT_MATRICES   vpMatrices;
 
     BBOX                    scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
     BBOX                    scissorInFixedPoint;
index 24b217da024b502d3efc299040439baeebedd8a4..0f0adf45cbc12c2ea34f8d68cdb7892090b104cb 100644 (file)
@@ -1793,7 +1793,7 @@ void BinTriangles(
         tri[2].v[2] = _simd_mul_ps(tri[2].v[2], vRecipW2);
 
         // viewport transform to screen coords
-        viewportTransform<3>(tri, state.vpMatrix[0]);
+        viewportTransform<3>(tri, state.vpMatrices);
     }
 
     // adjust for pixel center location
@@ -2166,7 +2166,7 @@ void BinPoints(
         primVerts.z = _simd_mul_ps(primVerts.z, vRecipW0);
 
         // viewport transform to screen coords
-        viewportTransform<1>(&primVerts, state.vpMatrix[0]);
+        viewportTransform<1>(&primVerts, state.vpMatrices);
     }
 
     // adjust for pixel center location
@@ -2484,7 +2484,7 @@ void BinLines(
         prim[1].v[2] = _simd_mul_ps(prim[1].v[2], vRecipW1);
 
         // viewport transform to screen coords
-        viewportTransform<2>(prim, state.vpMatrix[0]);
+        viewportTransform<2>(prim, state.vpMatrices);
     }
 
     // adjust for pixel center location
index 9142101089ed1c5d840c4da8c497c6db65c7c393..b4e6f9a460d20b4ba9677e842bdbff082517c6bd 100644 (file)
@@ -202,14 +202,14 @@ void viewportTransform(__m128 &vX, __m128 &vY, __m128 &vZ, const SWR_VIEWPORT_MA
 
 template<uint32_t NumVerts>
 INLINE
-void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRIX & vpMatrix)
+void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
 {
-    simdscalar m00 = _simd_load1_ps(&vpMatrix.m00);
-    simdscalar m30 = _simd_load1_ps(&vpMatrix.m30);
-    simdscalar m11 = _simd_load1_ps(&vpMatrix.m11);
-    simdscalar m31 = _simd_load1_ps(&vpMatrix.m31);
-    simdscalar m22 = _simd_load1_ps(&vpMatrix.m22);
-    simdscalar m32 = _simd_load1_ps(&vpMatrix.m32);
+    simdscalar m00 = _simd_load1_ps(&vpMatrices.m00[0]);
+    simdscalar m30 = _simd_load1_ps(&vpMatrices.m30[0]);
+    simdscalar m11 = _simd_load1_ps(&vpMatrices.m11[0]);
+    simdscalar m31 = _simd_load1_ps(&vpMatrices.m31[0]);
+    simdscalar m22 = _simd_load1_ps(&vpMatrices.m22[0]);
+    simdscalar m32 = _simd_load1_ps(&vpMatrices.m32[0]);
 
     for (uint32_t i = 0; i < NumVerts; ++i)
     {
index a3616bce069c13011de1b8c7c4e0a611cd01d30f..fdf5d7ef45cf83f3c3dd360f1f859c4aa25e80e4 100644 (file)
@@ -833,6 +833,19 @@ struct SWR_VIEWPORT_MATRIX
     float m32;
 };
 
+//////////////////////////////////////////////////////////////////////////
+/// SWR_VIEWPORT_MATRICES
+/////////////////////////////////////////////////////////////////////////
+struct SWR_VIEWPORT_MATRICES  ///< Viewport matrices in SOA form: one array per matrix element, indexed by viewport.
+{
+    float m00[KNOB_NUM_VIEWPORTS_SCISSORS];  ///< X scale for each viewport
+    float m11[KNOB_NUM_VIEWPORTS_SCISSORS];  ///< Y scale for each viewport
+    float m22[KNOB_NUM_VIEWPORTS_SCISSORS];  ///< Z scale for each viewport
+    float m30[KNOB_NUM_VIEWPORTS_SCISSORS];  ///< X translate for each viewport
+    float m31[KNOB_NUM_VIEWPORTS_SCISSORS];  ///< Y translate for each viewport
+    float m32[KNOB_NUM_VIEWPORTS_SCISSORS];  ///< Z translate for each viewport
+};
+
 //////////////////////////////////////////////////////////////////////////
 /// SWR_VIEWPORT
 /////////////////////////////////////////////////////////////////////////
index 2df79855358f7b5fcabc082d7b2dd7f2180f7e23..de41ddc704d615b8f5dd26489729e679b2ec8852 100644 (file)
@@ -944,7 +944,7 @@ swr_update_derived(struct pipe_context *pipe,
       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
 
       SWR_VIEWPORT *vp = &ctx->derived.vp;
-      SWR_VIEWPORT_MATRIX *vpm = &ctx->derived.vpm;
+      SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
 
       vp->x = state->translate[0] - state->scale[0];
       vp->width = state->translate[0] + state->scale[0];
@@ -958,12 +958,12 @@ swr_update_derived(struct pipe_context *pipe,
          vp->maxZ = state->translate[2] + state->scale[2];
       }
 
-      vpm->m00 = state->scale[0];
-      vpm->m11 = state->scale[1];
-      vpm->m22 = state->scale[2];
-      vpm->m30 = state->translate[0];
-      vpm->m31 = state->translate[1];
-      vpm->m32 = state->translate[2];
+      vpm->m00[0] = state->scale[0];
+      vpm->m11[0] = state->scale[1];
+      vpm->m22[0] = state->scale[2];
+      vpm->m30[0] = state->translate[0];
+      vpm->m31[0] = state->translate[1];
+      vpm->m32[0] = state->translate[2];
 
       /* Now that the matrix is calculated, clip the view coords to screen
        * size.  OpenGL allows for -ve x,y in the viewport. */
index dcb1145a3626aa56a0aa551e6718980076afd816..0e3b49d2b0d330476e85349d246e30d598aa33dd 100644 (file)
@@ -87,7 +87,7 @@ struct swr_blend_state {
 struct swr_derived_state {
    SWR_RASTSTATE rastState;
    SWR_VIEWPORT vp;
-   SWR_VIEWPORT_MATRIX vpm;
+   SWR_VIEWPORT_MATRICES vpm;
 };
 
 void swr_update_derived(struct pipe_context *,