swr: [rasterizer] attribute swizzling and linkage
author Tim Rowley <timothy.o.rowley@intel.com>
Tue, 12 Jul 2016 21:03:42 +0000 (15:03 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Wed, 20 Jul 2016 15:22:15 +0000 (10:22 -0500)
Add support for enhanced attribute swizzling. Currently supports constant
source overrides to handle PrimitiveID support. No support yet for input
select swizzling or wrap shortest. Removes obsoleted linkageMask and
associated code.

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/clip.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/pa.h
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
src/gallium/drivers/swr/swr_shader.cpp
src/gallium/drivers/swr/swr_state.cpp
src/gallium/drivers/swr/swr_state.h

index aface7a77ef0b269f9c67deaf8056932c34a83a6..c3a1539b506ea5a5dbce81dc751bc565d884f0a7 100644 (file)
@@ -606,31 +606,6 @@ void SwrSetBlendFunc(
     pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
 }
 
-void SwrSetLinkage(
-    HANDLE hContext,
-    uint32_t mask,
-    const uint8_t* pMap)
-{
-    API_STATE* pState = GetDrawState(GetContext(hContext));
-
-    static const uint8_t IDENTITY_MAP[] =
-    {
-         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    };
-    static_assert(sizeof(IDENTITY_MAP) == sizeof(pState->linkageMap),
-        "Update for new value of MAX_ATTRIBUTES");
-
-    pState->linkageMask = mask;
-    pState->linkageCount = _mm_popcnt_u32(mask);
-
-    if (!pMap)
-    {
-        pMap = IDENTITY_MAP;
-    }
-    memcpy(pState->linkageMap, pMap, pState->linkageCount);
-}
-
 // update guardband multipliers for the viewport
 void updateGuardband(API_STATE *pState)
 {
@@ -847,25 +822,44 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
         (pState->state.depthStencilState.depthWriteEnable == FALSE) &&
         (pState->state.depthStencilState.stencilTestEnable == FALSE) &&
         (pState->state.depthStencilState.stencilWriteEnable == FALSE) &&
-        (pState->state.linkageCount == 0))
+        (pState->state.backendState.numAttributes == 0))
     {
         pState->pfnProcessPrims = nullptr;
-        pState->state.linkageMask = 0;
     }
 
     if (pState->state.soState.rasterizerDisable == true)
     {
         pState->pfnProcessPrims = nullptr;
-        pState->state.linkageMask = 0;
     }
 
-    // set up the frontend attrib mask
-    pState->state.feAttribMask = pState->state.linkageMask;
+    // set up the frontend attribute count
+    pState->state.feNumAttributes = 0;
+    const SWR_BACKEND_STATE& backendState = pState->state.backendState;
+    if (backendState.swizzleEnable)
+    {
+        // attribute swizzling is enabled, iterate over the map and record the max attribute used
+        for (uint32_t i = 0; i < backendState.numAttributes; ++i)
+        {
+            pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
+        }
+    }
+    else
+    {
+        pState->state.feNumAttributes = pState->state.backendState.numAttributes;
+    }
+
     if (pState->state.soState.soEnable)
     {
+        uint32_t streamMasks = 0;
         for (uint32_t i = 0; i < 4; ++i)
         {
-            pState->state.feAttribMask |= pState->state.soState.streamMasks[i];
+            streamMasks |= pState->state.soState.streamMasks[i];
+        }
+
+        DWORD maxAttrib;
+        if (_BitScanReverse(&maxAttrib, streamMasks))
+        {
+            pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
         }
     }
 
index 04cdb9e4e65348ff64801381e43db44feae55294..ab56cab772e7891cd121604d1f48de21fae8f36d 100644 (file)
@@ -329,19 +329,6 @@ void SWR_API SwrSetBlendFunc(
     uint32_t renderTarget,
     PFN_BLEND_JIT_FUNC pfnBlendFunc);
 
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set linkage mask
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param mask - Specifies which vertex outputs are are needed by PS.
-/// @param pMap - (Optional)Linkage map to specify where FE attributes are
-///               gathered from to supply PS attribute values.  The length
-///               of the map buffer needs to match the number of set bits
-///               in "mask".
-void SWR_API SwrSetLinkage(
-    HANDLE hContext,
-    uint32_t mask,
-    const uint8_t* pMap);
-
 //////////////////////////////////////////////////////////////////////////
 /// @brief SwrDraw
 /// @param hContext - Handle passed back from SwrCreateContext
index 1a6fc6d2873a0fa0731e4c93c39cfcd5c2946314..b2b3bb4e6fd406357ff621869ce9f5135a4f7994 100644 (file)
@@ -324,16 +324,13 @@ public:
         }
 
         // assemble attribs
-        DWORD slot = 0;
-        uint32_t mapIdx = 0;
-        uint32_t tmpLinkage = this->state.linkageMask;
+        const SWR_BACKEND_STATE& backendState = this->state.backendState;
 
         int32_t maxSlot = -1;
-        while (_BitScanForward(&slot, tmpLinkage))
+        for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot)
         {
-            tmpLinkage &= ~(1 << slot);
             // Compute absolute attrib slot in vertex array
-            uint32_t mapSlot = this->state.linkageMap[mapIdx++];
+            uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
             maxSlot = std::max<int32_t>(maxSlot, mapSlot);
             uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
 
index be4c2e94b425ac7f9f4f7b07398aeabb9e681ab9..13dcdfca2eedf51838c841892d0a7521f6ea7776 100644 (file)
@@ -264,15 +264,8 @@ OSALIGNLINE(struct) API_STATE
     PFN_DS_FUNC             pfnDsFunc;
     SWR_TS_STATE            tsState;
 
-    // Specifies which VS outputs are sent to PS.
-    // Does not include position
-    uint32_t                linkageMask; 
-    uint32_t                linkageCount;
-    uint8_t                 linkageMap[MAX_ATTRIBUTES];
-
-    // attrib mask, specifies the total set of attributes used
-    // by the frontend (vs, so, gs)
-    uint32_t                feAttribMask;
+    // Number of attributes used by the frontend (vs, so, gs)
+    uint32_t                feNumAttributes;
 
     PRIMITIVE_TOPOLOGY      topology;
     bool                    forceFront;
index cc8ebda35bcc6fb1c80cb5323257d10bbd13c442..8537c59033c3bd08b39d15c4f4657cb561607cc7 100644 (file)
@@ -794,15 +794,7 @@ static void GeometryShaderStage(
             uint8_t* pBase = pInstanceBase + instance * instanceStride;
             uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride;
             
-            DWORD numAttribs;
-            if (_BitScanReverse(&numAttribs, state.feAttribMask))
-            {
-                numAttribs++;
-            }
-            else
-            {
-                numAttribs = 0;
-            }
+            uint32_t numAttribs = state.feNumAttributes;
 
             for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
             {
@@ -1445,7 +1437,6 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
     return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
 }
 
-
 //////////////////////////////////////////////////////////////////////////
 /// @brief Processes attributes for the backend based on linkage mask and
 ///        linkage map.  Essentially just doing an SOA->AOS conversion and pack.
@@ -1455,75 +1446,101 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
 /// @param pLinkageMap - maps VS attribute slot to PS slot
 /// @param triIndex - Triangle to process attributes for
 /// @param pBuffer - Output result
-template<uint32_t NumVerts>
+template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT>
 INLINE void ProcessAttributes(
     DRAW_CONTEXT *pDC,
     PA_STATE&pa,
-    uint32_t linkageMask,
-    const uint8_t* pLinkageMap,
     uint32_t triIndex,
+    uint32_t primId,
     float *pBuffer)
 {
-    DWORD slot = 0;
-    uint32_t mapIdx = 0;
-    LONG constantInterpMask = pDC->pState->state.backendState.constantInterpolationMask;
+    static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
+    const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+    LONG constantInterpMask = backendState.constantInterpolationMask;
     const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
     const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology;
 
-    while (_BitScanForward(&slot, linkageMask))
+    static const float constTable[3][4] = {
+        {0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, 0.0f, 1.0f},
+        {1.0f, 1.0f, 1.0f, 1.0f}
+    };
+
+    for (uint32_t i = 0; i < backendState.numAttributes; ++i)
     {
-        linkageMask &= ~(1 << slot); // done with this bit.
+        uint32_t inputSlot;
+        if (IsSwizzledT::value)
+        {
+            SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
+            inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib;
 
-        // compute absolute slot in vertex attrib array
-        uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + pLinkageMap[mapIdx];
+        }
+        else
+        {
+            inputSlot = VERTEX_ATTRIB_START_SLOT + i;
+        }
 
         __m128 attrib[3];    // triangle attribs (always 4 wide)
+        static const uint32_t numVerts = NumVertsT::value < 3 ? NumVertsT::value : 3;
+        float* pAttribStart = pBuffer;
 
-        if (_bittest(&constantInterpMask, mapIdx))
+        if (HasConstantInterpT::value)
         {
-            uint32_t vid;
-            static const uint32_t tristripProvokingVertex[] = {0, 2, 1};
-            static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}};
-            static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}};
-            static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}};
-            static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}};
-
-            switch (topo) {
-            case TOP_QUAD_LIST:
-                pa.AssembleSingle(inputSlot,
-                                  triIndex + quadProvokingTri[triIndex & 1][provokingVertex],
-                                  attrib);
-                vid = quadProvokingVertex[triIndex & 1][provokingVertex];
-                break;
-            case TOP_QUAD_STRIP:
-                pa.AssembleSingle(inputSlot,
-                                  triIndex + qstripProvokingTri[triIndex & 1][provokingVertex],
-                                  attrib);
-                vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
-                break;
-            case TOP_TRIANGLE_STRIP:
-               pa.AssembleSingle(inputSlot, triIndex, attrib);
-               vid = (triIndex & 1)
-                   ? tristripProvokingVertex[provokingVertex]
-                   : provokingVertex;
-               break;
-            default:
-                pa.AssembleSingle(inputSlot, triIndex, attrib);
-                vid = provokingVertex;
-                break;
-            }
+            if (_bittest(&constantInterpMask, i))
+            {
+                uint32_t vid;
+                uint32_t adjustedTriIndex;
+                static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 };
+                static const int32_t quadProvokingTri[2][4] = { {0, 0, 0, 1}, {0, -1, 0, 0} };
+                static const uint32_t quadProvokingVertex[2][4] = { {0, 1, 2, 2}, {0, 1, 1, 2} };
+                static const int32_t qstripProvokingTri[2][4] = { {0, 0, 0, 1}, {-1, 0, 0, 0} };
+                static const uint32_t qstripProvokingVertex[2][4] = { {0, 1, 2, 1}, {0, 0, 2, 1} };
+
+                switch (topo) {
+                case TOP_QUAD_LIST:
+                    adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex];
+                    vid = quadProvokingVertex[triIndex & 1][provokingVertex];
+                    break;
+                case TOP_QUAD_STRIP:
+                    adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex];
+                    vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
+                    break;
+                case TOP_TRIANGLE_STRIP:
+                    adjustedTriIndex = triIndex;
+                    vid = (triIndex & 1)
+                        ? tristripProvokingVertex[provokingVertex]
+                        : provokingVertex;
+                    break;
+                default:
+                    adjustedTriIndex = triIndex;
+                    vid = provokingVertex;
+                    break;
+                }
+
+                pa.AssembleSingle(inputSlot, adjustedTriIndex, attrib);
 
-            for (uint32_t i = 0; i < NumVerts; ++i)
+                for (uint32_t i = 0; i < numVerts; ++i)
+                {
+                    _mm_store_ps(pBuffer, attrib[vid]);
+                    pBuffer += 4;
+                }
+            }
+            else
             {
-                _mm_store_ps(pBuffer, attrib[vid]);
-                pBuffer += 4;
+                pa.AssembleSingle(inputSlot, triIndex, attrib);
+
+                for (uint32_t i = 0; i < numVerts; ++i)
+                {
+                    _mm_store_ps(pBuffer, attrib[i]);
+                    pBuffer += 4;
+                }
             }
         }
         else
         {
             pa.AssembleSingle(inputSlot, triIndex, attrib);
 
-            for (uint32_t i = 0; i < NumVerts; ++i)
+            for (uint32_t i = 0; i < numVerts; ++i)
             {
                 _mm_store_ps(pBuffer, attrib[i]);
                 pBuffer += 4;
@@ -1534,16 +1551,66 @@ INLINE void ProcessAttributes(
         // interpolation code in the pixel shader works correctly for the
         // 3 topologies - point, line, tri.  This effectively zeros out the
         // effect of the missing vertices in the triangle interpolation.
-        for (uint32_t i = NumVerts; i < 3; ++i)
+        for (uint32_t v = numVerts; v < 3; ++v)
         {
-            _mm_store_ps(pBuffer, attrib[NumVerts - 1]);
+            _mm_store_ps(pBuffer, attrib[numVerts - 1]);
             pBuffer += 4;
         }
 
-        mapIdx++;
+        // check for constant source overrides
+        if (IsSwizzledT::value)
+        {
+            uint32_t mask = backendState.swizzleMap[i].componentOverrideMask;
+            if (mask)
+            {
+                DWORD comp;
+                while (_BitScanForward(&comp, mask))
+                {
+                    mask &= ~(1 << comp);
+
+                    float constantValue = 0.0f;
+                    switch ((SWR_CONSTANT_SOURCE)backendState.swizzleMap[i].constantSource)
+                    {
+                    case SWR_CONSTANT_SOURCE_CONST_0000:
+                    case SWR_CONSTANT_SOURCE_CONST_0001_FLOAT:
+                    case SWR_CONSTANT_SOURCE_CONST_1111_FLOAT:
+                        constantValue = constTable[backendState.swizzleMap[i].constantSource][comp];
+                        break;
+                    case SWR_CONSTANT_SOURCE_PRIM_ID:
+                        constantValue = *(float*)&primId;
+                        break;
+                    }
+
+                    // apply constant value to all 3 vertices
+                    for (uint32_t v = 0; v < 3; ++v)
+                    {
+                        pAttribStart[comp + v * 4] = constantValue;
+                    }
+                }
+            }
+        }
     }
 }
 
+
+typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
+
+struct ProcessAttributesChooser
+{
+    typedef PFN_PROCESS_ATTRIBUTES FuncType;
+
+    template <typename... ArgsB>
+    static FuncType GetFunc()
+    {
+        return ProcessAttributes<ArgsB...>;
+    }
+};
+
+PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp)
+{
+    return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(NumVerts, IsSwizzled, HasConstantInterp);
+}
+
 //////////////////////////////////////////////////////////////////////////
 /// @brief Processes enabled user clip distances. Loads the active clip
 ///        distances from the PA, sets up barycentric equations, and
@@ -1742,6 +1809,10 @@ void BinTriangles(
     const SWR_GS_STATE& gsState = state.gsState;
     MacroTileMgr *pTileMgr = pDC->pTileMgr;
 
+    // Select attribute processor
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
+        state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
 
     simdscalar vRecipW0 = _simd_set1_ps(1.0f);
     simdscalar vRecipW1 = _simd_set1_ps(1.0f);
@@ -1951,8 +2022,7 @@ void BinTriangles(
     // scan remaining valid triangles and bin each separately
     while (_BitScanForward(&triIndex, triMask))
     {
-        uint32_t linkageCount = state.linkageCount;
-        uint32_t linkageMask  = state.linkageMask;
+        uint32_t linkageCount = state.backendState.numAttributes;
         uint32_t numScalarAttribs = linkageCount * 4;
 
         BE_WORK work;
@@ -1972,7 +2042,7 @@ void BinTriangles(
         float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
         desc.pAttribs = pAttribs;
         desc.numAttribs = linkageCount;
-        ProcessAttributes<3>(pDC, pa, linkageMask, state.linkageMap, triIndex, desc.pAttribs);
+        pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs);
 
         // store triangle vertex data
         desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
@@ -2050,6 +2120,10 @@ void BinPoints(
     const SWR_GS_STATE& gsState = state.gsState;
     const SWR_RASTSTATE& rastState = state.rastState;
 
+    // Select attribute processor
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
+        state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
     if (!feState.vpTransformDisable)
     {
         // perspective divide
@@ -2130,12 +2204,13 @@ void BinPoints(
 
         uint32_t *pPrimID = (uint32_t *)&primID;
         DWORD primIndex = 0;
+
+        const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+
         // scan remaining valid triangles and bin each separately
         while (_BitScanForward(&primIndex, primMask))
         {
-            uint32_t linkageCount = state.linkageCount;
-            uint32_t linkageMask = state.linkageMask;
-
+            uint32_t linkageCount = backendState.numAttributes;
             uint32_t numScalarAttribs = linkageCount * 4;
 
             BE_WORK work;
@@ -2158,7 +2233,7 @@ void BinPoints(
             desc.pAttribs = pAttribs;
             desc.numAttribs = linkageCount;
 
-            ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, pAttribs);
+            pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs);
 
             // store raster tile aligned x, y, perspective correct z
             float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
@@ -2265,11 +2340,11 @@ void BinPoints(
         _simd_store_ps((float*)aPrimVertsZ, primVerts.z);
 
         // scan remaining valid prims and bin each separately
+        const SWR_BACKEND_STATE& backendState = state.backendState;
         DWORD primIndex;
         while (_BitScanForward(&primIndex, primMask))
         {
-            uint32_t linkageCount = state.linkageCount;
-            uint32_t linkageMask = state.linkageMask;
+            uint32_t linkageCount = backendState.numAttributes;
             uint32_t numScalarAttribs = linkageCount * 4;
 
             BE_WORK work;
@@ -2290,7 +2365,7 @@ void BinPoints(
             // store active attribs
             desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
             desc.numAttribs = linkageCount;
-            ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+            pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
 
             // store point vertex data
             float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
@@ -2353,6 +2428,10 @@ void BinLines(
     const SWR_FRONTEND_STATE& feState = state.frontendState;
     const SWR_GS_STATE& gsState = state.gsState;
 
+    // Select attribute processor
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2,
+    state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
     simdscalar vRecipW0 = _simd_set1_ps(1.0f);
     simdscalar vRecipW1 = _simd_set1_ps(1.0f);
 
@@ -2485,8 +2564,7 @@ void BinLines(
     DWORD primIndex;
     while (_BitScanForward(&primIndex, primMask))
     {
-        uint32_t linkageCount = state.linkageCount;
-        uint32_t linkageMask = state.linkageMask;
+        uint32_t linkageCount = state.backendState.numAttributes;
         uint32_t numScalarAttribs = linkageCount * 4;
 
         BE_WORK work;
@@ -2507,7 +2585,7 @@ void BinLines(
         // store active attribs
         desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
         desc.numAttribs = linkageCount;
-        ProcessAttributes<2>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+        pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
 
         // store line vertex data
         desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
index 6aa73c1ddf1b2820bbbacf05b5491936c8db3fd2..64932af61456cbeb74bacbec21615264ead5cd01 100644 (file)
@@ -1169,15 +1169,8 @@ struct PA_FACTORY
             topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ)))
         {
             memset(&indexStore, 0, sizeof(indexStore));
-            DWORD numAttribs;
-            if (_BitScanReverse(&numAttribs, state.feAttribMask))
-            {
-                numAttribs++;
-            }
-            else
-            {
-                numAttribs = 0;
-            }
+            uint32_t numAttribs = state.feNumAttributes;
+
             new (&this->paCut) PA_STATE_CUT(pDC, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * KNOB_SIMD_WIDTH, 
                 &this->indexStore[0], numVerts, numAttribs, state.topology, false);
             cutPA = true;
index 9fc304a8c3f1b5b3664a0c9f67f0baf6bf3ddd90..0931c82b5d2939a037636c01cfca74700948ffee 100644 (file)
@@ -938,13 +938,34 @@ struct SWR_RASTSTATE
     uint8_t clipDistanceMask;
 };
 
+enum SWR_CONSTANT_SOURCE
+{
+    SWR_CONSTANT_SOURCE_CONST_0000,
+    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
+    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
+    SWR_CONSTANT_SOURCE_PRIM_ID
+};
+
+struct SWR_ATTRIB_SWIZZLE
+{
+    uint16_t sourceAttrib : 5;          // source attribute 
+    uint16_t constantSource : 2;        // constant source to apply
+    uint16_t componentOverrideMask : 4; // override component with constant source
+};
+
 // backend state
 struct SWR_BACKEND_STATE
 {
-    uint32_t constantInterpolationMask;
-    uint32_t pointSpriteTexCoordMask;
-    uint8_t numAttributes;
-    uint8_t numComponents[KNOB_NUM_ATTRIBUTES];
+    uint32_t constantInterpolationMask;     // bitmask indicating which attributes have constant interpolation
+    uint32_t pointSpriteTexCoordMask;       // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
+
+    uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
+    uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
+
+    bool swizzleEnable;                 // when enabled, core will parse the swizzle map when 
+                                        // setting up attributes for the backend, otherwise
+                                        // all attributes up to numAttributes will be sent
+    SWR_ATTRIB_SWIZZLE swizzleMap[32];
 };
 
 
index 1d8e9a111edd462fe4d13947c84050a832030e71..d3181cd29eceaf0b3106bc96db59b79e554e00f7 100644 (file)
@@ -80,12 +80,12 @@ enum ComponentEnable
 
 enum ComponentControl
 {
-    NoStore     = 0,
-    StoreSrc    = 1,
-    Store0      = 2,
-    Store1Fp    = 3,
-    Store1Int   = 4,
-    StoreVertexId = 5,
+    NoStore         = 0,
+    StoreSrc        = 1,
+    Store0          = 2,
+    Store1Fp        = 3,
+    Store1Int       = 4,
+    StoreVertexId   = 5,
     StoreInstanceId = 6
 };
 
index 4d1b604817b430e2c1abd47bcaa52839bae7c003..ecb4545d13bdf7ef2edd4309513c1e1c47e74d2d 100644 (file)
@@ -157,18 +157,6 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
 {
    struct swr_vertex_shader *swr_vs = ctx->vs;
 
-   swr_vs->linkageMask = 0;
-
-   for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) {
-      switch (swr_vs->info.base.output_semantic_name[i]) {
-      case TGSI_SEMANTIC_POSITION:
-         break;
-      default:
-         swr_vs->linkageMask |= (1 << i);
-         break;
-      }
-   }
-
    LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
 
index 5caaa5c7139de654f711901b4e02a1dbe3e8c5cc..dac95ce42e7f55d5636a45832e97704955ab84dd 100644 (file)
@@ -1373,16 +1373,13 @@ swr_update_derived(struct pipe_context *pipe,
       }
    }
 
-   uint32_t linkage = ctx->vs->linkageMask;
-   if (ctx->rasterizer->sprite_coord_enable)
-      linkage |= (1 << ctx->vs->info.base.num_outputs);
-
-   SwrSetLinkage(ctx->swrContext, linkage, NULL);
-
    // set up backend state
    SWR_BACKEND_STATE backendState = {0};
-   backendState.numAttributes = 1;
-   backendState.numComponents[0] = 4;
+   backendState.numAttributes =
+      ctx->vs->info.base.num_outputs - 1 +
+      (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
+   for (unsigned i = 0; i < backendState.numAttributes; i++)
+      backendState.numComponents[i] = 4;
    backendState.constantInterpolationMask =
       ctx->rasterizer->flatshade ?
       ctx->fs->flatConstantMask :
index cb69964127476a8af72e10426c31fd97fac5fd00..dcb1145a3626aa56a0aa551e6718980076afd816 100644 (file)
@@ -53,7 +53,6 @@ typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS;
 struct swr_vertex_shader {
    struct pipe_shader_state pipe;
    struct lp_tgsi_info info;
-   unsigned linkageMask;
    std::unordered_map<swr_jit_vs_key, std::unique_ptr<VariantVS>> map;
    SWR_STREAMOUT_STATE soState;
    PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0};