swr/rasterizer: Fix GS attributes processing
author: Jan Zielinski <jan.zielinski@intel.com>
Fri, 2 Aug 2019 09:59:03 +0000 (11:59 +0200)
committer: Jan Zielinski <jan.zielinski@intel.com>
Fri, 30 Aug 2019 07:31:45 +0000 (07:31 +0000)
The input to the GS is just a set of attributes, so remove the explicit setup
of 'position', which is meaningless for GS input processing.

Reviewed-by: Alok Hota <alok.hota@intel.com>
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/swr_shader.cpp

index 1aa98f49fd7b4a8670d31918e370772157d2c5ba..13e92e8640a4b2b8ef571a3549f5d3d339348790 100644 (file)
@@ -851,29 +851,21 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC,
     gsContext.inputVertStride = pState->inputVertStride;
     for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot)
     {
-        uint32_t srcAttribSlot = pState->srcVertexAttribOffset + slot;
-        uint32_t attribSlot    = pState->vertexAttribOffset + slot;
-        pa.Assemble(srcAttribSlot, attrib);
+        uint32_t attribOffset = slot + pState->vertexAttribOffset;
+        pa.Assemble(attribOffset, attrib);
 
         for (uint32_t i = 0; i < numVertsPerPrim; ++i)
         {
-            gsContext.pVerts[attribSlot + pState->inputVertStride * i] = attrib[i];
+            gsContext.pVerts[attribOffset + pState->inputVertStride * i] = attrib[i];
         }
     }
 
-    // assemble position
-    pa.Assemble(VERTEX_POSITION_SLOT, attrib);
-    for (uint32_t i = 0; i < numVertsPerPrim; ++i)
-    {
-        gsContext.pVerts[VERTEX_POSITION_SLOT + pState->inputVertStride * i] = attrib[i];
-    }
-
     // record valid prims from the frontend to avoid over binning the newly generated
     // prims from the GS
 #if USE_SIMD16_FRONTEND
     uint32_t numInputPrims = numPrims_simd8;
 #else
-    uint32_t          numInputPrims = pa.NumPrims();
+    uint32_t numInputPrims = pa.NumPrims();
 #endif
 
     for (uint32_t instance = 0; instance < pState->instanceCount; ++instance)
index 66144bbef7546e09ead4c282a3e280b6d7ef0767..66a23bd9b0879add161a20cdcff12cacd002a3fe 100644 (file)
@@ -747,13 +747,11 @@ struct SWR_GS_STATE
     // Total amount of memory to allocate for one instance of the shader output in bytes
     uint32_t allocationSize;
 
-    // Offset to the start of the attributes of the input vertices, in simdvector units, as read by
-    // the GS
+    // Offset to start reading data per input vertex in simdvector units. This can be used to
+    // skip over any vertex data output from the previous stage that is unused in the GS, removing
+    // unnecessary vertex processing.
     uint32_t vertexAttribOffset;
 
-    // Offset to the attributes as stored by the preceding shader stage.
-    uint32_t srcVertexAttribOffset;
-
     // Size of the control data section which contains cut or streamID data, in simdscalar units.
     // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are
     // no cuts or streamID bits.
@@ -772,10 +770,7 @@ struct SWR_GS_STATE
     // shader is expected to store the final vertex count in the first dword of the gs output
     // stream.
     uint32_t staticVertexCount;
-
-    uint32_t pad;
 };
-static_assert(sizeof(SWR_GS_STATE) == 64, "Adjust padding to keep size (or remove this assert)");
 
 //////////////////////////////////////////////////////////////////////////
 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
index cbffaef0b1245837fb810d16beeb28bbbd337727..9b27652d8bc21cb014051497ac2ee2e881a42d42 100644 (file)
@@ -555,7 +555,7 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
 
    pGS->gsEnable = true;
 
-   pGS->numInputAttribs = info->num_inputs;
+   pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
    pGS->outputTopology =
       swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
    pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
@@ -565,8 +565,7 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
    pGS->isSingleStream = true;
    pGS->singleStreamID = 0;
 
-   pGS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
-   pGS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
+   pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
    pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
    pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
    pGS->controlDataSize = 8; // GS ouputs max of 8 32B units
@@ -793,7 +792,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
    pWorkerData->setName("pWorkerData");
    Value *pVsCtx = &*argitr++;
    pVsCtx->setName("vsCtx");
-   
+
    Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
 
    consts_ptr->setName("vs_constants");