swr/rast: Fix allocation of DS output data for USE_SIMD16_FRONTEND
author Tim Rowley <timothy.o.rowley@intel.com>
Fri, 15 Sep 2017 23:53:47 +0000 (18:53 -0500)
committer Tim Rowley <timothy.o.rowley@intel.com>
Mon, 25 Sep 2017 18:38:57 +0000 (13:38 -0500)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/core/frontend.cpp

index 22a5705c480e8af6d528a7262ce1f30cee7be154..aea8e88de4d87d653e9be0cbc6d3dcf0491690a6 100644 (file)
@@ -1062,7 +1062,7 @@ struct TessellationThreadLocalData
     size_t tsCtxSize;
 
     simdscalar* pDSOutput;
-    size_t numDSOutputVectors;
+    size_t dsOutputAllocSize;
 };
 
 THREAD TessellationThreadLocalData* gt_pTessellationThreadData = nullptr;
@@ -1210,24 +1210,20 @@ static void TessellationStages(
 
         // Allocate DS Output memory
         uint32_t requiredDSVectorInvocations = AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH;
-        size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs;
 #if USE_SIMD16_FRONTEND
         size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) * tsState.numDsOutputAttribs;      // simd8 -> simd16, padding
 #else
+        size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs;
         size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors;
 #endif
-        if (requiredDSOutputVectors > gt_pTessellationThreadData->numDSOutputVectors)
+        if (requiredAllocSize > gt_pTessellationThreadData->dsOutputAllocSize)
         {
             AlignedFree(gt_pTessellationThreadData->pDSOutput);
             gt_pTessellationThreadData->pDSOutput = (simdscalar*)AlignedMalloc(requiredAllocSize, 64);
-#if USE_SIMD16_FRONTEND
-            gt_pTessellationThreadData->numDSOutputVectors = RoundUpEven(requiredDSVectorInvocations) * tsState.numDsOutputAttribs; // simd8 -> simd16, padding
-#else
-            gt_pTessellationThreadData->numDSOutputVectors = requiredDSOutputVectors;
-#endif
+            gt_pTessellationThreadData->dsOutputAllocSize = requiredAllocSize;
         }
         SWR_ASSERT(gt_pTessellationThreadData->pDSOutput);
-        SWR_ASSERT(gt_pTessellationThreadData->numDSOutputVectors >= requiredDSOutputVectors);
+        SWR_ASSERT(gt_pTessellationThreadData->dsOutputAllocSize >= requiredAllocSize);
 
 #if defined(_DEBUG)
         memset(gt_pTessellationThreadData->pDSOutput, 0x90, requiredAllocSize);
@@ -1356,7 +1352,7 @@ static void TessellationStages(
         AlignedFree(gt_pTessellationThreadData->pDSOutput);
         gt_pTessellationThreadData->pDSOutput = nullptr;
     }
-    gt_pTessellationThreadData->numDSOutputVectors = 0;
+    gt_pTessellationThreadData->dsOutputAllocSize = 0;
 
 #endif
     TSDestroyCtx(tsCtx);