swr/rast: Use different handling for stream masks
author George Kyriazis <george.kyriazis@intel.com>
Tue, 17 Apr 2018 04:18:28 +0000 (23:18 -0500)
committer George Kyriazis <george.kyriazis@intel.com>
Fri, 27 Apr 2018 19:36:41 +0000 (14:36 -0500)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/common/os.h
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp

index 5cfd12ff72a788cd50cc56ca6f4810fdd7c7aba5..e779562225eab8bf0a1237e01d9659165648a7b7 100644 (file)
@@ -209,6 +209,9 @@ unsigned char _BitScanReverse(unsigned int *Index, unsigned int Mask)
     return (Mask != 0);
 }
 
+#define _BitScanForward64 _BitScanForward
+#define _BitScanReverse64 _BitScanReverse
+
 inline
 void *AlignedMalloc(size_t size, size_t alignment)
 {
index e37e2e4a5384d9c08cea03a2f6e4211eed9c77fc..a2ee85d12b854288ae1d667a3ec10fa5a7e71b30 100644 (file)
@@ -976,14 +976,14 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
 
     if (pState->state.soState.soEnable)
     {
-        uint32_t streamMasks = 0;
+        uint64_t streamMasks = 0;
         for (uint32_t i = 0; i < 4; ++i)
         {
             streamMasks |= pState->state.soState.streamMasks[i];
         }
 
         DWORD maxAttrib;
-        if (_BitScanReverse(&maxAttrib, streamMasks))
+        if (_BitScanReverse64(&maxAttrib, streamMasks))
         {
             pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
         }
index 6e2bab3e2344bec18dd17da2f0487de961b30ffa..1847c3e822df4aaaf56cdaa398a6cec28aeaa35e 100644 (file)
@@ -528,10 +528,10 @@ static void StreamOut(
     for (uint32_t primIndex = 0; primIndex < numPrims; ++primIndex)
     {
         DWORD slot = 0;
-        uint32_t soMask = soState.streamMasks[streamIndex];
+        uint64_t soMask = soState.streamMasks[streamIndex];
 
         // Write all entries into primitive data buffer for SOS.
-        while (_BitScanForward(&slot, soMask))
+        while (_BitScanForward64(&slot, soMask))
         {
             simd4scalar attrib[MAX_NUM_VERTS_PER_PRIM];    // prim attribs (always 4 wide)
             uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
@@ -551,7 +551,7 @@ static void StreamOut(
                 _mm_store_ps((float*)pPrimDataAttrib, attrib[v]);
             }
 
-            soMask &= ~(1 << slot);
+            soMask &= ~(uint64_t(1) << slot);
         }
 
         // Update pPrimData pointer 
index 217cf44c58fee224c62d63e2c648bd96e45bdc76..f160913a6524f41de0edaf948eef91ab7efacd06 100644 (file)
@@ -702,7 +702,7 @@ struct SWR_STREAMOUT_STATE
     // The stream masks specify which attributes are sent to which streams.
     // These masks help the FE to setup the pPrimData buffer that is passed
     // the Stream Output Shader (SOS) function.
-    uint32_t streamMasks[MAX_SO_STREAMS];
+    uint64_t streamMasks[MAX_SO_STREAMS];
 
     // Number of attributes, including position, per vertex that are streamed out.
     // This should match number of bits in stream mask.
index 15a6bc4028960c21c7fec470ce511a1a7e0581ed..f804900291e46da6ff17a3981128692ac5ea0834 100644 (file)
@@ -313,6 +313,7 @@ struct StreamOutJit : public Builder
 
         JitManager::DumpToFile(soFunc, "SoFunc_optimized");
 
+
         return soFunc;
     }
 };
@@ -333,6 +334,7 @@ PFN_SO_FUNC JitStreamoutFunc(HANDLE hJitMgr, const HANDLE hFunc)
 
     pJitMgr->DumpAsm(func, "SoFunc_optimized");
 
+
     return pfnStreamOut;
 }