swr: [rasterizer archrast/core/scripts] Fix archrast multithreading issue
author Tim Rowley <timothy.o.rowley@intel.com>
Sat, 18 Feb 2017 08:29:06 +0000 (00:29 -0800)
committer Tim Rowley <timothy.o.rowley@intel.com>
Mon, 20 Mar 2017 23:04:53 +0000 (18:04 -0500)
Per-pixel stats are cached, but they were not always being flushed as threads
moved from one draw context to the next.  Added an explicit flush so that
all archrast objects can flush any cached events.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/archrast/archrast.h
src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/threads.cpp
src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template

index ad4d20c24791a7331bb53de6e4fee5f1bd4fac39..acd0a0f50f352611a3b90ae35e14f10e48c2790b 100644 (file)
@@ -72,7 +72,7 @@ namespace ArchRast
     class EventHandlerStatsFile : public EventHandlerFile
     {
     public:
-        EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
+        EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}
 
         // These are events that we're not interested in saving in stats event files.
         virtual void Handle(const Start& event) {}
@@ -87,6 +87,7 @@ namespace ArchRast
             //earlyStencil test compute
             mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
             mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
         }
 
         virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
@@ -98,6 +99,7 @@ namespace ArchRast
             //earlyStencil test compute
             mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
             mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
         }
 
         virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
@@ -109,6 +111,7 @@ namespace ArchRast
             //earlyStencil test compute
             mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
             mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
         }
 
         virtual void Handle(const LateDepthStencilInfoSingleSample& event)
@@ -120,6 +123,7 @@ namespace ArchRast
             //lateStencil test compute
             mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
             mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
         }
 
         virtual void Handle(const LateDepthStencilInfoSampleRate& event)
@@ -131,6 +135,7 @@ namespace ArchRast
             //lateStencil test compute
             mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
             mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
         }
 
         virtual void Handle(const LateDepthStencilInfoNullPS& event)
@@ -142,6 +147,7 @@ namespace ArchRast
             //lateStencil test compute
             mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
             mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
         }
 
         virtual void Handle(const EarlyDepthInfoPixelRate& event)
@@ -149,6 +155,7 @@ namespace ArchRast
             //earlyZ test compute
             mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
             mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            mNeedFlush = true;
         }
 
 
@@ -157,38 +164,43 @@ namespace ArchRast
             //lateZ test compute
             mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
             mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
-
+            mNeedFlush = true;
         }
 
 
-        virtual void Handle(const BackendDrawEndEvent& event)
+        // Flush cached events for this draw: emit the accumulated depth/stencil counters as events tagged with drawId, then reset them.
+        virtual void FlushDraw(uint32_t drawId)
         {
+            if (mNeedFlush == false) return;  // nothing was accumulated since the last flush
+
             //singleSample
-            EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
-            EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
-            EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
-            EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
+            EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
 
             //sampleRate
-            EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
-            EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
-            EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
-            EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
+            EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
 
             //pixelRate
-            EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
-            EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
 
 
             //NullPS
-            EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
-            EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
 
             //Reset Internal Counters
             mDSSingleSample = {};
             mDSSampleRate = {};
             mDSPixelRate = {};
             mDSNullPS = {};
+
+            mNeedFlush = false;  // stays clear until a Handle() overload accumulates again
         }
 
         virtual void Handle(const FrontendDrawEndEvent& event)
@@ -228,7 +240,7 @@ namespace ArchRast
         }
 
     protected:
-
+        bool mNeedFlush;
         // Per draw stats
         DepthStencilStats mDSSingleSample = {};
         DepthStencilStats mDSSampleRate = {};
@@ -294,4 +306,12 @@ namespace ArchRast
         pManager->Dispatch(event);
     }
 
+    // Flush for this thread: forwards drawId to the EventManager obtained from hThreadContext.
+    void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
+    {
+        EventManager* pManager = FromHandle(hThreadContext);
+        SWR_ASSERT(pManager != nullptr);
+
+        pManager->FlushDraw(drawId);
+    }
 }
index 4783144fcb5702f60f42b30b6429295e8ee5bf47..c0f9d6a81944bf82005c06bddaac3421b6b5117c 100644 (file)
@@ -43,5 +43,6 @@ namespace ArchRast
 
     // Dispatch event for this thread.
     void Dispatch(HANDLE hThreadContext, Event& event);
+    void FlushDraw(HANDLE hThreadContext, uint32_t drawId);
 };
 
index 78ba8f3e2d746377521f4c22a3a5a6be8f07ff73..88edc03f4f4bad026f7994404d8159d508e479f8 100644 (file)
@@ -69,6 +69,14 @@ namespace ArchRast
                 event.Accept(pHandler);
             }
         }
+
+        void FlushDraw(uint32_t drawId)  // Calls FlushDraw(drawId) on every handler in mHandlers.
+        {
+            for (auto pHandler : mHandlers)
+            {
+                pHandler->FlushDraw(drawId);
+            }
+        }
     private:
 
         // Handlers stay registered for life
index e937a631b9e39bfb116e6bbe92ed812fed47c466..9da7962826ca2fa1b100204f466889d1fbe3a7c2 100644 (file)
@@ -525,6 +525,7 @@ struct SWR_CONTEXT
     #define _AR_BEGIN(ctx, type, id)    ArchRast::Dispatch(ctx, ArchRast::Start(ArchRast::type, id))
     #define _AR_END(ctx, type, count)   ArchRast::Dispatch(ctx, ArchRast::End(ArchRast::type, count))
     #define _AR_EVENT(ctx, event)       ArchRast::Dispatch(ctx, ArchRast::event)
+    #define _AR_FLUSH(ctx, id)          ArchRast::FlushDraw(ctx, id)
 #else
     #ifdef KNOB_ENABLE_RDTSC
         #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type)
@@ -534,6 +535,7 @@ struct SWR_CONTEXT
         #define _AR_END(ctx, type, id)
     #endif
     #define _AR_EVENT(ctx, event)
+    #define _AR_FLUSH(ctx, id)
 #endif
 
 // Use these macros for api thread.
@@ -545,3 +547,4 @@ struct SWR_CONTEXT
 #define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
 #define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
 #define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)
+#define AR_FLUSH(id) _AR_FLUSH(AR_WORKER_CTX, id)
index e11291bb83e9c9f1a090f9b97607fd7776f01cad..0f6c94c6550f9929d228a634d60687399509fc68 100644 (file)
@@ -396,6 +396,8 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId,
     int32_t result = InterlockedDecrement((volatile LONG*)&pDC->threadsDone);
     SWR_ASSERT(result >= 0);
 
+    AR_FLUSH(pDC->drawId);
+
     if (result == 0)
     {
         ExecuteCallbacks(pContext, workerId, pDC);
index e6cacd75a2718fe014bee6caac6bf953c2fb8134..cfed2aded0c47519cfbd8bc75fa31f700fe68923 100644 (file)
@@ -42,6 +42,8 @@ namespace ArchRast
         EventHandler() {}
         virtual ~EventHandler() {}
 
+        virtual void FlushDraw(uint32_t drawId) {}  // No-op by default; presumably overridden by handlers that cache events per draw.
+
 % for name in protos['event_names']:
         virtual void Handle(const ${name}& event) {}
 % endfor