swr/rast: Slightly more efficient blend jit
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
index 48dec28a1059de10c6bae606fbc96e8e97d04244..cda161246bba08d73ce625147be05358e1eb1283 100644 (file)
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
-* @file archrast.h
+* @file archrast.cpp
 *
-* @brief Definitions for archrast.
+* @brief Implementation for archrast.
 *
 ******************************************************************************/
+#include <atomic>
+
 #include "common/os.h"
 #include "archrast/archrast.h"
 #include "archrast/eventmanager.h"
+#include "gen_ar_eventhandlerfile.hpp"
 
 namespace ArchRast
 {
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief struct that keeps track of depth and stencil event information
+    struct DepthStencilStats
+    {
+        uint32_t earlyZTestPassCount = 0;
+        uint32_t earlyZTestFailCount = 0;
+        uint32_t lateZTestPassCount = 0;
+        uint32_t lateZTestFailCount = 0;
+        uint32_t earlyStencilTestPassCount = 0;
+        uint32_t earlyStencilTestFailCount = 0;
+        uint32_t lateStencilTestPassCount = 0;
+        uint32_t lateStencilTestFailCount = 0;
+    };
+
+    /// @brief Clipper tallies. Starts at zero.
+    struct CStats
+    {
+        uint32_t clippedVerts{0}; ///< running total of clipped vertices
+    };
+
+    /// @brief Tessellation tallies. Starts at zero.
+    struct TEStats
+    {
+        //@todo:: Rename this to numPatches. The count assumes one patch per
+        //        primitive and is only accurate while that assumption holds.
+        uint32_t inputPrims{0};
+    };
+
+    /// @brief Geometry shader tallies.
+    ///
+    /// The members carry default initializers, like the other stat structs in
+    /// this file, so an instance created without `= {}` still starts at zero
+    /// instead of holding indeterminate values.
+    struct GSStats
+    {
+        uint32_t inputPrimCount = 0;     ///< running total of GS input primitives
+        uint32_t primGeneratedCount = 0; ///< running total of primitives generated
+        uint32_t vertsInput = 0;         ///< running total of input vertices
+    };
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Event handler that saves stat events to event files. This
+    ///        handler filters out unwanted events.
+    class EventHandlerStatsFile : public EventHandlerFile
+    {
+    public:
+        EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}
+
+        // These are events that we're not interested in saving in stats event files.
+        virtual void Handle(const Start& event) {}
+        virtual void Handle(const End& event) {}
+
+        virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
+        {
+            //earlyZ test compute
+            mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+
+            //earlyStencil test compute
+            mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
+        }
+
+        virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
+        {
+            //earlyZ test compute
+            mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+
+            //earlyStencil test compute
+            mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
+        }
+
+        virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
+        {
+            //earlyZ test compute
+            mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+
+            //earlyStencil test compute
+            mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
+        }
+
+        virtual void Handle(const LateDepthStencilInfoSingleSample& event)
+        {
+            //lateZ test compute
+            mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+
+            //lateStencil test compute
+            mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
+        }
+
+        virtual void Handle(const LateDepthStencilInfoSampleRate& event)
+        {
+            //lateZ test compute
+            mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+
+            //lateStencil test compute
+            mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
+        }
+
+        virtual void Handle(const LateDepthStencilInfoNullPS& event)
+        {
+            //lateZ test compute
+            mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+
+            //lateStencil test compute
+            mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mNeedFlush = true;
+        }
+
+        virtual void Handle(const EarlyDepthInfoPixelRate& event)
+        {
+            //earlyZ test compute
+            mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
+            mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            mNeedFlush = true;
+        }
+
+
+        virtual void Handle(const LateDepthInfoPixelRate& event)
+        {
+            //lateZ test compute
+            mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
+            mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            mNeedFlush = true;
+        }
+
+
+        // Flush cached events for this draw
+        virtual void FlushDraw(uint32_t drawId)
+        {
+            if (mNeedFlush == false) return;
+
+            //singleSample
+            EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
+
+            //sampleRate
+            EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
+
+            //pixelRate
+            EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
+
+
+            //NullPS
+            EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
+
+            //Reset Internal Counters
+            mDSSingleSample = {};
+            mDSSampleRate = {};
+            mDSPixelRate = {};
+            mDSNullPS = {};
+
+            mNeedFlush = false;
+        }
+
+        virtual void Handle(const FrontendDrawEndEvent& event)
+        {
+            //Clipper
+            EventHandlerFile::Handle(VertsClipped(event.data.drawId, mClipper.clippedVerts));
+
+            //Tesselator
+            EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
+
+            //Geometry Shader
+            EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
+            EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
+            EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
+
+            //Reset Internal Counters
+            mClipper = {};
+            mTS = {};
+            mGS = {};
+        }
+
+        virtual void Handle(const GSPrimInfo& event)
+        {
+            mGS.inputPrimCount += event.data.inputPrimCount;
+            mGS.primGeneratedCount += event.data.primGeneratedCount;
+            mGS.vertsInput += event.data.vertsInput;
+        }
+
+        virtual void Handle(const ClipVertexCount& event)
+        {
+            mClipper.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
+        }
+
+        virtual void Handle(const TessPrimCount& event)
+        {
+            mTS.inputPrims += event.data.primCount;
+        }
+
+    protected:
+        bool mNeedFlush;
+        // Per draw stats
+        DepthStencilStats mDSSingleSample = {};
+        DepthStencilStats mDSSampleRate = {};
+        DepthStencilStats mDSPixelRate = {};
+        DepthStencilStats mDSNullPS = {};
+        DepthStencilStats mDSOmZ = {};
+        CStats mClipper = {};
+        TEStats mTS = {};
+        GSStats mGS = {};
+
+    };
+
     // Convert an opaque thread-context handle back into an EventManager pointer.
     // This is a plain cast: the handle is neither validated nor dereferenced here.
     static EventManager* FromHandle(HANDLE hThreadContext)
     {
         return reinterpret_cast<EventManager*>(hThreadContext);
     }
 
     // Construct an event manager and associate a handler with it.
     // Returns the manager as an opaque HANDLE, or nullptr if either allocation is null.
-    HANDLE CreateThreadContext()
+    HANDLE CreateThreadContext(AR_THREAD type) // type picks which thread-start event the new handler receives
     {
+        // Can we assume single threaded here? The atomic fetch_add yields a distinct id per call either way.
+        static std::atomic<uint32_t> counter(0);
+        uint32_t id = counter.fetch_add(1); // id is handed to the stats-file handler's constructor
+
         EventManager* pManager = new EventManager();
-        EventHandler* pHandler = new EventHandler();
+        EventHandlerFile* pHandler = new EventHandlerStatsFile(id);
 
         if (pManager && pHandler)
         {
-            pManager->attach(pHandler);
+            pManager->Attach(pHandler);
+
+            if (type == AR_THREAD::API)
+            {
+                pHandler->Handle(ThreadStartApiEvent());
+            }
+            else
+            {
+                pHandler->Handle(ThreadStartWorkerEvent());
+            }
+            pHandler->MarkHeader(); // called only after the thread-start event has been handled
 
             return pManager;
         }
 
         // NOTE(review): if only one of the two allocations is null, the other is not released on this path -- confirm whether that can occur.
-        SWR_ASSERT(0, "Failed to register thread.");
+        SWR_INVALID("Failed to register thread.");
         return nullptr;
     }
 
@@ -62,11 +298,20 @@ namespace ArchRast
     }
 
     // Dispatch event for this thread.
     // hThreadContext is converted with FromHandle and asserted to be non-null before use.
-    void dispatch(HANDLE hThreadContext, Event& event)
+    void Dispatch(HANDLE hThreadContext, const Event& event)
+    {
+        EventManager* pManager = FromHandle(hThreadContext);
+        SWR_ASSERT(pManager != nullptr);
+
+        pManager->Dispatch(event); // forward the event to the manager behind the handle
+    }
+
+    // Flush for this thread: forwards drawId to FlushDraw on the manager behind hThreadContext.
+    void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
     {
         EventManager* pManager = FromHandle(hThreadContext);
         SWR_ASSERT(pManager != nullptr);
 
-        pManager->dispatch(event);
+        pManager->FlushDraw(drawId); // the handle is asserted non-null above, but only when SWR_ASSERT is active
     }
 }