* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
-* @file archrast.h
+* @file archrast.cpp
*
-* @brief Definitions for archrast.
+* @brief Implementation for archrast.
*
******************************************************************************/
+#include <atomic>
+
#include "common/os.h"
#include "archrast/archrast.h"
#include "archrast/eventmanager.h"
+#include "gen_ar_eventhandlerfile.hpp"
namespace ArchRast
{
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Pass/fail counters for the depth (Z) and stencil tests, tracked
+    ///        separately for the early and the late test stage. Every
+    ///        counter starts at zero.
+    struct DepthStencilStats
+    {
+        // Depth (Z) test results.
+        uint32_t earlyZTestPassCount{0};
+        uint32_t earlyZTestFailCount{0};
+        uint32_t lateZTestPassCount{0};
+        uint32_t lateZTestFailCount{0};
+
+        // Stencil test results.
+        uint32_t earlyStencilTestPassCount{0};
+        uint32_t earlyStencilTestFailCount{0};
+        uint32_t lateStencilTestPassCount{0};
+        uint32_t lateStencilTestFailCount{0};
+    };
+
+    /// @brief Clipper counters; zero-initialized.
+    struct CStats
+    {
+        uint32_t clippedVerts{0}; // running total of clipped vertices
+    };
+
+    /// @brief Tessellation counters; zero-initialized.
+    struct TEStats
+    {
+        // @todo Rename to numPatches. This currently assumes one patch per
+        //       primitive; as long as that holds, counting prims is fine.
+        uint32_t inputPrims{0};
+    };
+
+    /// @brief Geometry shader counters.
+    ///
+    /// Each member carries a default initializer, matching the other stats
+    /// structs in this file, so a default-initialized GSStats starts at zero
+    /// instead of holding indeterminate values.
+    struct GSStats
+    {
+        uint32_t inputPrimCount     = 0; // primitives fed into the geometry shader
+        uint32_t primGeneratedCount = 0; // primitives produced by the geometry shader
+        uint32_t vertsInput         = 0; // vertices fed into the geometry shader
+    };
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Event handler that folds selected events into per-draw
+    ///        counters and saves summary events to the event file.
+    ///
+    /// Depth/stencil events are accumulated as they arrive and written out
+    /// by FlushDraw(). Clipper, tessellator and geometry shader events are
+    /// accumulated and written out when a FrontendDrawEndEvent is handled.
+    /// Start and End events are dropped.
+    class EventHandlerStatsFile : public EventHandlerFile
+    {
+    public:
+        /// @param id - identifier passed through unchanged to EventHandlerFile.
+        EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}
+
+        // These are events that we're not interested in saving in stats event files.
+        virtual void Handle(const Start& event) {}
+        virtual void Handle(const End& event) {}
+
+        // Early depth/stencil results, one handler per shading mode.
+        virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
+        {
+            AccumulateEarly(mDSSingleSample, event);
+        }
+
+        virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
+        {
+            AccumulateEarly(mDSSampleRate, event);
+        }
+
+        virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
+        {
+            AccumulateEarly(mDSNullPS, event);
+        }
+
+        // Late depth/stencil results, one handler per shading mode.
+        virtual void Handle(const LateDepthStencilInfoSingleSample& event)
+        {
+            AccumulateLate(mDSSingleSample, event);
+        }
+
+        virtual void Handle(const LateDepthStencilInfoSampleRate& event)
+        {
+            AccumulateLate(mDSSampleRate, event);
+        }
+
+        virtual void Handle(const LateDepthStencilInfoNullPS& event)
+        {
+            AccumulateLate(mDSNullPS, event);
+        }
+
+        // Pixel-rate events carry a pass count directly; the fail count is
+        // whatever remains of the active lanes.
+        virtual void Handle(const EarlyDepthInfoPixelRate& event)
+        {
+            mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
+            mDSPixelRate.earlyZTestFailCount +=
+                (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            mNeedFlush = true;
+        }
+
+        virtual void Handle(const LateDepthInfoPixelRate& event)
+        {
+            mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
+            mDSPixelRate.lateZTestFailCount +=
+                (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            mNeedFlush = true;
+        }
+
+        /// @brief Write the accumulated depth/stencil counters for a draw as
+        ///        summary events, then reset them.
+        ///
+        /// Does nothing when no depth/stencil event has been handled since
+        /// the previous flush.
+        ///
+        /// @param drawId - draw id stamped on every summary event.
+        virtual void FlushDraw(uint32_t drawId)
+        {
+            if (!mNeedFlush)
+            {
+                return;
+            }
+
+            // singleSample
+            EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
+
+            // sampleRate
+            EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
+
+            // pixelRate
+            EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
+
+            // NullPS
+            // NOTE(review): only the early NullPS counters are emitted; the
+            // late NullPS counters are accumulated above but never written
+            // out - confirm whether that is intentional.
+            EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
+
+            // Reset internal counters for the next draw.
+            mDSSingleSample = {};
+            mDSSampleRate   = {};
+            mDSPixelRate    = {};
+            mDSNullPS       = {};
+
+            mNeedFlush = false;
+        }
+
+        /// @brief Write the clipper, tessellator and geometry shader
+        ///        counters for the finished draw, then reset them.
+        virtual void Handle(const FrontendDrawEndEvent& event)
+        {
+            // Clipper
+            EventHandlerFile::Handle(VertsClipped(event.data.drawId, mClipper.clippedVerts));
+
+            // Tessellator
+            EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
+
+            // Geometry Shader
+            EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
+            EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
+            EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
+
+            // Reset internal counters for the next draw.
+            mClipper = {};
+            mTS      = {};
+            mGS      = {};
+        }
+
+        virtual void Handle(const GSPrimInfo& event)
+        {
+            mGS.inputPrimCount += event.data.inputPrimCount;
+            mGS.primGeneratedCount += event.data.primGeneratedCount;
+            mGS.vertsInput += event.data.vertsInput;
+        }
+
+        virtual void Handle(const ClipVertexCount& event)
+        {
+            // One bit per primitive in primMask, vertsPerPrim vertices each.
+            mClipper.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
+        }
+
+        virtual void Handle(const TessPrimCount& event)
+        {
+            mTS.inputPrims += event.data.primCount;
+        }
+
+    protected:
+        bool mNeedFlush; // true once any depth/stencil event arrived since the last FlushDraw
+        // Per draw stats
+        DepthStencilStats mDSSingleSample = {};
+        DepthStencilStats mDSSampleRate = {};
+        DepthStencilStats mDSPixelRate = {};
+        DepthStencilStats mDSNullPS = {};
+        DepthStencilStats mDSOmZ = {}; // NOTE(review): not read or written anywhere in this class
+        CStats mClipper = {};
+        TEStats mTS = {};
+        GSStats mGS = {};
+
+    private:
+        /// @brief Fold one early depth/stencil event into stats.
+        ///
+        /// A lane counts as failed when it is set in coverageMask but clear
+        /// in the pass mask. That requires the bitwise complement (~): the
+        /// logical NOT (!) collapses the whole mask to 0 or 1, so the fail
+        /// counters could never grow by more than one per event.
+        template <typename EventT>
+        void AccumulateEarly(DepthStencilStats& stats, const EventT& event)
+        {
+            // earlyZ test compute
+            stats.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            stats.earlyZTestFailCount += _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
+
+            // earlyStencil test compute
+            stats.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            stats.earlyStencilTestFailCount += _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
+
+            mNeedFlush = true;
+        }
+
+        /// @brief Fold one late depth/stencil event into stats; same fail
+        ///        rule as AccumulateEarly (covered lanes not in the pass mask).
+        template <typename EventT>
+        void AccumulateLate(DepthStencilStats& stats, const EventT& event)
+        {
+            // lateZ test compute
+            stats.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
+            stats.lateZTestFailCount += _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
+
+            // lateStencil test compute
+            stats.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
+            stats.lateStencilTestFailCount += _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
+
+            mNeedFlush = true;
+        }
+    };
+
+ // Reinterpret the opaque thread-context handle as the EventManager pointer it carries; no validation is done here.
static EventManager* FromHandle(HANDLE hThreadContext)
{
return reinterpret_cast<EventManager*>(hThreadContext);
}
// Construct an event manager and associate a handler with it.
- HANDLE CreateThreadContext()
+ HANDLE CreateThreadContext(AR_THREAD type)
{
+ // Handler ids come from a function-local atomic counter, so concurrent callers each get a distinct id.
+ static std::atomic<uint32_t> counter(0);
+ uint32_t id = counter.fetch_add(1); // fetch_add returns the value before the increment, so ids start at 0
+
EventManager* pManager = new EventManager();
- EventHandler* pHandler = new EventHandler();
+ EventHandlerFile* pHandler = new EventHandlerStatsFile(id);
if (pManager && pHandler)
{
- pManager->attach(pHandler);
+ pManager->Attach(pHandler);
+
+ if (type == AR_THREAD::API)
+ {
+ pHandler->Handle(ThreadStartApiEvent()); // first event given to the handler identifies an API thread
+ }
+ else
+ {
+ pHandler->Handle(ThreadStartWorkerEvent()); // any other thread type is recorded as a worker thread
+ }
+ pHandler->MarkHeader(); // NOTE(review): presumably marks what was written so far as the file header - confirm in EventHandlerFile
return pManager;
}
- SWR_ASSERT(0, "Failed to register thread.");
+ SWR_INVALID("Failed to register thread."); // NOTE(review): plain new throws instead of returning null, so this path looks unreachable - confirm
return nullptr;
}
}
// Dispatch event for this thread.
- void dispatch(HANDLE hThreadContext, Event& event)
+ void Dispatch(HANDLE hThreadContext, const Event& event)
+ {
+ EventManager* pManager = FromHandle(hThreadContext); // the handle is the EventManager pointer returned by CreateThreadContext
+ SWR_ASSERT(pManager != nullptr);
+
+ pManager->Dispatch(event); // hand the event to this thread's manager
+ }
+
+ // Ask this thread's event manager to flush draw drawId.
+ void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
{
EventManager* pManager = FromHandle(hThreadContext);
SWR_ASSERT(pManager != nullptr);
- pManager->dispatch(event);
+ pManager->FlushDraw(drawId);
}
}