1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.h"
//////////////////////////////////////////////////////////////////////////
/// @brief Running tallies of depth and stencil test outcomes. Every
///        counter starts at zero.
struct DepthStencilStats
{
    // Depth (Z) test pass/fail tallies, early stage then late stage.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test pass/fail tallies, early stage then late stage.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};

    // Number of tests performed for each stage/test kind.
    uint32_t earlyZTestCount{0};
    uint32_t lateZTestCount{0};
    uint32_t earlyStencilTestCount{0};
    uint32_t lateStencilTestCount{0};
};
57 uint32_t clippedVerts
= 0;
62 uint32_t inputPrims
= 0;
//@todo: Change this to numPatches. Assumes 1 patch per prim; if that holds, it's fine.
68 uint32_t inputPrimCount
;
69 uint32_t primGeneratedCount
;
73 //////////////////////////////////////////////////////////////////////////
74 /// @brief Event handler that saves stat events to event files. This
75 /// handler filters out unwanted events.
76 class EventHandlerStatsFile
: public EventHandlerFile
79 EventHandlerStatsFile(uint32_t id
) : EventHandlerFile(id
) {}
81 // These are events that we're not interested in saving in stats event files.
82 virtual void Handle(const Start
& event
) {}
83 virtual void Handle(const End
& event
) {}
85 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
88 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
89 mDSSingleSample
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
90 mDSSingleSample
.earlyZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
92 //earlyStencil test compute
93 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
94 mDSSingleSample
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
95 mDSSingleSample
.earlyStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
98 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
100 //earlyZ test compute
101 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
102 mDSSampleRate
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
103 mDSSampleRate
.earlyZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
105 //earlyStencil test compute
106 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
107 mDSSampleRate
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
108 mDSSampleRate
.earlyStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
111 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
113 //earlyZ test compute
114 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
115 mDSNullPS
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
116 mDSNullPS
.earlyZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
118 //earlyStencil test compute
119 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
120 mDSNullPS
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
121 mDSNullPS
.earlyStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
124 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
127 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
128 mDSSingleSample
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
129 mDSSingleSample
.lateZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
131 //lateStencil test compute
132 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
133 mDSSingleSample
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
134 mDSSingleSample
.lateStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
137 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
140 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
141 mDSSampleRate
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
142 mDSSampleRate
.lateZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
144 //lateStencil test compute
145 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
146 mDSSampleRate
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
147 mDSSampleRate
.lateStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
150 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
153 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
154 mDSNullPS
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
155 mDSNullPS
.lateZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
157 //lateStencil test compute
158 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
159 mDSNullPS
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
160 mDSNullPS
.lateStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
163 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
165 //earlyZ test compute
166 mDSPixelRate
.earlyZTestCount
+= _mm_popcnt_u32(event
.data
.activeLanes
);
167 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
168 mDSPixelRate
.earlyZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
172 virtual void Handle(const LateDepthInfoPixelRate
& event
)
175 mDSPixelRate
.lateZTestCount
+= _mm_popcnt_u32(event
.data
.activeLanes
);
176 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
177 mDSPixelRate
.lateZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
182 virtual void Handle(const BackendDrawEndEvent
& event
)
185 EventHandlerFile::Handle(EarlyZSingleSample(event
.data
.drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
, mDSSingleSample
.earlyZTestCount
));
186 EventHandlerFile::Handle(LateZSingleSample(event
.data
.drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
, mDSSingleSample
.lateZTestCount
));
187 EventHandlerFile::Handle(EarlyStencilSingleSample(event
.data
.drawId
, mDSSingleSample
.earlyStencilTestPassCount
, mDSSingleSample
.earlyStencilTestFailCount
, mDSSingleSample
.earlyStencilTestCount
));
188 EventHandlerFile::Handle(LateStencilSingleSample(event
.data
.drawId
, mDSSingleSample
.lateStencilTestPassCount
, mDSSingleSample
.lateStencilTestFailCount
, mDSSingleSample
.lateStencilTestCount
));
191 EventHandlerFile::Handle(EarlyZSampleRate(event
.data
.drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
, mDSSampleRate
.earlyZTestCount
));
192 EventHandlerFile::Handle(LateZSampleRate(event
.data
.drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
, mDSSampleRate
.lateZTestCount
));
193 EventHandlerFile::Handle(EarlyStencilSampleRate(event
.data
.drawId
, mDSSampleRate
.earlyStencilTestPassCount
, mDSSampleRate
.earlyStencilTestFailCount
, mDSSampleRate
.earlyStencilTestCount
));
194 EventHandlerFile::Handle(LateStencilSampleRate(event
.data
.drawId
, mDSSampleRate
.lateStencilTestPassCount
, mDSSampleRate
.lateStencilTestFailCount
, mDSSampleRate
.lateStencilTestCount
));
197 EventHandlerFile::Handle(EarlyZPixelRate(event
.data
.drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
, mDSPixelRate
.earlyZTestCount
));
198 EventHandlerFile::Handle(LateZPixelRate(event
.data
.drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
, mDSPixelRate
.lateZTestCount
));
202 EventHandlerFile::Handle(EarlyZNullPS(event
.data
.drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
, mDSNullPS
.earlyZTestCount
));
203 EventHandlerFile::Handle(EarlyStencilNullPS(event
.data
.drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
, mDSNullPS
.earlyStencilTestCount
));
205 //Reset Internal Counters
206 mDSSingleSample
= {};
212 virtual void Handle(const FrontendDrawEndEvent
& event
)
215 EventHandlerFile::Handle(VertsClipped(event
.data
.drawId
, mClipper
.clippedVerts
));
218 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
221 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
222 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
223 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
225 //Reset Internal Counters
231 virtual void Handle(const GSPrimInfo
& event
)
233 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
234 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
235 mGS
.vertsInput
+= event
.data
.vertsInput
;
238 virtual void Handle(const ClipVertexCount
& event
)
240 mClipper
.clippedVerts
+= (_mm_popcnt_u32(event
.data
.primMask
) * event
.data
.vertsPerPrim
);
243 virtual void Handle(const TessPrimCount
& event
)
245 mTS
.inputPrims
+= event
.data
.primCount
;
251 DepthStencilStats mDSSingleSample
= {};
252 DepthStencilStats mDSSampleRate
= {};
253 DepthStencilStats mDSPixelRate
= {};
254 DepthStencilStats mDSNullPS
= {};
255 DepthStencilStats mDSOmZ
= {};
256 CStats mClipper
= {};
262 static EventManager
* FromHandle(HANDLE hThreadContext
)
264 return reinterpret_cast<EventManager
*>(hThreadContext
);
267 // Construct an event manager and associate a handler with it.
268 HANDLE
CreateThreadContext(AR_THREAD type
)
270 // Can we assume single threaded here?
271 static std::atomic
<uint32_t> counter(0);
272 uint32_t id
= counter
.fetch_add(1);
274 EventManager
* pManager
= new EventManager();
275 EventHandlerFile
* pHandler
= new EventHandlerStatsFile(id
);
277 if (pManager
&& pHandler
)
279 pManager
->Attach(pHandler
);
281 if (type
== AR_THREAD::API
)
283 pHandler
->Handle(ThreadStartApiEvent());
287 pHandler
->Handle(ThreadStartWorkerEvent());
289 pHandler
->MarkHeader();
294 SWR_ASSERT(0, "Failed to register thread.");
298 void DestroyThreadContext(HANDLE hThreadContext
)
300 EventManager
* pManager
= FromHandle(hThreadContext
);
301 SWR_ASSERT(pManager
!= nullptr);
306 // Dispatch event for this thread.
307 void Dispatch(HANDLE hThreadContext
, Event
& event
)
309 EventManager
* pManager
= FromHandle(hThreadContext
);
310 SWR_ASSERT(pManager
!= nullptr);
312 pManager
->Dispatch(event
);