1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for archrast.
27 ******************************************************************************/
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.h"
//////////////////////////////////////////////////////////////////////////
/// @brief struct that keeps track of depth and stencil event information.
///        Every counter defaults to zero, so assigning `{}` to an instance
///        clears all of its statistics.
struct DepthStencilStats
{
    // Pass/fail tallies per test kind.
    uint32_t earlyZTestPassCount       = 0;
    uint32_t earlyZTestFailCount       = 0;
    uint32_t lateZTestPassCount        = 0;
    uint32_t lateZTestFailCount        = 0;
    uint32_t earlyStencilTestPassCount = 0;
    uint32_t earlyStencilTestFailCount = 0;
    uint32_t lateStencilTestPassCount  = 0;
    uint32_t lateStencilTestFailCount  = 0;

    // Total number of tests performed (pass + fail) per test kind.
    uint32_t earlyZTestCount           = 0;
    uint32_t lateZTestCount            = 0;
    uint32_t earlyStencilTestCount     = 0;
    uint32_t lateStencilTestCount      = 0;
};
57 uint32_t clippedVerts
= 0;
62 uint32_t inputPrims
= 0;
//@todo: Change this to numPatches. Assumes 1 patch per prim; if that holds, it's fine.
68 uint32_t inputPrimCount
;
69 uint32_t primGeneratedCount
;
73 //////////////////////////////////////////////////////////////////////////
74 /// @brief Event handler that saves stat events to event files. This
75 /// handler filters out unwanted events.
76 class EventHandlerStatsFile
: public EventHandlerFile
79 DepthStencilStats DSSingleSample
= {};
80 DepthStencilStats DSSampleRate
= {};
81 DepthStencilStats DSPixelRate
= {};
82 DepthStencilStats DSNullPS
= {};
83 DepthStencilStats DSOmZ
= {};
88 EventHandlerStatsFile(uint32_t id
) : EventHandlerFile(id
) {}
90 // These are events that we're not interested in saving in stats event files.
91 virtual void Handle(Start event
) {}
92 virtual void Handle(End event
) {}
94 virtual void Handle(EarlyDepthStencilInfoSingleSample event
)
97 DSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
98 DSSingleSample
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
99 DSSingleSample
.earlyZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
101 //earlyStencil test compute
102 DSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
103 DSSingleSample
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
104 DSSingleSample
.earlyStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
106 //outputerMerger test compute
107 DSOmZ
.earlyZTestPassCount
+= DSSingleSample
.earlyZTestPassCount
;
108 DSOmZ
.earlyZTestFailCount
+= DSSingleSample
.earlyZTestFailCount
;
109 DSOmZ
.earlyZTestCount
+= DSSingleSample
.earlyZTestCount
;
110 DSOmZ
.earlyStencilTestPassCount
+= DSSingleSample
.earlyStencilTestPassCount
;
111 DSOmZ
.earlyStencilTestFailCount
+= DSSingleSample
.earlyStencilTestFailCount
;
112 DSOmZ
.earlyStencilTestCount
+= DSSingleSample
.earlyStencilTestCount
;
115 virtual void Handle(EarlyDepthStencilInfoSampleRate event
)
117 //earlyZ test compute
118 DSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
119 DSSampleRate
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
120 DSSampleRate
.earlyZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
122 //earlyStencil test compute
123 DSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
124 DSSampleRate
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
125 DSSampleRate
.earlyStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
127 //outputerMerger test compute
128 DSOmZ
.earlyZTestPassCount
+= DSSampleRate
.earlyZTestPassCount
;
129 DSOmZ
.earlyZTestFailCount
+= DSSampleRate
.earlyZTestFailCount
;
130 DSOmZ
.earlyZTestCount
+= DSSampleRate
.earlyZTestCount
;
131 DSOmZ
.earlyStencilTestPassCount
+= DSSampleRate
.earlyStencilTestPassCount
;
132 DSOmZ
.earlyStencilTestFailCount
+= DSSampleRate
.earlyStencilTestFailCount
;
133 DSOmZ
.earlyStencilTestCount
+= DSSampleRate
.earlyStencilTestCount
;
136 virtual void Handle(EarlyDepthStencilInfoNullPS event
)
138 //earlyZ test compute
139 DSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
140 DSNullPS
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
141 DSNullPS
.earlyZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
143 //earlyStencil test compute
144 DSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
145 DSNullPS
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
146 DSNullPS
.earlyStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
148 //outputerMerger test compute
149 DSOmZ
.earlyZTestPassCount
+= DSNullPS
.earlyZTestPassCount
;
150 DSOmZ
.earlyZTestFailCount
+= DSNullPS
.earlyZTestFailCount
;
151 DSOmZ
.earlyZTestCount
+= DSNullPS
.earlyZTestCount
;
152 DSOmZ
.earlyStencilTestPassCount
+= DSNullPS
.earlyStencilTestPassCount
;
153 DSOmZ
.earlyStencilTestFailCount
+= DSNullPS
.earlyStencilTestFailCount
;
154 DSOmZ
.earlyStencilTestCount
+= DSNullPS
.earlyStencilTestCount
;
157 virtual void Handle(LateDepthStencilInfoSingleSample event
)
160 DSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
161 DSSingleSample
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
162 DSSingleSample
.lateZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
164 //lateStencil test compute
165 DSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
166 DSSingleSample
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
167 DSSingleSample
.lateStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
169 //outputerMerger test compute
170 DSOmZ
.lateZTestPassCount
+= DSSingleSample
.lateZTestPassCount
;
171 DSOmZ
.lateZTestFailCount
+= DSSingleSample
.lateZTestFailCount
;
172 DSOmZ
.lateZTestCount
+= DSSingleSample
.lateZTestCount
;
173 DSOmZ
.lateStencilTestPassCount
+= DSSingleSample
.lateStencilTestPassCount
;
174 DSOmZ
.lateStencilTestFailCount
+= DSSingleSample
.lateStencilTestFailCount
;
175 DSOmZ
.lateStencilTestCount
+= DSSingleSample
.lateStencilTestCount
;
178 virtual void Handle(LateDepthStencilInfoSampleRate event
)
181 DSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
182 DSSampleRate
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
183 DSSampleRate
.lateZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
185 //lateStencil test compute
186 DSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
187 DSSampleRate
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
188 DSSampleRate
.lateStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
190 //outputerMerger test compute
191 DSOmZ
.lateZTestPassCount
+= DSSampleRate
.lateZTestPassCount
;
192 DSOmZ
.lateZTestFailCount
+= DSSampleRate
.lateZTestFailCount
;
193 DSOmZ
.lateZTestCount
+= DSSampleRate
.lateZTestCount
;
194 DSOmZ
.lateStencilTestPassCount
+= DSSampleRate
.lateStencilTestPassCount
;
195 DSOmZ
.lateStencilTestFailCount
+= DSSampleRate
.lateStencilTestFailCount
;
196 DSOmZ
.lateStencilTestCount
+= DSSampleRate
.lateStencilTestCount
;
199 virtual void Handle(LateDepthStencilInfoNullPS event
)
202 DSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
203 DSNullPS
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
204 DSNullPS
.lateZTestCount
+= (_mm_popcnt_u32(event
.data
.depthPassMask
) + _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
));
206 //lateStencil test compute
207 DSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
208 DSNullPS
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
209 DSNullPS
.lateStencilTestCount
+= (_mm_popcnt_u32(event
.data
.stencilPassMask
) + _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
));
211 //outputerMerger test compute
212 DSOmZ
.lateZTestPassCount
+= DSNullPS
.lateZTestPassCount
;
213 DSOmZ
.lateZTestFailCount
+= DSNullPS
.lateZTestFailCount
;
214 DSOmZ
.lateZTestCount
+= DSNullPS
.lateZTestCount
;
215 DSOmZ
.lateStencilTestPassCount
+= DSNullPS
.lateStencilTestPassCount
;
216 DSOmZ
.lateStencilTestFailCount
+= DSNullPS
.lateStencilTestFailCount
;
217 DSOmZ
.lateStencilTestCount
+= DSNullPS
.lateStencilTestCount
;
220 virtual void Handle(EarlyDepthInfoPixelRate event
)
222 //earlyZ test compute
223 DSPixelRate
.earlyZTestCount
+= _mm_popcnt_u32(event
.data
.activeLanes
);
224 DSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
225 DSPixelRate
.earlyZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
227 //outputerMerger test compute
228 DSOmZ
.earlyZTestPassCount
+= DSPixelRate
.earlyZTestPassCount
;
229 DSOmZ
.earlyZTestFailCount
+= DSPixelRate
.earlyZTestFailCount
;
230 DSOmZ
.earlyZTestCount
+= DSPixelRate
.earlyZTestCount
;
234 virtual void Handle(LateDepthInfoPixelRate event
)
237 DSPixelRate
.lateZTestCount
+= _mm_popcnt_u32(event
.data
.activeLanes
);
238 DSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
239 DSPixelRate
.lateZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
241 //outputerMerger test compute
242 DSOmZ
.lateZTestPassCount
+= DSPixelRate
.lateZTestPassCount
;
243 DSOmZ
.lateZTestFailCount
+= DSPixelRate
.lateZTestFailCount
;
244 DSOmZ
.lateZTestCount
+= DSPixelRate
.lateZTestCount
;
249 virtual void Handle(BackendDrawEndEvent event
)
252 EventHandlerFile::Handle(EarlyZSingleSample(event
.data
.drawId
, DSSingleSample
.earlyZTestPassCount
, DSSingleSample
.earlyZTestFailCount
, DSSingleSample
.earlyZTestCount
));
253 EventHandlerFile::Handle(LateZSingleSample(event
.data
.drawId
, DSSingleSample
.lateZTestPassCount
, DSSingleSample
.lateZTestFailCount
, DSSingleSample
.lateZTestCount
));
254 EventHandlerFile::Handle(EarlyStencilSingleSample(event
.data
.drawId
, DSSingleSample
.earlyStencilTestPassCount
, DSSingleSample
.earlyStencilTestFailCount
, DSSingleSample
.earlyStencilTestCount
));
255 EventHandlerFile::Handle(LateStencilSingleSample(event
.data
.drawId
, DSSingleSample
.lateStencilTestPassCount
, DSSingleSample
.lateStencilTestFailCount
, DSSingleSample
.lateStencilTestCount
));
258 EventHandlerFile::Handle(EarlyZSampleRate(event
.data
.drawId
, DSSampleRate
.earlyZTestPassCount
, DSSampleRate
.earlyZTestFailCount
, DSSampleRate
.earlyZTestCount
));
259 EventHandlerFile::Handle(LateZSampleRate(event
.data
.drawId
, DSSampleRate
.lateZTestPassCount
, DSSampleRate
.lateZTestFailCount
, DSSampleRate
.lateZTestCount
));
260 EventHandlerFile::Handle(EarlyStencilSampleRate(event
.data
.drawId
, DSSampleRate
.earlyStencilTestPassCount
, DSSampleRate
.earlyStencilTestFailCount
, DSSampleRate
.earlyStencilTestCount
));
261 EventHandlerFile::Handle(LateStencilSampleRate(event
.data
.drawId
, DSSampleRate
.lateStencilTestPassCount
, DSSampleRate
.lateStencilTestFailCount
, DSSampleRate
.lateStencilTestCount
));
264 EventHandlerFile::Handle(EarlyZPixelRate(event
.data
.drawId
, DSPixelRate
.earlyZTestPassCount
, DSPixelRate
.earlyZTestFailCount
, DSPixelRate
.earlyZTestCount
));
265 EventHandlerFile::Handle(LateZPixelRate(event
.data
.drawId
, DSPixelRate
.lateZTestPassCount
, DSPixelRate
.lateZTestFailCount
, DSPixelRate
.lateZTestCount
));
269 EventHandlerFile::Handle(EarlyZNullPS(event
.data
.drawId
, DSNullPS
.earlyZTestPassCount
, DSNullPS
.earlyZTestFailCount
, DSNullPS
.earlyZTestCount
));
270 EventHandlerFile::Handle(EarlyStencilNullPS(event
.data
.drawId
, DSNullPS
.earlyStencilTestPassCount
, DSNullPS
.earlyStencilTestFailCount
, DSNullPS
.earlyStencilTestCount
));
273 EventHandlerFile::Handle(EarlyOmZ(event
.data
.drawId
, DSOmZ
.earlyZTestPassCount
, DSOmZ
.earlyZTestFailCount
, DSOmZ
.earlyZTestCount
));
274 EventHandlerFile::Handle(EarlyOmStencil(event
.data
.drawId
, DSOmZ
.earlyStencilTestPassCount
, DSOmZ
.earlyStencilTestFailCount
, DSOmZ
.earlyStencilTestCount
));
275 EventHandlerFile::Handle(LateOmZ(event
.data
.drawId
, DSOmZ
.lateZTestPassCount
, DSOmZ
.lateZTestFailCount
, DSOmZ
.lateZTestCount
));
276 EventHandlerFile::Handle(LateOmStencil(event
.data
.drawId
, DSOmZ
.lateStencilTestPassCount
, DSOmZ
.lateStencilTestFailCount
, DSOmZ
.lateStencilTestCount
));
278 //Reset Internal Counters
286 virtual void Handle(FrontendDrawEndEvent event
)
289 EventHandlerFile::Handle(VertsClipped(event
.data
.drawId
, CS
.clippedVerts
));
292 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, TS
.inputPrims
));
295 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, GS
.inputPrimCount
));
296 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, GS
.primGeneratedCount
));
297 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, GS
.vertsInput
));
299 //Reset Internal Counters
305 virtual void Handle(GSPrimInfo event
)
307 GS
.inputPrimCount
+= event
.data
.inputPrimCount
;
308 GS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
309 GS
.vertsInput
+= event
.data
.vertsInput
;
312 virtual void Handle(ClipVertexCount event
)
314 CS
.clippedVerts
+= (_mm_popcnt_u32(event
.data
.primMask
) * event
.data
.vertsPerPrim
);
317 virtual void Handle(TessPrimCount event
)
319 TS
.inputPrims
+= event
.data
.primCount
;
323 static EventManager
* FromHandle(HANDLE hThreadContext
)
325 return reinterpret_cast<EventManager
*>(hThreadContext
);
328 // Construct an event manager and associate a handler with it.
329 HANDLE
CreateThreadContext(AR_THREAD type
)
331 // Can we assume single threaded here?
332 static std::atomic
<uint32_t> counter(0);
333 uint32_t id
= counter
.fetch_add(1);
335 EventManager
* pManager
= new EventManager();
336 EventHandlerFile
* pHandler
= new EventHandlerStatsFile(id
);
338 if (pManager
&& pHandler
)
340 pManager
->Attach(pHandler
);
342 if (type
== AR_THREAD::API
)
344 pHandler
->Handle(ThreadStartApiEvent());
348 pHandler
->Handle(ThreadStartWorkerEvent());
350 pHandler
->MarkHeader();
355 SWR_ASSERT(0, "Failed to register thread.");
359 void DestroyThreadContext(HANDLE hThreadContext
)
361 EventManager
* pManager
= FromHandle(hThreadContext
);
362 SWR_ASSERT(pManager
!= nullptr);
367 // Dispatch event for this thread.
368 void Dispatch(HANDLE hThreadContext
, Event
& event
)
370 EventManager
* pManager
= FromHandle(hThreadContext
);
371 SWR_ASSERT(pManager
!= nullptr);
373 pManager
->Dispatch(event
);