1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.hpp"
37 //////////////////////////////////////////////////////////////////////////
38 /// @brief struct that keeps track of depth and stencil event information
/// Pass/fail tallies for the depth (Z) and stencil tests, kept separately for
/// the early and the late test stage. Every counter starts at zero.
struct DepthStencilStats
{
    // Depth test
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
53 uint32_t trivialRejectCount
;
54 uint32_t trivialAcceptCount
;
55 uint32_t mustClipCount
;
60 uint32_t inputPrims
= 0;
61 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
66 uint32_t inputPrimCount
;
67 uint32_t primGeneratedCount
;
73 uint32_t rasterTiles
= 0;
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Event handler that handles API thread events. This is shared
78 /// between the API and its caller (e.g. driver shim) but typically
79 /// there is only a single API thread per context. So you can save
80 /// information in the class to be used for other events.
81 class EventHandlerApiStats
: public EventHandlerFile
84 EventHandlerApiStats(uint32_t id
) : EventHandlerFile(id
) {}
86 virtual void Handle(const DrawInstancedEvent
& event
)
88 DrawInfoEvent
e(event
.data
.drawId
, ArchRast::Instanced
, event
.data
.topology
,
89 event
.data
.numVertices
, 0, 0, event
.data
.startVertex
, event
.data
.numInstances
,
90 event
.data
.startInstance
, event
.data
.tsEnable
, event
.data
.gsEnable
, event
.data
.soEnable
, event
.data
.soTopology
, event
.data
.splitId
);
92 EventHandlerFile::Handle(e
);
95 virtual void Handle(const DrawIndexedInstancedEvent
& event
)
97 DrawInfoEvent
e(event
.data
.drawId
, ArchRast::IndexedInstanced
, event
.data
.topology
, 0,
98 event
.data
.numIndices
, event
.data
.indexOffset
, event
.data
.baseVertex
, event
.data
.numInstances
,
99 event
.data
.startInstance
, event
.data
.tsEnable
, event
.data
.gsEnable
, event
.data
.soEnable
, event
.data
.soTopology
, event
.data
.splitId
);
101 EventHandlerFile::Handle(e
);
105 //////////////////////////////////////////////////////////////////////////
106 /// @brief Event handler that handles worker thread events. There is one
107 /// event handler per thread. The python script will need to sum
108 /// up counters across all of the threads.
109 class EventHandlerWorkerStats
: public EventHandlerFile
112 EventHandlerWorkerStats(uint32_t id
) : EventHandlerFile(id
), mNeedFlush(false) {}
114 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
116 //earlyZ test compute
117 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
118 mDSSingleSample
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
120 //earlyStencil test compute
121 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
122 mDSSingleSample
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
124 //earlyZ test single and multi sample
125 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
126 mDSCombined
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
128 //earlyStencil test single and multi sample
129 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
130 mDSCombined
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
135 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
137 //earlyZ test compute
138 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
139 mDSSampleRate
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
141 //earlyStencil test compute
142 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
143 mDSSampleRate
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
145 //earlyZ test single and multi sample
146 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
147 mDSCombined
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
149 //earlyStencil test single and multi sample
150 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
151 mDSCombined
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
156 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
158 //earlyZ test compute
159 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
160 mDSNullPS
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
162 //earlyStencil test compute
163 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
164 mDSNullPS
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
168 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
171 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
172 mDSSingleSample
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
174 //lateStencil test compute
175 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
176 mDSSingleSample
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
178 //lateZ test single and multi sample
179 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
180 mDSCombined
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
182 //lateStencil test single and multi sample
183 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
184 mDSCombined
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
189 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
192 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
193 mDSSampleRate
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
195 //lateStencil test compute
196 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
197 mDSSampleRate
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
200 //lateZ test single and multi sample
201 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
202 mDSCombined
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
204 //lateStencil test single and multi sample
205 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
206 mDSCombined
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
211 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
214 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
215 mDSNullPS
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
217 //lateStencil test compute
218 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
219 mDSNullPS
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
223 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
225 //earlyZ test compute
226 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
227 mDSPixelRate
.earlyZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
232 virtual void Handle(const LateDepthInfoPixelRate
& event
)
235 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
236 mDSPixelRate
.lateZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
241 virtual void Handle(const ClipInfoEvent
& event
)
243 mClipper
.mustClipCount
+= _mm_popcnt_u32(event
.data
.clipMask
);
244 mClipper
.trivialRejectCount
+= event
.data
.numInvocations
- _mm_popcnt_u32(event
.data
.validMask
);
245 mClipper
.trivialAcceptCount
+= _mm_popcnt_u32(event
.data
.validMask
& ~event
.data
.clipMask
);
248 // Flush cached events for this draw
249 virtual void FlushDraw(uint32_t drawId
)
251 if (mNeedFlush
== false) return;
254 EventHandlerFile::Handle(EarlyZSingleSample(drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
));
255 EventHandlerFile::Handle(LateZSingleSample(drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
));
256 EventHandlerFile::Handle(EarlyStencilSingleSample(drawId
, mDSSingleSample
.earlyStencilTestPassCount
, mDSSingleSample
.earlyStencilTestFailCount
));
257 EventHandlerFile::Handle(LateStencilSingleSample(drawId
, mDSSingleSample
.lateStencilTestPassCount
, mDSSingleSample
.lateStencilTestFailCount
));
260 EventHandlerFile::Handle(EarlyZSampleRate(drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
));
261 EventHandlerFile::Handle(LateZSampleRate(drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
));
262 EventHandlerFile::Handle(EarlyStencilSampleRate(drawId
, mDSSampleRate
.earlyStencilTestPassCount
, mDSSampleRate
.earlyStencilTestFailCount
));
263 EventHandlerFile::Handle(LateStencilSampleRate(drawId
, mDSSampleRate
.lateStencilTestPassCount
, mDSSampleRate
.lateStencilTestFailCount
));
266 EventHandlerFile::Handle(EarlyZ(drawId
, mDSCombined
.earlyZTestPassCount
, mDSCombined
.earlyZTestFailCount
));
267 EventHandlerFile::Handle(LateZ(drawId
, mDSCombined
.lateZTestPassCount
, mDSCombined
.lateZTestFailCount
));
268 EventHandlerFile::Handle(EarlyStencil(drawId
, mDSCombined
.earlyStencilTestPassCount
, mDSCombined
.earlyStencilTestFailCount
));
269 EventHandlerFile::Handle(LateStencil(drawId
, mDSCombined
.lateStencilTestPassCount
, mDSCombined
.lateStencilTestFailCount
));
272 EventHandlerFile::Handle(EarlyZPixelRate(drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
));
273 EventHandlerFile::Handle(LateZPixelRate(drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
));
277 EventHandlerFile::Handle(EarlyZNullPS(drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
));
278 EventHandlerFile::Handle(EarlyStencilNullPS(drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
));
280 // Rasterized Subspans
281 EventHandlerFile::Handle(RasterTiles(drawId
, rastStats
.rasterTiles
));
283 //Reset Internal Counters
284 mDSSingleSample
= {};
294 virtual void Handle(const FrontendDrawEndEvent
& event
)
297 EventHandlerFile::Handle(ClipperEvent(event
.data
.drawId
, mClipper
.trivialRejectCount
, mClipper
.trivialAcceptCount
, mClipper
.mustClipCount
));
300 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
303 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
304 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
305 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
307 //Reset Internal Counters
313 virtual void Handle(const GSPrimInfo
& event
)
315 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
316 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
317 mGS
.vertsInput
+= event
.data
.vertsInput
;
320 virtual void Handle(const TessPrimCount
& event
)
322 mTS
.inputPrims
+= event
.data
.primCount
;
325 virtual void Handle(const RasterTileCount
& event
)
327 rastStats
.rasterTiles
+= event
.data
.rasterTiles
;
333 DepthStencilStats mDSSingleSample
= {};
334 DepthStencilStats mDSSampleRate
= {};
335 DepthStencilStats mDSPixelRate
= {};
336 DepthStencilStats mDSCombined
= {};
337 DepthStencilStats mDSNullPS
= {};
338 DepthStencilStats mDSOmZ
= {};
339 CStats mClipper
= {};
342 RastStats rastStats
= {};
346 static EventManager
* FromHandle(HANDLE hThreadContext
)
348 return reinterpret_cast<EventManager
*>(hThreadContext
);
351 // Construct an event manager and associate a handler with it.
352 HANDLE
CreateThreadContext(AR_THREAD type
)
354 // Can we assume single threaded here?
355 static std::atomic
<uint32_t> counter(0);
356 uint32_t id
= counter
.fetch_add(1);
358 EventManager
* pManager
= new EventManager();
362 EventHandlerFile
* pHandler
= nullptr;
364 if (type
== AR_THREAD::API
)
366 pHandler
= new EventHandlerApiStats(id
);
367 pManager
->Attach(pHandler
);
368 pHandler
->Handle(ThreadStartApiEvent());
372 pHandler
= new EventHandlerWorkerStats(id
);
373 pManager
->Attach(pHandler
);
374 pHandler
->Handle(ThreadStartWorkerEvent());
377 pHandler
->MarkHeader();
382 SWR_INVALID("Failed to register thread.");
386 void DestroyThreadContext(HANDLE hThreadContext
)
388 EventManager
* pManager
= FromHandle(hThreadContext
);
389 SWR_ASSERT(pManager
!= nullptr);
394 // Dispatch event for this thread.
395 void Dispatch(HANDLE hThreadContext
, const Event
& event
)
397 EventManager
* pManager
= FromHandle(hThreadContext
);
398 SWR_ASSERT(pManager
!= nullptr);
400 pManager
->Dispatch(event
);
403 // Flush for this thread.
404 void FlushDraw(HANDLE hThreadContext
, uint32_t drawId
)
406 EventManager
* pManager
= FromHandle(hThreadContext
);
407 SWR_ASSERT(pManager
!= nullptr);
409 pManager
->FlushDraw(drawId
);