1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.hpp"
//////////////////////////////////////////////////////////////////////////
/// @brief Tallies of depth and stencil test outcomes, kept separately for
///        the early and the late tests. Every counter starts at zero.
struct DepthStencilStats
{
    // Depth (Z) test tallies.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test tallies.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
// Clipper counters; the worker handler reads and writes them through mClipper.
// NOTE(review): the struct header that encloses these fields is not visible in
// this view, and the three fields carry no default initializer here.
53 uint32_t trivialRejectCount
;
54 uint32_t trivialAcceptCount
;
55 uint32_t mustClipCount
;
// Input-primitive counter; the worker handler accesses it as mTS.inputPrims.
60 uint32_t inputPrims
= 0;
61 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
// GS primitive counters; the worker handler accesses them through mGS.
66 uint32_t inputPrimCount
;
67 uint32_t primGeneratedCount
;
// Raster tile counter; the worker handler accesses it as rastStats.rasterTiles.
73 uint32_t rasterTiles
= 0;
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Event handler that handles API thread events. This is shared
78 /// between the API and its caller (e.g. driver shim) but typically
79 /// there is only a single API thread per context. So you can save
80 /// information in the class to be used for other events.
81 class EventHandlerApiStats
: public EventHandlerFile
84 EventHandlerApiStats(uint32_t id
) : EventHandlerFile(id
) {}
86 virtual void Handle(const DrawInstancedEvent
& event
)
88 DrawInfoEvent
e(event
.data
.drawId
, ArchRast::Instanced
, event
.data
.topology
, event
.data
.numVertices
, 0, 0, event
.data
.startVertex
, event
.data
.numInstances
, event
.data
.startInstance
);
90 EventHandlerFile::Handle(e
);
93 virtual void Handle(const DrawIndexedInstancedEvent
& event
)
95 DrawInfoEvent
e(event
.data
.drawId
, ArchRast::IndexedInstanced
, event
.data
.topology
, 0, event
.data
.numIndices
, event
.data
.indexOffset
, event
.data
.baseVertex
, event
.data
.numInstances
, event
.data
.startInstance
);
97 EventHandlerFile::Handle(e
);
100 virtual void Handle(const DrawInstancedSplitEvent
& event
)
102 DrawInfoEvent
e(event
.data
.drawId
, ArchRast::InstancedSplit
, 0, 0, 0, 0, 0, 0, 0);
104 EventHandlerFile::Handle(e
);
107 virtual void Handle(const DrawIndexedInstancedSplitEvent
& event
)
109 DrawInfoEvent
e(event
.data
.drawId
, ArchRast::IndexedInstancedSplit
, 0, 0, 0, 0, 0, 0, 0);
111 EventHandlerFile::Handle(e
);
115 //////////////////////////////////////////////////////////////////////////
116 /// @brief Event handler that handles worker thread events. There is one
117 /// event handler per thread. The python script will need to sum
118 /// up counters across all of the threads.
119 class EventHandlerWorkerStats
: public EventHandlerFile
122 EventHandlerWorkerStats(uint32_t id
) : EventHandlerFile(id
), mNeedFlush(false) {}
124 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
126 //earlyZ test compute
127 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
128 mDSSingleSample
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
130 //earlyStencil test compute
131 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
132 mDSSingleSample
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
134 //earlyZ test single and multi sample
135 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
136 mDSCombined
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
138 //earlyStencil test single and multi sample
139 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
140 mDSCombined
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
145 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
147 //earlyZ test compute
148 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
149 mDSSampleRate
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
151 //earlyStencil test compute
152 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
153 mDSSampleRate
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
155 //earlyZ test single and multi sample
156 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
157 mDSCombined
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
159 //earlyStencil test single and multi sample
160 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
161 mDSCombined
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
166 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
168 //earlyZ test compute
169 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
170 mDSNullPS
.earlyZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
172 //earlyStencil test compute
173 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
174 mDSNullPS
.earlyStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
178 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
181 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
182 mDSSingleSample
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
184 //lateStencil test compute
185 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
186 mDSSingleSample
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
188 //lateZ test single and multi sample
189 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
190 mDSCombined
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
192 //lateStencil test single and multi sample
193 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
194 mDSCombined
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
199 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
202 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
203 mDSSampleRate
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
205 //lateStencil test compute
206 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
207 mDSSampleRate
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
210 //lateZ test single and multi sample
211 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
212 mDSCombined
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
214 //lateStencil test single and multi sample
215 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
216 mDSCombined
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
221 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
224 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
225 mDSNullPS
.lateZTestFailCount
+= _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
227 //lateStencil test compute
228 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
229 mDSNullPS
.lateStencilTestFailCount
+= _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
233 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
235 //earlyZ test compute
236 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
237 mDSPixelRate
.earlyZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
242 virtual void Handle(const LateDepthInfoPixelRate
& event
)
245 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
246 mDSPixelRate
.lateZTestFailCount
+= (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
251 virtual void Handle(const ClipInfoEvent
& event
)
253 mClipper
.mustClipCount
+= _mm_popcnt_u32(event
.data
.clipMask
);
254 mClipper
.trivialRejectCount
+= event
.data
.numInvocations
- _mm_popcnt_u32(event
.data
.validMask
);
255 mClipper
.trivialAcceptCount
+= _mm_popcnt_u32(event
.data
.validMask
& ~event
.data
.clipMask
);
258 // Flush cached events for this draw
259 virtual void FlushDraw(uint32_t drawId
)
261 if (mNeedFlush
== false) return;
264 EventHandlerFile::Handle(EarlyZSingleSample(drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
));
265 EventHandlerFile::Handle(LateZSingleSample(drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
));
266 EventHandlerFile::Handle(EarlyStencilSingleSample(drawId
, mDSSingleSample
.earlyStencilTestPassCount
, mDSSingleSample
.earlyStencilTestFailCount
));
267 EventHandlerFile::Handle(LateStencilSingleSample(drawId
, mDSSingleSample
.lateStencilTestPassCount
, mDSSingleSample
.lateStencilTestFailCount
));
270 EventHandlerFile::Handle(EarlyZSampleRate(drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
));
271 EventHandlerFile::Handle(LateZSampleRate(drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
));
272 EventHandlerFile::Handle(EarlyStencilSampleRate(drawId
, mDSSampleRate
.earlyStencilTestPassCount
, mDSSampleRate
.earlyStencilTestFailCount
));
273 EventHandlerFile::Handle(LateStencilSampleRate(drawId
, mDSSampleRate
.lateStencilTestPassCount
, mDSSampleRate
.lateStencilTestFailCount
));
276 EventHandlerFile::Handle(EarlyZ(drawId
, mDSCombined
.earlyZTestPassCount
, mDSCombined
.earlyZTestFailCount
));
277 EventHandlerFile::Handle(LateZ(drawId
, mDSCombined
.lateZTestPassCount
, mDSCombined
.lateZTestFailCount
));
278 EventHandlerFile::Handle(EarlyStencil(drawId
, mDSCombined
.earlyStencilTestPassCount
, mDSCombined
.earlyStencilTestFailCount
));
279 EventHandlerFile::Handle(LateStencil(drawId
, mDSCombined
.lateStencilTestPassCount
, mDSCombined
.lateStencilTestFailCount
));
282 EventHandlerFile::Handle(EarlyZPixelRate(drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
));
283 EventHandlerFile::Handle(LateZPixelRate(drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
));
287 EventHandlerFile::Handle(EarlyZNullPS(drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
));
288 EventHandlerFile::Handle(EarlyStencilNullPS(drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
));
290 // Rasterized Subspans
291 EventHandlerFile::Handle(RasterTiles(drawId
, rastStats
.rasterTiles
));
293 //Reset Internal Counters
294 mDSSingleSample
= {};
304 virtual void Handle(const FrontendDrawEndEvent
& event
)
307 EventHandlerFile::Handle(ClipperEvent(event
.data
.drawId
, mClipper
.trivialRejectCount
, mClipper
.trivialAcceptCount
, mClipper
.mustClipCount
));
310 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
313 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
314 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
315 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
317 //Reset Internal Counters
323 virtual void Handle(const GSPrimInfo
& event
)
325 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
326 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
327 mGS
.vertsInput
+= event
.data
.vertsInput
;
330 virtual void Handle(const TessPrimCount
& event
)
332 mTS
.inputPrims
+= event
.data
.primCount
;
335 virtual void Handle(const RasterTileCount
& event
)
337 rastStats
.rasterTiles
+= event
.data
.rasterTiles
;
343 DepthStencilStats mDSSingleSample
= {};
344 DepthStencilStats mDSSampleRate
= {};
345 DepthStencilStats mDSPixelRate
= {};
346 DepthStencilStats mDSCombined
= {};
347 DepthStencilStats mDSNullPS
= {};
348 DepthStencilStats mDSOmZ
= {};
349 CStats mClipper
= {};
352 RastStats rastStats
= {};
356 static EventManager
* FromHandle(HANDLE hThreadContext
)
358 return reinterpret_cast<EventManager
*>(hThreadContext
);
361 // Construct an event manager and associate a handler with it.
// Allocates an EventManager, creates an EventHandlerApiStats or an
// EventHandlerWorkerStats depending on `type`, attaches it to the manager and
// sends the matching thread-start event through it.
// NOTE(review): the branch and return structure of this function (else /
// return statements, any null check) is not visible in this view of the
// file - confirm against the full source before relying on this summary.
362 HANDLE
CreateThreadContext(AR_THREAD type
)
364 // Can we assume single threaded here?
// Each call takes the next value of a function-local static atomic counter
// as the id handed to the handler constructor.
365 static std::atomic
<uint32_t> counter(0);
366 uint32_t id
= counter
.fetch_add(1);
368 EventManager
* pManager
= new EventManager();
372 EventHandlerFile
* pHandler
= nullptr;
// API thread: API stats handler plus a ThreadStartApiEvent.
374 if (type
== AR_THREAD::API
)
376 pHandler
= new EventHandlerApiStats(id
);
377 pManager
->Attach(pHandler
);
378 pHandler
->Handle(ThreadStartApiEvent());
// Worker stats handler plus a ThreadStartWorkerEvent.
382 pHandler
= new EventHandlerWorkerStats(id
);
383 pManager
->Attach(pHandler
);
384 pHandler
->Handle(ThreadStartWorkerEvent());
387 pHandler
->MarkHeader();
// Failure path: reports that the thread could not be registered.
392 SWR_INVALID("Failed to register thread.");
396 void DestroyThreadContext(HANDLE hThreadContext
)
398 EventManager
* pManager
= FromHandle(hThreadContext
);
399 SWR_ASSERT(pManager
!= nullptr);
404 // Dispatch event for this thread.
405 void Dispatch(HANDLE hThreadContext
, const Event
& event
)
407 EventManager
* pManager
= FromHandle(hThreadContext
);
408 SWR_ASSERT(pManager
!= nullptr);
410 pManager
->Dispatch(event
);
413 // Flush for this thread.
414 void FlushDraw(HANDLE hThreadContext
, uint32_t drawId
)
416 EventManager
* pManager
= FromHandle(hThreadContext
);
417 SWR_ASSERT(pManager
!= nullptr);
419 pManager
->FlushDraw(drawId
);