1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.hpp"
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail tallies for depth and stencil tests, kept separately
///        for the early and late test positions. Every counter starts at 0.
struct DepthStencilStats
{
    // Depth (Z) test tallies.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test tallies.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
// Clipper tallies. These are read and written through mClipper (type CStats).
// They carry no default initializer here; the mClipper member is declared
// with "= {}".
53 uint32_t trivialRejectCount
;
54 uint32_t trivialAcceptCount
;
55 uint32_t mustClipCount
;
// Tessellation input primitive tally, accessed through mTS.
// NOTE(review): the enclosing struct's declaration line is not visible here.
60 uint32_t inputPrims
= 0;
61 //@todo:: Change this to numPatches. Assumes 1 patch per prim; fine as long as that holds.
// Geometry shader tallies, accessed through mGS (type GSStateInfo).
// NOTE(review): mGS.vertsInput is also used by the handlers, but its
// declaration is not visible in this span - confirm it exists.
66 uint32_t inputPrimCount
;
67 uint32_t primGeneratedCount
;
// Rasterized tile tally, accessed through rastStats (type RastStats).
73 uint32_t rasterTiles
= 0;
// Culled primitive tallies, accessed through mCullStats (type CullStats).
78 uint32_t degeneratePrimCount
= 0;
79 uint32_t backfacePrimCount
= 0;
// Alpha test / alpha blend tallies, accessed through mAlphaStats (type AlphaStats).
84 uint32_t alphaTestCount
= 0;
85 uint32_t alphaBlendCount
= 0;
88 //////////////////////////////////////////////////////////////////////////
89 /// @brief Event handler that handles API thread events. This is shared
90 /// between the API and its caller (e.g. driver shim) but typically
91 /// there is only a single API thread per context. So you can save
92 /// information in the class to be used for other events.
93 class EventHandlerApiStats
: public EventHandlerFile
96 EventHandlerApiStats(uint32_t id
) : EventHandlerFile(id
)
99 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
100 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
101 // exists, this will attempt to copy it the first time we get here to package it with
102 // the stats. Otherwise, the user would need to specify the events.proto location when
103 // parsing the stats in post.
104 std::stringstream eventsProtoSrcFilename
, eventsProtoDstFilename
;
105 eventsProtoSrcFilename
<< KNOB_DEBUG_OUTPUT_DIR
<< "\\events.proto" << std::ends
;
106 eventsProtoDstFilename
<< mOutputDir
.substr(0, mOutputDir
.size() - 1)
107 << "\\events.proto" << std::ends
;
109 // If event.proto already exists, we're done; else do the copy
110 struct stat buf
; // Use a Posix stat for file existence check
111 if (!stat(eventsProtoDstFilename
.str().c_str(), &buf
) == 0)
113 // Now check to make sure the events.proto source exists
114 if (stat(eventsProtoSrcFilename
.str().c_str(), &buf
) == 0)
116 std::ifstream srcFile
;
117 srcFile
.open(eventsProtoSrcFilename
.str().c_str(), std::ios::binary
);
118 if (srcFile
.is_open())
120 // Just do a binary buffer copy
121 std::ofstream dstFile
;
122 dstFile
.open(eventsProtoDstFilename
.str().c_str(), std::ios::binary
);
123 dstFile
<< srcFile
.rdbuf();
// Repackages a DrawInstancedEvent as a DrawInfoEvent and hands it to
// EventHandlerFile::Handle. The fields visibly forwarded are drawId,
// numVertices, startVertex, numInstances, startInstance and soTopology.
// NOTE(review): the DrawInfoEvent argument list shown here is not contiguous;
// check it against the DrawInfoEvent constructor before editing.
132 virtual void Handle(const DrawInstancedEvent
& event
)
134 DrawInfoEvent
e(event
.data
.drawId
,
137 event
.data
.numVertices
,
140 event
.data
.startVertex
,
141 event
.data
.numInstances
,
142 event
.data
.startInstance
,
146 event
.data
.soTopology
,
// Forward the repackaged event to the base handler.
149 EventHandlerFile::Handle(e
);
// Repackages a DrawIndexedInstancedEvent as a DrawInfoEvent tagged
// ArchRast::IndexedInstanced and hands it to EventHandlerFile::Handle.
// The fields visibly forwarded are drawId, numIndices, indexOffset,
// baseVertex, numInstances, startInstance and soTopology.
// NOTE(review): the DrawInfoEvent argument list shown here is not contiguous;
// check it against the DrawInfoEvent constructor before editing.
152 virtual void Handle(const DrawIndexedInstancedEvent
& event
)
154 DrawInfoEvent
e(event
.data
.drawId
,
155 ArchRast::IndexedInstanced
,
158 event
.data
.numIndices
,
159 event
.data
.indexOffset
,
160 event
.data
.baseVertex
,
161 event
.data
.numInstances
,
162 event
.data
.startInstance
,
166 event
.data
.soTopology
,
// Forward the repackaged event to the base handler.
169 EventHandlerFile::Handle(e
);
173 //////////////////////////////////////////////////////////////////////////
174 /// @brief Event handler that handles worker thread events. There is one
175 /// event handler per thread. The python script will need to sum
176 /// up counters across all of the threads.
177 class EventHandlerWorkerStats
: public EventHandlerFile
180 EventHandlerWorkerStats(uint32_t id
) : EventHandlerFile(id
), mNeedFlush(false)
182 memset(mShaderStats
, 0, sizeof(mShaderStats
));
185 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
187 // earlyZ test compute
188 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
189 mDSSingleSample
.earlyZTestFailCount
+=
190 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
192 // earlyStencil test compute
193 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
194 mDSSingleSample
.earlyStencilTestFailCount
+=
195 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
197 // earlyZ test single and multi sample
198 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
199 mDSCombined
.earlyZTestFailCount
+=
200 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
202 // earlyStencil test single and multi sample
203 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
204 mDSCombined
.earlyStencilTestFailCount
+=
205 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
210 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
212 // earlyZ test compute
213 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
214 mDSSampleRate
.earlyZTestFailCount
+=
215 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
217 // earlyStencil test compute
218 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
219 mDSSampleRate
.earlyStencilTestFailCount
+=
220 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
222 // earlyZ test single and multi sample
223 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
224 mDSCombined
.earlyZTestFailCount
+=
225 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
227 // earlyStencil test single and multi sample
228 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
229 mDSCombined
.earlyStencilTestFailCount
+=
230 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
235 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
237 // earlyZ test compute
238 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
239 mDSNullPS
.earlyZTestFailCount
+=
240 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
242 // earlyStencil test compute
243 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
244 mDSNullPS
.earlyStencilTestFailCount
+=
245 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
249 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
251 // lateZ test compute
252 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
253 mDSSingleSample
.lateZTestFailCount
+=
254 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
256 // lateStencil test compute
257 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
258 mDSSingleSample
.lateStencilTestFailCount
+=
259 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
261 // lateZ test single and multi sample
262 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
263 mDSCombined
.lateZTestFailCount
+=
264 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
266 // lateStencil test single and multi sample
267 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
268 mDSCombined
.lateStencilTestFailCount
+=
269 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
274 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
276 // lateZ test compute
277 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
278 mDSSampleRate
.lateZTestFailCount
+=
279 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
281 // lateStencil test compute
282 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
283 mDSSampleRate
.lateStencilTestFailCount
+=
284 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
286 // lateZ test single and multi sample
287 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
288 mDSCombined
.lateZTestFailCount
+=
289 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
291 // lateStencil test single and multi sample
292 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
293 mDSCombined
.lateStencilTestFailCount
+=
294 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
299 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
301 // lateZ test compute
302 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
303 mDSNullPS
.lateZTestFailCount
+=
304 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
306 // lateStencil test compute
307 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
308 mDSNullPS
.lateStencilTestFailCount
+=
309 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
313 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
315 // earlyZ test compute
316 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
317 mDSPixelRate
.earlyZTestFailCount
+=
318 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
323 virtual void Handle(const LateDepthInfoPixelRate
& event
)
325 // lateZ test compute
326 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
327 mDSPixelRate
.lateZTestFailCount
+=
328 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
333 virtual void Handle(const ClipInfoEvent
& event
)
335 mClipper
.mustClipCount
+= _mm_popcnt_u32(event
.data
.clipMask
);
336 mClipper
.trivialRejectCount
+=
337 event
.data
.numInvocations
- _mm_popcnt_u32(event
.data
.validMask
);
338 mClipper
.trivialAcceptCount
+=
339 _mm_popcnt_u32(event
.data
.validMask
& ~event
.data
.clipMask
);
// Instruction tally for one shader stage; the handlers reach it as
// mShaderStats[<stage>].numInstExecuted.
// NOTE(review): presumably the sole field of ShaderStats - the struct's
// declaration line is not visible here, confirm.
344 uint32_t numInstExecuted
;
347 virtual void Handle(const VSStats
& event
)
349 mShaderStats
[SHADER_VERTEX
].numInstExecuted
+= event
.data
.numInstExecuted
;
352 virtual void Handle(const GSStats
& event
)
354 mShaderStats
[SHADER_GEOMETRY
].numInstExecuted
+= event
.data
.numInstExecuted
;
357 virtual void Handle(const DSStats
& event
)
359 mShaderStats
[SHADER_DOMAIN
].numInstExecuted
+= event
.data
.numInstExecuted
;
362 virtual void Handle(const HSStats
& event
)
364 mShaderStats
[SHADER_HULL
].numInstExecuted
+= event
.data
.numInstExecuted
;
367 virtual void Handle(const PSStats
& event
)
369 mShaderStats
[SHADER_PIXEL
].numInstExecuted
+= event
.data
.numInstExecuted
;
373 virtual void Handle(const CSStats
& event
)
375 mShaderStats
[SHADER_COMPUTE
].numInstExecuted
+= event
.data
.numInstExecuted
;
379 // Flush cached events for this draw
// Emits one summary event per accumulated counter group through
// EventHandlerFile::Handle, each tagged with drawId, then clears some of the
// accumulators.
// NOTE(review): the body of the mNeedFlush check below is not visible;
// presumably it exits early when nothing is pending - confirm.
// NOTE(review): no visible handler sets mNeedFlush to true - confirm where
// the flag is raised.
380 virtual void FlushDraw(uint32_t drawId
)
382 if (mNeedFlush
== false)
// Pixel and compute shader instruction counts.
385 EventHandlerFile::Handle(PSInfo(drawId
, mShaderStats
[SHADER_PIXEL
].numInstExecuted
));
386 EventHandlerFile::Handle(CSInfo(drawId
, mShaderStats
[SHADER_COMPUTE
].numInstExecuted
));
// Single-sample depth and stencil results (early and late).
389 EventHandlerFile::Handle(EarlyZSingleSample(
390 drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
));
391 EventHandlerFile::Handle(LateZSingleSample(
392 drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
));
393 EventHandlerFile::Handle(
394 EarlyStencilSingleSample(drawId
,
395 mDSSingleSample
.earlyStencilTestPassCount
,
396 mDSSingleSample
.earlyStencilTestFailCount
));
397 EventHandlerFile::Handle(
398 LateStencilSingleSample(drawId
,
399 mDSSingleSample
.lateStencilTestPassCount
,
400 mDSSingleSample
.lateStencilTestFailCount
));
// Sample-rate depth and stencil results (early and late).
403 EventHandlerFile::Handle(EarlyZSampleRate(
404 drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
));
405 EventHandlerFile::Handle(LateZSampleRate(
406 drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
));
407 EventHandlerFile::Handle(
408 EarlyStencilSampleRate(drawId
,
409 mDSSampleRate
.earlyStencilTestPassCount
,
410 mDSSampleRate
.earlyStencilTestFailCount
));
411 EventHandlerFile::Handle(LateStencilSampleRate(drawId
,
412 mDSSampleRate
.lateStencilTestPassCount
,
413 mDSSampleRate
.lateStencilTestFailCount
));
// Combined (single + multi sample) depth and stencil results.
416 EventHandlerFile::Handle(
417 EarlyZ(drawId
, mDSCombined
.earlyZTestPassCount
, mDSCombined
.earlyZTestFailCount
));
418 EventHandlerFile::Handle(
419 LateZ(drawId
, mDSCombined
.lateZTestPassCount
, mDSCombined
.lateZTestFailCount
));
420 EventHandlerFile::Handle(EarlyStencil(drawId
,
421 mDSCombined
.earlyStencilTestPassCount
,
422 mDSCombined
.earlyStencilTestFailCount
));
423 EventHandlerFile::Handle(LateStencil(drawId
,
424 mDSCombined
.lateStencilTestPassCount
,
425 mDSCombined
.lateStencilTestFailCount
));
// Pixel-rate depth results (early and late).
428 EventHandlerFile::Handle(EarlyZPixelRate(
429 drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
));
430 EventHandlerFile::Handle(LateZPixelRate(
431 drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
));
// NullPS results. Only the early Z and early stencil counters are emitted in
// the visible code.
// NOTE(review): the late NullPS counters are accumulated by
// Handle(LateDepthStencilInfoNullPS) but are not emitted here - confirm that
// is intended.
435 EventHandlerFile::Handle(
436 EarlyZNullPS(drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
));
437 EventHandlerFile::Handle(EarlyStencilNullPS(
438 drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
));
440 // Rasterized Subspans
441 EventHandlerFile::Handle(RasterTiles(drawId
, rastStats
.rasterTiles
));
// Alpha test / alpha blend tallies.
444 EventHandlerFile::Handle(
445 AlphaEvent(drawId
, mAlphaStats
.alphaTestCount
, mAlphaStats
.alphaBlendCount
));
// Culled primitive tallies; backface count is passed first, degenerate second.
448 EventHandlerFile::Handle(
449 CullEvent(drawId
, mCullStats
.backfacePrimCount
, mCullStats
.degeneratePrimCount
));
// Clear accumulators for the next draw.
// NOTE(review): only mDSSingleSample and the pixel/compute shader slots are
// visibly cleared; confirm that the other accumulators emitted above are
// reset as well.
451 mDSSingleSample
= {};
461 mShaderStats
[SHADER_PIXEL
] = {};
462 mShaderStats
[SHADER_COMPUTE
] = {};
// Emits the accumulated clipper, tessellation, geometry shader and per-stage
// instruction-count summaries through EventHandlerFile::Handle, each tagged
// with event.data.drawId, then clears the vertex/hull/domain/geometry slots
// of mShaderStats.
467 virtual void Handle(const FrontendDrawEndEvent
& event
)
// Clipper summary: trivial rejects, trivial accepts, must-clip.
470 EventHandlerFile::Handle(ClipperEvent(event
.data
.drawId
,
471 mClipper
.trivialRejectCount
,
472 mClipper
.trivialAcceptCount
,
473 mClipper
.mustClipCount
));
// Tessellation input primitive count.
476 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
// Geometry shader input prims, generated prims and input verts.
479 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
480 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
481 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
// Instruction counts for the vertex, hull, domain and geometry slots.
483 EventHandlerFile::Handle(
484 VSInfo(event
.data
.drawId
, mShaderStats
[SHADER_VERTEX
].numInstExecuted
));
485 EventHandlerFile::Handle(
486 HSInfo(event
.data
.drawId
, mShaderStats
[SHADER_HULL
].numInstExecuted
));
487 EventHandlerFile::Handle(
488 DSInfo(event
.data
.drawId
, mShaderStats
[SHADER_DOMAIN
].numInstExecuted
));
489 EventHandlerFile::Handle(
490 GSInfo(event
.data
.drawId
, mShaderStats
[SHADER_GEOMETRY
].numInstExecuted
));
// Clear the slots just emitted so the next draw starts from zero.
492 mShaderStats
[SHADER_VERTEX
] = {};
493 mShaderStats
[SHADER_HULL
] = {};
494 mShaderStats
[SHADER_DOMAIN
] = {};
495 mShaderStats
[SHADER_GEOMETRY
] = {};
497 // Reset Internal Counters
// NOTE(review): no reset statements are visible after the comment above;
// confirm that mClipper, mTS and mGS are cleared here.
503 virtual void Handle(const GSPrimInfo
& event
)
505 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
506 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
507 mGS
.vertsInput
+= event
.data
.vertsInput
;
510 virtual void Handle(const TessPrimCount
& event
) { mTS
.inputPrims
+= event
.data
.primCount
; }
512 virtual void Handle(const RasterTileCount
& event
)
514 rastStats
.rasterTiles
+= event
.data
.rasterTiles
;
517 virtual void Handle(const CullInfoEvent
& event
)
519 mCullStats
.degeneratePrimCount
+= _mm_popcnt_u32(
520 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.degeneratePrimMask
));
521 mCullStats
.backfacePrimCount
+= _mm_popcnt_u32(
522 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.backfacePrimMask
));
525 virtual void Handle(const AlphaInfoEvent
& event
)
527 mAlphaStats
.alphaTestCount
+= event
.data
.alphaTestEnable
;
528 mAlphaStats
.alphaBlendCount
+= event
.data
.alphaBlendEnable
;
// Per-draw accumulators owned by this handler. All are value-initialized
// with "= {}" except mShaderStats, which the constructor zeroes with memset.

// Depth/stencil tallies, one set per event flavour.
534 DepthStencilStats mDSSingleSample
= {};
535 DepthStencilStats mDSSampleRate
= {};
536 DepthStencilStats mDSPixelRate
= {};
// Sum of the single-sample and sample-rate tallies.
537 DepthStencilStats mDSCombined
= {};
538 DepthStencilStats mDSNullPS
= {};
// NOTE(review): mDSOmZ is not referenced by any handler visible in this file
// section - confirm whether it is still needed.
539 DepthStencilStats mDSOmZ
= {};
// Clipper tallies.
540 CStats mClipper
= {};
// Geometry shader tallies.
542 GSStateInfo mGS
= {};
// Rasterized tile tally.
543 RastStats rastStats
= {};
// Culled primitive tallies.
544 CullStats mCullStats
= {};
// Alpha test / blend tallies.
545 AlphaStats mAlphaStats
= {};
// Instruction counts, indexed by shader stage (SHADER_VERTEX, SHADER_PIXEL, ...).
547 ShaderStats mShaderStats
[NUM_SHADER_TYPES
];
551 static EventManager
* FromHandle(HANDLE hThreadContext
)
553 return reinterpret_cast<EventManager
*>(hThreadContext
);
556 // Construct an event manager and associate a handler with it.
// Each call draws a fresh id from a function-local atomic counter, creates an
// EventManager, attaches an API-stats or worker-stats handler depending on
// `type`, sends that handler its thread-start event and calls MarkHeader().
// NOTE(review): the branch structure and the return statements are not
// visible here. Presumably the worker handler is the else branch and the
// manager is returned as the HANDLE, with SWR_INVALID on the failure path -
// confirm.
// NOTE(review): both objects are created with raw new; where they are freed
// is not visible in this function.
557 HANDLE
CreateThreadContext(AR_THREAD type
)
559 // Can we assume single threaded here?
// fetch_add returns the previous value, so ids start at 0.
560 static std::atomic
<uint32_t> counter(0);
561 uint32_t id
= counter
.fetch_add(1);
563 EventManager
* pManager
= new EventManager();
567 EventHandlerFile
* pHandler
= nullptr;
// API threads get the API-stats handler.
569 if (type
== AR_THREAD::API
)
571 pHandler
= new EventHandlerApiStats(id
);
572 pManager
->Attach(pHandler
);
573 pHandler
->Handle(ThreadStartApiEvent());
// Worker-stats handler.
577 pHandler
= new EventHandlerWorkerStats(id
);
578 pManager
->Attach(pHandler
);
579 pHandler
->Handle(ThreadStartWorkerEvent());
582 pHandler
->MarkHeader();
587 SWR_INVALID("Failed to register thread.");
// Resolves the handle to its EventManager and asserts that it is non-null.
// NOTE(review): no statement that releases the manager is visible here;
// confirm that it is actually destroyed.
591 void DestroyThreadContext(HANDLE hThreadContext
)
593 EventManager
* pManager
= FromHandle(hThreadContext
);
594 SWR_ASSERT(pManager
!= nullptr);
599 // Dispatch event for this thread.
600 void Dispatch(HANDLE hThreadContext
, const Event
& event
)
602 EventManager
* pManager
= FromHandle(hThreadContext
);
603 SWR_ASSERT(pManager
!= nullptr);
605 pManager
->Dispatch(event
);
608 // Flush for this thread.
609 void FlushDraw(HANDLE hThreadContext
, uint32_t drawId
)
611 EventManager
* pManager
= FromHandle(hThreadContext
);
612 SWR_ASSERT(pManager
!= nullptr);
614 pManager
->FlushDraw(drawId
);
616 } // namespace ArchRast