1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
33 #include "common/os.h"
34 #include "archrast/archrast.h"
35 #include "archrast/eventmanager.h"
36 #include "gen_ar_event.hpp"
37 #include "gen_ar_eventhandlerfile.hpp"
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail tallies for depth (Z) and stencil tests, kept separately
///        for the early and the late variants of each test. Every tally
///        starts at zero.
struct DepthStencilStats
{
    // Depth test tallies.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test tallies.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
// NOTE(review): the struct headers and closing braces that enclose the counters below are
// not visible in this excerpt (the embedded original line numbers skip between groups).
// The groupings described here are inferred from how the counters are accessed further
// down in this file; confirm against the full file.

// Clipper tallies; accessed as mClipper.<name>, and mClipper is declared with type CStats.
57 uint32_t trivialRejectCount
;
58 uint32_t trivialAcceptCount
;
59 uint32_t mustClipCount
;
// Tessellation input primitive tally; accessed as mTS.inputPrims.
// NOTE(review): the declaration of mTS (and so the name of its type) is not visible here.
64 uint32_t inputPrims
= 0;
65 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
// Geometry shader tallies; accessed as mGS.<name>, and mGS is declared with type GSStateInfo.
// NOTE(review): mGS.vertsInput is also used in this file but no declaration of it is
// visible in this excerpt.
70 uint32_t inputPrimCount
;
71 uint32_t primGeneratedCount
;
// Raster tile tally; accessed as rastStats.rasterTiles (type RastStats).
77 uint32_t rasterTiles
= 0;
// Cull tallies; accessed as mCullStats.<name> (type CullStats).
82 uint32_t degeneratePrimCount
= 0;
83 uint32_t backfacePrimCount
= 0;
// Alpha tallies; accessed as mAlphaStats.<name> (type AlphaStats).
88 uint32_t alphaTestCount
= 0;
89 uint32_t alphaBlendCount
= 0;
93 //////////////////////////////////////////////////////////////////////////
94 /// @brief Event handler that handles API thread events. This is shared
95 /// between the API and its caller (e.g. driver shim) but typically
96 /// there is only a single API thread per context. So you can save
97 /// information in the class to be used for other events.
98 class EventHandlerApiStats
: public EventHandlerFile
101 EventHandlerApiStats(uint32_t id
) : EventHandlerFile(id
)
104 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
105 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
106 // exists, this will attempt to copy it the first time we get here to package it with
107 // the stats. Otherwise, the user would need to specify the events.proto location when
108 // parsing the stats in post.
109 std::stringstream eventsProtoSrcFilename
, eventsProtoDstFilename
;
110 eventsProtoSrcFilename
<< KNOB_DEBUG_OUTPUT_DIR
<< "\\events.proto" << std::ends
;
111 eventsProtoDstFilename
<< mOutputDir
.substr(0, mOutputDir
.size() - 1)
112 << "\\events.proto" << std::ends
;
114 // If event.proto already exists, we're done; else do the copy
115 struct stat buf
; // Use a Posix stat for file existence check
116 if (!stat(eventsProtoDstFilename
.str().c_str(), &buf
) == 0)
118 // Now check to make sure the events.proto source exists
119 if (stat(eventsProtoSrcFilename
.str().c_str(), &buf
) == 0)
121 std::ifstream srcFile
;
122 srcFile
.open(eventsProtoSrcFilename
.str().c_str(), std::ios::binary
);
123 if (srcFile
.is_open())
125 // Just do a binary buffer copy
126 std::ofstream dstFile
;
127 dstFile
.open(eventsProtoDstFilename
.str().c_str(), std::ios::binary
);
128 dstFile
<< srcFile
.rdbuf();
// Handles a DrawInstancedEvent on the API thread: builds a DrawInfoEvent from fields of
// event.data (drawId, numVertices, startVertex, numInstances, startInstance, soTopology)
// and forwards it to EventHandlerFile::Handle.
// NOTE(review): the DrawInfoEvent argument list below ends with a trailing comma and the
// embedded original line numbers skip several lines, so some constructor arguments are
// probably not visible in this excerpt; confirm against the full file before editing.
137 virtual void Handle(const DrawInstancedEvent
& event
)
139 DrawInfoEvent
e(event
.data
.drawId
,
142 event
.data
.numVertices
,
145 event
.data
.startVertex
,
146 event
.data
.numInstances
,
147 event
.data
.startInstance
,
151 event
.data
.soTopology
,
// Hand the repackaged event to the base-class handler.
154 EventHandlerFile::Handle(e
);
// Handles a DrawIndexedInstancedEvent on the API thread: builds a DrawInfoEvent tagged
// ArchRast::IndexedInstanced from fields of event.data (drawId, numIndices, indexOffset,
// baseVertex, numInstances, startInstance, soTopology) and forwards it to
// EventHandlerFile::Handle.
// NOTE(review): the DrawInfoEvent argument list below ends with a trailing comma and the
// embedded original line numbers skip several lines, so some constructor arguments are
// probably not visible in this excerpt; confirm against the full file before editing.
157 virtual void Handle(const DrawIndexedInstancedEvent
& event
)
159 DrawInfoEvent
e(event
.data
.drawId
,
160 ArchRast::IndexedInstanced
,
163 event
.data
.numIndices
,
164 event
.data
.indexOffset
,
165 event
.data
.baseVertex
,
166 event
.data
.numInstances
,
167 event
.data
.startInstance
,
171 event
.data
.soTopology
,
// Hand the repackaged event to the base-class handler.
174 EventHandlerFile::Handle(e
);
178 //////////////////////////////////////////////////////////////////////////
179 /// @brief Event handler that handles worker thread events. There is one
180 /// event handler per thread. The python script will need to sum
181 /// up counters across all of the threads.
182 class EventHandlerWorkerStats
: public EventHandlerFile
185 EventHandlerWorkerStats(uint32_t id
) : EventHandlerFile(id
), mNeedFlush(false)
187 memset(mShaderStats
, 0, sizeof(mShaderStats
));
190 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
192 // earlyZ test compute
193 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
194 mDSSingleSample
.earlyZTestFailCount
+=
195 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
197 // earlyStencil test compute
198 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
199 mDSSingleSample
.earlyStencilTestFailCount
+=
200 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
202 // earlyZ test single and multi sample
203 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
204 mDSCombined
.earlyZTestFailCount
+=
205 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
207 // earlyStencil test single and multi sample
208 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
209 mDSCombined
.earlyStencilTestFailCount
+=
210 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
215 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
217 // earlyZ test compute
218 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
219 mDSSampleRate
.earlyZTestFailCount
+=
220 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
222 // earlyStencil test compute
223 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
224 mDSSampleRate
.earlyStencilTestFailCount
+=
225 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
227 // earlyZ test single and multi sample
228 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
229 mDSCombined
.earlyZTestFailCount
+=
230 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
232 // earlyStencil test single and multi sample
233 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
234 mDSCombined
.earlyStencilTestFailCount
+=
235 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
240 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
242 // earlyZ test compute
243 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
244 mDSNullPS
.earlyZTestFailCount
+=
245 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
247 // earlyStencil test compute
248 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
249 mDSNullPS
.earlyStencilTestFailCount
+=
250 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
254 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
256 // lateZ test compute
257 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
258 mDSSingleSample
.lateZTestFailCount
+=
259 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
261 // lateStencil test compute
262 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
263 mDSSingleSample
.lateStencilTestFailCount
+=
264 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
266 // lateZ test single and multi sample
267 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
268 mDSCombined
.lateZTestFailCount
+=
269 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
271 // lateStencil test single and multi sample
272 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
273 mDSCombined
.lateStencilTestFailCount
+=
274 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
279 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
281 // lateZ test compute
282 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
283 mDSSampleRate
.lateZTestFailCount
+=
284 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
286 // lateStencil test compute
287 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
288 mDSSampleRate
.lateStencilTestFailCount
+=
289 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
291 // lateZ test single and multi sample
292 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
293 mDSCombined
.lateZTestFailCount
+=
294 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
296 // lateStencil test single and multi sample
297 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
298 mDSCombined
.lateStencilTestFailCount
+=
299 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
304 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
306 // lateZ test compute
307 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
308 mDSNullPS
.lateZTestFailCount
+=
309 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
311 // lateStencil test compute
312 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
313 mDSNullPS
.lateStencilTestFailCount
+=
314 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
318 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
320 // earlyZ test compute
321 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
322 mDSPixelRate
.earlyZTestFailCount
+=
323 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
328 virtual void Handle(const LateDepthInfoPixelRate
& event
)
330 // lateZ test compute
331 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
332 mDSPixelRate
.lateZTestFailCount
+=
333 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
338 virtual void Handle(const ClipInfoEvent
& event
)
340 mClipper
.mustClipCount
+= _mm_popcnt_u32(event
.data
.clipMask
);
341 mClipper
.trivialRejectCount
+=
342 event
.data
.numInvocations
- _mm_popcnt_u32(event
.data
.validMask
);
343 mClipper
.trivialAcceptCount
+=
344 _mm_popcnt_u32(event
.data
.validMask
& ~event
.data
.clipMask
);
347 void UpdateStats(SWR_SHADER_STATS
* pStatTotals
, const SWR_SHADER_STATS
* pStatUpdate
)
349 pStatTotals
->numInstExecuted
+= pStatUpdate
->numInstExecuted
;
350 pStatTotals
->numSampleExecuted
+= pStatUpdate
->numSampleExecuted
;
351 pStatTotals
->numSampleLExecuted
+= pStatUpdate
->numSampleLExecuted
;
352 pStatTotals
->numSampleBExecuted
+= pStatUpdate
->numSampleBExecuted
;
353 pStatTotals
->numSampleCExecuted
+= pStatUpdate
->numSampleCExecuted
;
354 pStatTotals
->numSampleCLZExecuted
+= pStatUpdate
->numSampleCLZExecuted
;
355 pStatTotals
->numSampleCDExecuted
+= pStatUpdate
->numSampleCDExecuted
;
356 pStatTotals
->numGather4Executed
+= pStatUpdate
->numGather4Executed
;
357 pStatTotals
->numGather4CExecuted
+= pStatUpdate
->numGather4CExecuted
;
358 pStatTotals
->numGather4CPOExecuted
+= pStatUpdate
->numGather4CPOExecuted
;
359 pStatTotals
->numGather4CPOCExecuted
+= pStatUpdate
->numGather4CPOCExecuted
;
360 pStatTotals
->numLodExecuted
+= pStatUpdate
->numLodExecuted
;
363 virtual void Handle(const VSStats
& event
)
365 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
366 UpdateStats(&mShaderStats
[SHADER_VERTEX
], pStats
);
369 virtual void Handle(const GSStats
& event
)
371 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
372 UpdateStats(&mShaderStats
[SHADER_GEOMETRY
], pStats
);
375 virtual void Handle(const DSStats
& event
)
377 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
378 UpdateStats(&mShaderStats
[SHADER_DOMAIN
], pStats
);
381 virtual void Handle(const HSStats
& event
)
383 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
384 UpdateStats(&mShaderStats
[SHADER_HULL
], pStats
);
387 virtual void Handle(const PSStats
& event
)
389 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
390 UpdateStats(&mShaderStats
[SHADER_PIXEL
], pStats
);
394 virtual void Handle(const CSStats
& event
)
396 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
397 UpdateStats(&mShaderStats
[SHADER_COMPUTE
], pStats
);
// Emits this handler's cached per-draw statistics through EventHandlerFile::Handle, each
// event tagged with drawId, in this order: pixel and compute shader stats, depth/stencil
// tallies (single-sample, sample-rate, combined, pixel-rate, null-PS), raster tiles, alpha
// tallies and cull tallies. Afterwards mDSSingleSample and the pixel/compute entries of
// mShaderStats are cleared.
// NOTE(review): the statement controlled by the mNeedFlush check is not visible in this
// excerpt (the embedded original line numbers skip 405-406), and nothing visible in this
// excerpt sets mNeedFlush to true; confirm both against the full file.
// NOTE(review): only mDSSingleSample and two mShaderStats entries are visibly reset at the
// end (original lines 498-506 and 509 onward are skipped); confirm whether the remaining
// tallies are reset too.
// NOTE(review): the late* tallies of mDSNullPS are accumulated by another handler in this
// class, but no event visible here reports them.
401 // Flush cached events for this draw
402 virtual void FlushDraw(uint32_t drawId
)
404 if (mNeedFlush
== false)
// Pixel shader stats for this draw.
407 EventHandlerFile::Handle(PSInfo(drawId
,
408 mShaderStats
[SHADER_PIXEL
].numInstExecuted
,
409 mShaderStats
[SHADER_PIXEL
].numSampleExecuted
,
410 mShaderStats
[SHADER_PIXEL
].numSampleLExecuted
,
411 mShaderStats
[SHADER_PIXEL
].numSampleBExecuted
,
412 mShaderStats
[SHADER_PIXEL
].numSampleCExecuted
,
413 mShaderStats
[SHADER_PIXEL
].numSampleCLZExecuted
,
414 mShaderStats
[SHADER_PIXEL
].numSampleCDExecuted
,
415 mShaderStats
[SHADER_PIXEL
].numGather4Executed
,
416 mShaderStats
[SHADER_PIXEL
].numGather4CExecuted
,
417 mShaderStats
[SHADER_PIXEL
].numGather4CPOExecuted
,
418 mShaderStats
[SHADER_PIXEL
].numGather4CPOCExecuted
,
419 mShaderStats
[SHADER_PIXEL
].numLodExecuted
));
// Compute shader stats for this draw.
420 EventHandlerFile::Handle(CSInfo(drawId
,
421 mShaderStats
[SHADER_COMPUTE
].numInstExecuted
,
422 mShaderStats
[SHADER_COMPUTE
].numSampleExecuted
,
423 mShaderStats
[SHADER_COMPUTE
].numSampleLExecuted
,
424 mShaderStats
[SHADER_COMPUTE
].numSampleBExecuted
,
425 mShaderStats
[SHADER_COMPUTE
].numSampleCExecuted
,
426 mShaderStats
[SHADER_COMPUTE
].numSampleCLZExecuted
,
427 mShaderStats
[SHADER_COMPUTE
].numSampleCDExecuted
,
428 mShaderStats
[SHADER_COMPUTE
].numGather4Executed
,
429 mShaderStats
[SHADER_COMPUTE
].numGather4CExecuted
,
430 mShaderStats
[SHADER_COMPUTE
].numGather4CPOExecuted
,
431 mShaderStats
[SHADER_COMPUTE
].numGather4CPOCExecuted
,
432 mShaderStats
[SHADER_COMPUTE
].numLodExecuted
));
// Single-sample depth/stencil tallies.
435 EventHandlerFile::Handle(EarlyZSingleSample(
436 drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
));
437 EventHandlerFile::Handle(LateZSingleSample(
438 drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
));
439 EventHandlerFile::Handle(
440 EarlyStencilSingleSample(drawId
,
441 mDSSingleSample
.earlyStencilTestPassCount
,
442 mDSSingleSample
.earlyStencilTestFailCount
));
443 EventHandlerFile::Handle(
444 LateStencilSingleSample(drawId
,
445 mDSSingleSample
.lateStencilTestPassCount
,
446 mDSSingleSample
.lateStencilTestFailCount
));
// Sample-rate depth/stencil tallies.
449 EventHandlerFile::Handle(EarlyZSampleRate(
450 drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
));
451 EventHandlerFile::Handle(LateZSampleRate(
452 drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
));
453 EventHandlerFile::Handle(
454 EarlyStencilSampleRate(drawId
,
455 mDSSampleRate
.earlyStencilTestPassCount
,
456 mDSSampleRate
.earlyStencilTestFailCount
));
457 EventHandlerFile::Handle(LateStencilSampleRate(drawId
,
458 mDSSampleRate
.lateStencilTestPassCount
,
459 mDSSampleRate
.lateStencilTestFailCount
));
// Combined depth/stencil tallies.
462 EventHandlerFile::Handle(
463 EarlyZ(drawId
, mDSCombined
.earlyZTestPassCount
, mDSCombined
.earlyZTestFailCount
));
464 EventHandlerFile::Handle(
465 LateZ(drawId
, mDSCombined
.lateZTestPassCount
, mDSCombined
.lateZTestFailCount
));
466 EventHandlerFile::Handle(EarlyStencil(drawId
,
467 mDSCombined
.earlyStencilTestPassCount
,
468 mDSCombined
.earlyStencilTestFailCount
));
469 EventHandlerFile::Handle(LateStencil(drawId
,
470 mDSCombined
.lateStencilTestPassCount
,
471 mDSCombined
.lateStencilTestFailCount
));
// Pixel-rate depth tallies.
474 EventHandlerFile::Handle(EarlyZPixelRate(
475 drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
));
476 EventHandlerFile::Handle(LateZPixelRate(
477 drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
));
// Null-PS early depth/stencil tallies.
481 EventHandlerFile::Handle(
482 EarlyZNullPS(drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
));
483 EventHandlerFile::Handle(EarlyStencilNullPS(
484 drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
));
486 // Rasterized Subspans
487 EventHandlerFile::Handle(RasterTiles(drawId
, rastStats
.rasterTiles
));
// Alpha test/blend tallies.
490 EventHandlerFile::Handle(
491 AlphaEvent(drawId
, mAlphaStats
.alphaTestCount
, mAlphaStats
.alphaBlendCount
));
// Cull tallies (backface first, then degenerate).
494 EventHandlerFile::Handle(
495 CullEvent(drawId
, mCullStats
.backfacePrimCount
, mCullStats
.degeneratePrimCount
));
// Clear the tallies that have just been reported.
497 mDSSingleSample
= {};
507 mShaderStats
[SHADER_PIXEL
] = {};
508 mShaderStats
[SHADER_COMPUTE
] = {};
513 virtual void Handle(const FrontendDrawEndEvent
& event
)
516 EventHandlerFile::Handle(ClipperEvent(event
.data
.drawId
,
517 mClipper
.trivialRejectCount
,
518 mClipper
.trivialAcceptCount
,
519 mClipper
.mustClipCount
));
522 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
525 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
526 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
527 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
529 EventHandlerFile::Handle(VSInfo(event
.data
.drawId
,
530 mShaderStats
[SHADER_VERTEX
].numInstExecuted
,
531 mShaderStats
[SHADER_VERTEX
].numSampleExecuted
,
532 mShaderStats
[SHADER_VERTEX
].numSampleLExecuted
,
533 mShaderStats
[SHADER_VERTEX
].numSampleBExecuted
,
534 mShaderStats
[SHADER_VERTEX
].numSampleCExecuted
,
535 mShaderStats
[SHADER_VERTEX
].numSampleCLZExecuted
,
536 mShaderStats
[SHADER_VERTEX
].numSampleCDExecuted
,
537 mShaderStats
[SHADER_VERTEX
].numGather4Executed
,
538 mShaderStats
[SHADER_VERTEX
].numGather4CExecuted
,
539 mShaderStats
[SHADER_VERTEX
].numGather4CPOExecuted
,
540 mShaderStats
[SHADER_VERTEX
].numGather4CPOCExecuted
,
541 mShaderStats
[SHADER_VERTEX
].numLodExecuted
));
542 EventHandlerFile::Handle(HSInfo(event
.data
.drawId
,
543 mShaderStats
[SHADER_HULL
].numInstExecuted
,
544 mShaderStats
[SHADER_HULL
].numSampleExecuted
,
545 mShaderStats
[SHADER_HULL
].numSampleLExecuted
,
546 mShaderStats
[SHADER_HULL
].numSampleBExecuted
,
547 mShaderStats
[SHADER_HULL
].numSampleCExecuted
,
548 mShaderStats
[SHADER_HULL
].numSampleCLZExecuted
,
549 mShaderStats
[SHADER_HULL
].numSampleCDExecuted
,
550 mShaderStats
[SHADER_HULL
].numGather4Executed
,
551 mShaderStats
[SHADER_HULL
].numGather4CExecuted
,
552 mShaderStats
[SHADER_HULL
].numGather4CPOExecuted
,
553 mShaderStats
[SHADER_HULL
].numGather4CPOCExecuted
,
554 mShaderStats
[SHADER_HULL
].numLodExecuted
));
555 EventHandlerFile::Handle(DSInfo(event
.data
.drawId
,
556 mShaderStats
[SHADER_DOMAIN
].numInstExecuted
,
557 mShaderStats
[SHADER_DOMAIN
].numSampleExecuted
,
558 mShaderStats
[SHADER_DOMAIN
].numSampleLExecuted
,
559 mShaderStats
[SHADER_DOMAIN
].numSampleBExecuted
,
560 mShaderStats
[SHADER_DOMAIN
].numSampleCExecuted
,
561 mShaderStats
[SHADER_DOMAIN
].numSampleCLZExecuted
,
562 mShaderStats
[SHADER_DOMAIN
].numSampleCDExecuted
,
563 mShaderStats
[SHADER_DOMAIN
].numGather4Executed
,
564 mShaderStats
[SHADER_DOMAIN
].numGather4CExecuted
,
565 mShaderStats
[SHADER_DOMAIN
].numGather4CPOExecuted
,
566 mShaderStats
[SHADER_DOMAIN
].numGather4CPOCExecuted
,
567 mShaderStats
[SHADER_DOMAIN
].numLodExecuted
));
568 EventHandlerFile::Handle(GSInfo(event
.data
.drawId
,
569 mShaderStats
[SHADER_GEOMETRY
].numInstExecuted
,
570 mShaderStats
[SHADER_GEOMETRY
].numSampleExecuted
,
571 mShaderStats
[SHADER_GEOMETRY
].numSampleLExecuted
,
572 mShaderStats
[SHADER_GEOMETRY
].numSampleBExecuted
,
573 mShaderStats
[SHADER_GEOMETRY
].numSampleCExecuted
,
574 mShaderStats
[SHADER_GEOMETRY
].numSampleCLZExecuted
,
575 mShaderStats
[SHADER_GEOMETRY
].numSampleCDExecuted
,
576 mShaderStats
[SHADER_GEOMETRY
].numGather4Executed
,
577 mShaderStats
[SHADER_GEOMETRY
].numGather4CExecuted
,
578 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOExecuted
,
579 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOCExecuted
,
580 mShaderStats
[SHADER_GEOMETRY
].numLodExecuted
));
582 mShaderStats
[SHADER_VERTEX
] = {};
583 mShaderStats
[SHADER_HULL
] = {};
584 mShaderStats
[SHADER_DOMAIN
] = {};
585 mShaderStats
[SHADER_GEOMETRY
] = {};
587 // Reset Internal Counters
593 virtual void Handle(const GSPrimInfo
& event
)
595 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
596 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
597 mGS
.vertsInput
+= event
.data
.vertsInput
;
600 virtual void Handle(const TessPrimCount
& event
) { mTS
.inputPrims
+= event
.data
.primCount
; }
602 virtual void Handle(const RasterTileCount
& event
)
604 rastStats
.rasterTiles
+= event
.data
.rasterTiles
;
607 virtual void Handle(const CullInfoEvent
& event
)
609 mCullStats
.degeneratePrimCount
+= _mm_popcnt_u32(
610 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.degeneratePrimMask
));
611 mCullStats
.backfacePrimCount
+= _mm_popcnt_u32(
612 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.backfacePrimMask
));
615 virtual void Handle(const AlphaInfoEvent
& event
)
617 mAlphaStats
.alphaTestCount
+= event
.data
.alphaTestEnable
;
618 mAlphaStats
.alphaBlendCount
+= event
.data
.alphaBlendEnable
;
// Cached statistics of the worker stats handler. The handlers above accumulate into these
// members; FlushDraw and the FrontendDrawEndEvent handler report them.
// NOTE(review): the declarations of mNeedFlush and mTS, both used by this class, are not
// visible in this excerpt (the embedded original line numbers skip 631 and 636).

// Depth/stencil tallies, one set per event flavour.
624 DepthStencilStats mDSSingleSample
= {};
625 DepthStencilStats mDSSampleRate
= {};
626 DepthStencilStats mDSPixelRate
= {};
627 DepthStencilStats mDSCombined
= {};
628 DepthStencilStats mDSNullPS
= {};
// NOTE(review): mDSOmZ is not referenced anywhere else in this excerpt.
629 DepthStencilStats mDSOmZ
= {};
// Clipper tallies.
630 CStats mClipper
= {};
// Geometry shader tallies.
632 GSStateInfo mGS
= {};
// Raster tile tally.
633 RastStats rastStats
= {};
// Cull tallies.
634 CullStats mCullStats
= {};
// Alpha test/blend tallies.
635 AlphaStats mAlphaStats
= {};
// Shader stats indexed by shader type (SHADER_VERTEX, SHADER_PIXEL, ...); zero-filled by
// the constructor.
637 SWR_SHADER_STATS mShaderStats
[NUM_SHADER_TYPES
];
641 static EventManager
* FromHandle(HANDLE hThreadContext
)
643 return reinterpret_cast<EventManager
*>(hThreadContext
);
// Creates a thread context: allocates an EventManager, attaches a stats handler chosen by
// `type` (EventHandlerApiStats for AR_THREAD::API, EventHandlerWorkerStats otherwise is
// the apparent intent), sends that handler its thread-start event and calls MarkHeader().
// Each call draws a fresh handler id from a function-local atomic counter.
// NOTE(review): this excerpt shows no `else` between the two handler set-ups, no `return`
// statement, and no condition guarding SWR_INVALID; the embedded original line numbers
// skip exactly where those lines would be (654-656, 664-666, 673-676, 678). Confirm the
// control flow against the full file before editing.
646 // Construct an event manager and associate a handler with it.
647 HANDLE
CreateThreadContext(AR_THREAD type
)
649 // Can we assume single threaded here?
// fetch_add returns the counter value before the increment, which becomes this id.
650 static std::atomic
<uint32_t> counter(0);
651 uint32_t id
= counter
.fetch_add(1);
653 EventManager
* pManager
= new EventManager();
657 EventHandlerFile
* pHandler
= nullptr;
// API threads get the API stats handler.
659 if (type
== AR_THREAD::API
)
661 pHandler
= new EventHandlerApiStats(id
);
662 pManager
->Attach(pHandler
);
663 pHandler
->Handle(ThreadStartApiEvent());
// Worker stats handler set-up.
667 pHandler
= new EventHandlerWorkerStats(id
);
668 pManager
->Attach(pHandler
);
669 pHandler
->Handle(ThreadStartWorkerEvent());
672 pHandler
->MarkHeader();
677 SWR_INVALID("Failed to register thread.");
681 void DestroyThreadContext(HANDLE hThreadContext
)
683 EventManager
* pManager
= FromHandle(hThreadContext
);
684 SWR_ASSERT(pManager
!= nullptr);
689 // Dispatch event for this thread.
690 void Dispatch(HANDLE hThreadContext
, const Event
& event
)
692 if (event
.IsEnabled())
694 EventManager
* pManager
= reinterpret_cast<EventManager
*>(hThreadContext
);
695 SWR_ASSERT(pManager
!= nullptr);
696 pManager
->Dispatch(event
);
700 // Flush for this thread.
701 void FlushDraw(HANDLE hThreadContext
, uint32_t drawId
)
703 EventManager
* pManager
= FromHandle(hThreadContext
);
704 SWR_ASSERT(pManager
!= nullptr);
706 pManager
->FlushDraw(drawId
);
708 } // namespace ArchRast