1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
31 #include "common/os.h"
32 #include "archrast/archrast.h"
33 #include "archrast/eventmanager.h"
34 #include "gen_ar_eventhandlerfile.hpp"
38 //////////////////////////////////////////////////////////////////////////
39 /// @brief struct that keeps track of depth and stencil event information
/// Every counter starts at zero, so assigning `{}` to an instance resets all of them.
struct DepthStencilStats
{
    // Depth (Z) test, early and late.
    uint32_t earlyZTestPassCount       = 0;
    uint32_t earlyZTestFailCount       = 0;
    uint32_t lateZTestPassCount        = 0;
    uint32_t lateZTestFailCount        = 0;

    // Stencil test, early and late.
    uint32_t earlyStencilTestPassCount = 0;
    uint32_t earlyStencilTestFailCount = 0;
    uint32_t lateStencilTestPassCount  = 0;
    uint32_t lateStencilTestFailCount  = 0;
};
54 uint32_t trivialRejectCount
;
55 uint32_t trivialAcceptCount
;
56 uint32_t mustClipCount
;
61 uint32_t inputPrims
= 0;
62 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
67 uint32_t inputPrimCount
;
68 uint32_t primGeneratedCount
;
74 uint32_t rasterTiles
= 0;
79 uint32_t degeneratePrimCount
= 0;
80 uint32_t backfacePrimCount
= 0;
85 uint32_t alphaTestCount
= 0;
86 uint32_t alphaBlendCount
= 0;
91 struct MemoryTrackerKey
97 struct MemoryTrackerData
99 uint32_t accessCountRead
;
100 uint32_t accessCountWrite
;
105 struct AddressRangeComparator
107 bool operator()(MemoryTrackerKey a
, MemoryTrackerKey b
) const
109 return (a
.address
& a
.mask
) < (b
.address
& b
.mask
);
113 typedef std::map
<MemoryTrackerKey
, MemoryTrackerData
, AddressRangeComparator
> MemoryTrackerMap
;
114 MemoryTrackerMap trackedMemory
= {};
116 void TrackMemoryAccess(uint64_t address
, uint64_t addressMask
, uint8_t isRead
, uint64_t tsc
)
118 MemoryTrackerKey key
;
119 key
.address
= address
;
120 key
.mask
= addressMask
;
122 MemoryTrackerMap::iterator i
= trackedMemory
.lower_bound(key
);
123 if (i
!= trackedMemory
.end() && !(trackedMemory
.key_comp()(key
, i
->first
)))
128 i
->second
.accessCountRead
++;
132 i
->second
.accessCountWrite
++;
134 i
->second
.tscMax
= tsc
;
139 MemoryTrackerData data
;
142 data
.accessCountRead
= 1;
143 data
.accessCountWrite
= 0;
147 data
.accessCountRead
= 0;
148 data
.accessCountWrite
= 1;
152 trackedMemory
.insert(i
, MemoryTrackerMap::value_type(key
, data
));
157 //////////////////////////////////////////////////////////////////////////
158 /// @brief Event handler that handles API thread events. This is shared
159 /// between the API and its caller (e.g. driver shim) but typically
160 /// there is only a single API thread per context. So you can save
161 /// information in the class to be used for other events.
162 class EventHandlerApiStats
: public EventHandlerFile
165 EventHandlerApiStats(uint32_t id
) : EventHandlerFile(id
)
168 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
169 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
170 // exists, this will attempt to copy it the first time we get here to package it with
171 // the stats. Otherwise, the user would need to specify the events.proto location when
172 // parsing the stats in post.
173 std::stringstream eventsProtoSrcFilename
, eventsProtoDstFilename
;
174 eventsProtoSrcFilename
<< KNOB_DEBUG_OUTPUT_DIR
<< "\\events.proto" << std::ends
;
175 eventsProtoDstFilename
<< mOutputDir
.substr(0, mOutputDir
.size() - 1)
176 << "\\events.proto" << std::ends
;
178 // If event.proto already exists, we're done; else do the copy
179 struct stat buf
; // Use a Posix stat for file existence check
180 if (!stat(eventsProtoDstFilename
.str().c_str(), &buf
) == 0)
182 // Now check to make sure the events.proto source exists
183 if (stat(eventsProtoSrcFilename
.str().c_str(), &buf
) == 0)
185 std::ifstream srcFile
;
186 srcFile
.open(eventsProtoSrcFilename
.str().c_str(), std::ios::binary
);
187 if (srcFile
.is_open())
189 // Just do a binary buffer copy
190 std::ofstream dstFile
;
191 dstFile
.open(eventsProtoDstFilename
.str().c_str(), std::ios::binary
);
192 dstFile
<< srcFile
.rdbuf();
201 virtual void Handle(const DrawInstancedEvent
& event
)
203 DrawInfoEvent
e(event
.data
.drawId
,
206 event
.data
.numVertices
,
209 event
.data
.startVertex
,
210 event
.data
.numInstances
,
211 event
.data
.startInstance
,
215 event
.data
.soTopology
,
218 EventHandlerFile::Handle(e
);
221 virtual void Handle(const DrawIndexedInstancedEvent
& event
)
223 DrawInfoEvent
e(event
.data
.drawId
,
224 ArchRast::IndexedInstanced
,
227 event
.data
.numIndices
,
228 event
.data
.indexOffset
,
229 event
.data
.baseVertex
,
230 event
.data
.numInstances
,
231 event
.data
.startInstance
,
235 event
.data
.soTopology
,
238 EventHandlerFile::Handle(e
);
242 //////////////////////////////////////////////////////////////////////////
243 /// @brief Event handler that handles worker thread events. There is one
244 /// event handler per thread. The python script will need to sum
245 /// up counters across all of the threads.
246 class EventHandlerWorkerStats
: public EventHandlerFile
249 EventHandlerWorkerStats(uint32_t id
) : EventHandlerFile(id
), mNeedFlush(false)
251 memset(mShaderStats
, 0, sizeof(mShaderStats
));
253 // compute address mask for memory tracking
255 uint64_t addressRangeBytes
= 64;
256 while (addressRangeBytes
> 0)
258 mAddressMask
= (mAddressMask
<< 1) | 1;
259 addressRangeBytes
= addressRangeBytes
>> 1;
261 mAddressMask
= ~mAddressMask
;
264 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
266 // earlyZ test compute
267 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
268 mDSSingleSample
.earlyZTestFailCount
+=
269 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
271 // earlyStencil test compute
272 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
273 mDSSingleSample
.earlyStencilTestFailCount
+=
274 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
276 // earlyZ test single and multi sample
277 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
278 mDSCombined
.earlyZTestFailCount
+=
279 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
281 // earlyStencil test single and multi sample
282 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
283 mDSCombined
.earlyStencilTestFailCount
+=
284 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
289 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
291 // earlyZ test compute
292 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
293 mDSSampleRate
.earlyZTestFailCount
+=
294 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
296 // earlyStencil test compute
297 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
298 mDSSampleRate
.earlyStencilTestFailCount
+=
299 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
301 // earlyZ test single and multi sample
302 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
303 mDSCombined
.earlyZTestFailCount
+=
304 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
306 // earlyStencil test single and multi sample
307 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
308 mDSCombined
.earlyStencilTestFailCount
+=
309 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
314 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
316 // earlyZ test compute
317 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
318 mDSNullPS
.earlyZTestFailCount
+=
319 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
321 // earlyStencil test compute
322 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
323 mDSNullPS
.earlyStencilTestFailCount
+=
324 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
328 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
330 // lateZ test compute
331 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
332 mDSSingleSample
.lateZTestFailCount
+=
333 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
335 // lateStencil test compute
336 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
337 mDSSingleSample
.lateStencilTestFailCount
+=
338 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
340 // lateZ test single and multi sample
341 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
342 mDSCombined
.lateZTestFailCount
+=
343 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
345 // lateStencil test single and multi sample
346 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
347 mDSCombined
.lateStencilTestFailCount
+=
348 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
353 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
355 // lateZ test compute
356 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
357 mDSSampleRate
.lateZTestFailCount
+=
358 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
360 // lateStencil test compute
361 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
362 mDSSampleRate
.lateStencilTestFailCount
+=
363 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
365 // lateZ test single and multi sample
366 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
367 mDSCombined
.lateZTestFailCount
+=
368 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
370 // lateStencil test single and multi sample
371 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
372 mDSCombined
.lateStencilTestFailCount
+=
373 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
378 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
380 // lateZ test compute
381 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
382 mDSNullPS
.lateZTestFailCount
+=
383 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
385 // lateStencil test compute
386 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
387 mDSNullPS
.lateStencilTestFailCount
+=
388 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
392 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
394 // earlyZ test compute
395 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
396 mDSPixelRate
.earlyZTestFailCount
+=
397 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
402 virtual void Handle(const LateDepthInfoPixelRate
& event
)
404 // lateZ test compute
405 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
406 mDSPixelRate
.lateZTestFailCount
+=
407 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
412 virtual void Handle(const ClipInfoEvent
& event
)
414 mClipper
.mustClipCount
+= _mm_popcnt_u32(event
.data
.clipMask
);
415 mClipper
.trivialRejectCount
+=
416 event
.data
.numInvocations
- _mm_popcnt_u32(event
.data
.validMask
);
417 mClipper
.trivialAcceptCount
+=
418 _mm_popcnt_u32(event
.data
.validMask
& ~event
.data
.clipMask
);
421 void UpdateStats(SWR_SHADER_STATS
* pStatTotals
, const SWR_SHADER_STATS
* pStatUpdate
)
423 pStatTotals
->numInstExecuted
+= pStatUpdate
->numInstExecuted
;
424 pStatTotals
->numSampleExecuted
+= pStatUpdate
->numSampleExecuted
;
425 pStatTotals
->numSampleLExecuted
+= pStatUpdate
->numSampleLExecuted
;
426 pStatTotals
->numSampleBExecuted
+= pStatUpdate
->numSampleBExecuted
;
427 pStatTotals
->numSampleCExecuted
+= pStatUpdate
->numSampleCExecuted
;
428 pStatTotals
->numSampleCLZExecuted
+= pStatUpdate
->numSampleCLZExecuted
;
429 pStatTotals
->numSampleCDExecuted
+= pStatUpdate
->numSampleCDExecuted
;
430 pStatTotals
->numGather4Executed
+= pStatUpdate
->numGather4Executed
;
431 pStatTotals
->numGather4CExecuted
+= pStatUpdate
->numGather4CExecuted
;
432 pStatTotals
->numGather4CPOExecuted
+= pStatUpdate
->numGather4CPOExecuted
;
433 pStatTotals
->numGather4CPOCExecuted
+= pStatUpdate
->numGather4CPOCExecuted
;
434 pStatTotals
->numLodExecuted
+= pStatUpdate
->numLodExecuted
;
437 virtual void Handle(const VSStats
& event
)
439 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
440 UpdateStats(&mShaderStats
[SHADER_VERTEX
], pStats
);
443 virtual void Handle(const GSStats
& event
)
445 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
446 UpdateStats(&mShaderStats
[SHADER_GEOMETRY
], pStats
);
449 virtual void Handle(const DSStats
& event
)
451 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
452 UpdateStats(&mShaderStats
[SHADER_DOMAIN
], pStats
);
455 virtual void Handle(const HSStats
& event
)
457 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
458 UpdateStats(&mShaderStats
[SHADER_HULL
], pStats
);
461 virtual void Handle(const PSStats
& event
)
463 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
464 UpdateStats(&mShaderStats
[SHADER_PIXEL
], pStats
);
468 virtual void Handle(const CSStats
& event
)
470 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
471 UpdateStats(&mShaderStats
[SHADER_COMPUTE
], pStats
);
475 // Flush cached events for this draw
476 virtual void FlushDraw(uint32_t drawId
)
478 if (mNeedFlush
== false)
481 EventHandlerFile::Handle(PSInfo(drawId
,
482 mShaderStats
[SHADER_PIXEL
].numInstExecuted
,
483 mShaderStats
[SHADER_PIXEL
].numSampleExecuted
,
484 mShaderStats
[SHADER_PIXEL
].numSampleLExecuted
,
485 mShaderStats
[SHADER_PIXEL
].numSampleBExecuted
,
486 mShaderStats
[SHADER_PIXEL
].numSampleCExecuted
,
487 mShaderStats
[SHADER_PIXEL
].numSampleCLZExecuted
,
488 mShaderStats
[SHADER_PIXEL
].numSampleCDExecuted
,
489 mShaderStats
[SHADER_PIXEL
].numGather4Executed
,
490 mShaderStats
[SHADER_PIXEL
].numGather4CExecuted
,
491 mShaderStats
[SHADER_PIXEL
].numGather4CPOExecuted
,
492 mShaderStats
[SHADER_PIXEL
].numGather4CPOCExecuted
,
493 mShaderStats
[SHADER_PIXEL
].numLodExecuted
));
494 EventHandlerFile::Handle(CSInfo(drawId
,
495 mShaderStats
[SHADER_COMPUTE
].numInstExecuted
,
496 mShaderStats
[SHADER_COMPUTE
].numSampleExecuted
,
497 mShaderStats
[SHADER_COMPUTE
].numSampleLExecuted
,
498 mShaderStats
[SHADER_COMPUTE
].numSampleBExecuted
,
499 mShaderStats
[SHADER_COMPUTE
].numSampleCExecuted
,
500 mShaderStats
[SHADER_COMPUTE
].numSampleCLZExecuted
,
501 mShaderStats
[SHADER_COMPUTE
].numSampleCDExecuted
,
502 mShaderStats
[SHADER_COMPUTE
].numGather4Executed
,
503 mShaderStats
[SHADER_COMPUTE
].numGather4CExecuted
,
504 mShaderStats
[SHADER_COMPUTE
].numGather4CPOExecuted
,
505 mShaderStats
[SHADER_COMPUTE
].numGather4CPOCExecuted
,
506 mShaderStats
[SHADER_COMPUTE
].numLodExecuted
));
509 EventHandlerFile::Handle(EarlyZSingleSample(
510 drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
));
511 EventHandlerFile::Handle(LateZSingleSample(
512 drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
));
513 EventHandlerFile::Handle(
514 EarlyStencilSingleSample(drawId
,
515 mDSSingleSample
.earlyStencilTestPassCount
,
516 mDSSingleSample
.earlyStencilTestFailCount
));
517 EventHandlerFile::Handle(
518 LateStencilSingleSample(drawId
,
519 mDSSingleSample
.lateStencilTestPassCount
,
520 mDSSingleSample
.lateStencilTestFailCount
));
523 EventHandlerFile::Handle(EarlyZSampleRate(
524 drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
));
525 EventHandlerFile::Handle(LateZSampleRate(
526 drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
));
527 EventHandlerFile::Handle(
528 EarlyStencilSampleRate(drawId
,
529 mDSSampleRate
.earlyStencilTestPassCount
,
530 mDSSampleRate
.earlyStencilTestFailCount
));
531 EventHandlerFile::Handle(LateStencilSampleRate(drawId
,
532 mDSSampleRate
.lateStencilTestPassCount
,
533 mDSSampleRate
.lateStencilTestFailCount
));
536 EventHandlerFile::Handle(
537 EarlyZ(drawId
, mDSCombined
.earlyZTestPassCount
, mDSCombined
.earlyZTestFailCount
));
538 EventHandlerFile::Handle(
539 LateZ(drawId
, mDSCombined
.lateZTestPassCount
, mDSCombined
.lateZTestFailCount
));
540 EventHandlerFile::Handle(EarlyStencil(drawId
,
541 mDSCombined
.earlyStencilTestPassCount
,
542 mDSCombined
.earlyStencilTestFailCount
));
543 EventHandlerFile::Handle(LateStencil(drawId
,
544 mDSCombined
.lateStencilTestPassCount
,
545 mDSCombined
.lateStencilTestFailCount
));
548 EventHandlerFile::Handle(EarlyZPixelRate(
549 drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
));
550 EventHandlerFile::Handle(LateZPixelRate(
551 drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
));
555 EventHandlerFile::Handle(
556 EarlyZNullPS(drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
));
557 EventHandlerFile::Handle(EarlyStencilNullPS(
558 drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
));
560 // Rasterized Subspans
561 EventHandlerFile::Handle(RasterTiles(drawId
, rastStats
.rasterTiles
));
564 EventHandlerFile::Handle(
565 AlphaEvent(drawId
, mAlphaStats
.alphaTestCount
, mAlphaStats
.alphaBlendCount
));
568 EventHandlerFile::Handle(
569 CullEvent(drawId
, mCullStats
.backfacePrimCount
, mCullStats
.degeneratePrimCount
));
571 mDSSingleSample
= {};
581 mShaderStats
[SHADER_PIXEL
] = {};
582 mShaderStats
[SHADER_COMPUTE
] = {};
587 virtual void Handle(const FrontendDrawEndEvent
& event
)
590 EventHandlerFile::Handle(ClipperEvent(event
.data
.drawId
,
591 mClipper
.trivialRejectCount
,
592 mClipper
.trivialAcceptCount
,
593 mClipper
.mustClipCount
));
596 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
599 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
600 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
601 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
603 EventHandlerFile::Handle(VSInfo(event
.data
.drawId
,
604 mShaderStats
[SHADER_VERTEX
].numInstExecuted
,
605 mShaderStats
[SHADER_VERTEX
].numSampleExecuted
,
606 mShaderStats
[SHADER_VERTEX
].numSampleLExecuted
,
607 mShaderStats
[SHADER_VERTEX
].numSampleBExecuted
,
608 mShaderStats
[SHADER_VERTEX
].numSampleCExecuted
,
609 mShaderStats
[SHADER_VERTEX
].numSampleCLZExecuted
,
610 mShaderStats
[SHADER_VERTEX
].numSampleCDExecuted
,
611 mShaderStats
[SHADER_VERTEX
].numGather4Executed
,
612 mShaderStats
[SHADER_VERTEX
].numGather4CExecuted
,
613 mShaderStats
[SHADER_VERTEX
].numGather4CPOExecuted
,
614 mShaderStats
[SHADER_VERTEX
].numGather4CPOCExecuted
,
615 mShaderStats
[SHADER_VERTEX
].numLodExecuted
));
616 EventHandlerFile::Handle(HSInfo(event
.data
.drawId
,
617 mShaderStats
[SHADER_HULL
].numInstExecuted
,
618 mShaderStats
[SHADER_HULL
].numSampleExecuted
,
619 mShaderStats
[SHADER_HULL
].numSampleLExecuted
,
620 mShaderStats
[SHADER_HULL
].numSampleBExecuted
,
621 mShaderStats
[SHADER_HULL
].numSampleCExecuted
,
622 mShaderStats
[SHADER_HULL
].numSampleCLZExecuted
,
623 mShaderStats
[SHADER_HULL
].numSampleCDExecuted
,
624 mShaderStats
[SHADER_HULL
].numGather4Executed
,
625 mShaderStats
[SHADER_HULL
].numGather4CExecuted
,
626 mShaderStats
[SHADER_HULL
].numGather4CPOExecuted
,
627 mShaderStats
[SHADER_HULL
].numGather4CPOCExecuted
,
628 mShaderStats
[SHADER_HULL
].numLodExecuted
));
629 EventHandlerFile::Handle(DSInfo(event
.data
.drawId
,
630 mShaderStats
[SHADER_DOMAIN
].numInstExecuted
,
631 mShaderStats
[SHADER_DOMAIN
].numSampleExecuted
,
632 mShaderStats
[SHADER_DOMAIN
].numSampleLExecuted
,
633 mShaderStats
[SHADER_DOMAIN
].numSampleBExecuted
,
634 mShaderStats
[SHADER_DOMAIN
].numSampleCExecuted
,
635 mShaderStats
[SHADER_DOMAIN
].numSampleCLZExecuted
,
636 mShaderStats
[SHADER_DOMAIN
].numSampleCDExecuted
,
637 mShaderStats
[SHADER_DOMAIN
].numGather4Executed
,
638 mShaderStats
[SHADER_DOMAIN
].numGather4CExecuted
,
639 mShaderStats
[SHADER_DOMAIN
].numGather4CPOExecuted
,
640 mShaderStats
[SHADER_DOMAIN
].numGather4CPOCExecuted
,
641 mShaderStats
[SHADER_DOMAIN
].numLodExecuted
));
642 EventHandlerFile::Handle(GSInfo(event
.data
.drawId
,
643 mShaderStats
[SHADER_GEOMETRY
].numInstExecuted
,
644 mShaderStats
[SHADER_GEOMETRY
].numSampleExecuted
,
645 mShaderStats
[SHADER_GEOMETRY
].numSampleLExecuted
,
646 mShaderStats
[SHADER_GEOMETRY
].numSampleBExecuted
,
647 mShaderStats
[SHADER_GEOMETRY
].numSampleCExecuted
,
648 mShaderStats
[SHADER_GEOMETRY
].numSampleCLZExecuted
,
649 mShaderStats
[SHADER_GEOMETRY
].numSampleCDExecuted
,
650 mShaderStats
[SHADER_GEOMETRY
].numGather4Executed
,
651 mShaderStats
[SHADER_GEOMETRY
].numGather4CExecuted
,
652 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOExecuted
,
653 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOCExecuted
,
654 mShaderStats
[SHADER_GEOMETRY
].numLodExecuted
));
656 mShaderStats
[SHADER_VERTEX
] = {};
657 mShaderStats
[SHADER_HULL
] = {};
658 mShaderStats
[SHADER_DOMAIN
] = {};
659 mShaderStats
[SHADER_GEOMETRY
] = {};
661 // Reset Internal Counters
667 virtual void Handle(const MemoryAccessEvent
& event
)
669 mMemoryStats
.TrackMemoryAccess(event
.data
.ptr
, mAddressMask
, event
.data
.isRead
, event
.data
.tsc
);
672 virtual void Handle(const MemoryStatsEndEvent
& event
)
674 MemoryStats::MemoryTrackerMap::iterator i
= mMemoryStats
.trackedMemory
.begin();
675 while (i
!= mMemoryStats
.trackedMemory
.end())
677 MemoryStatsEvent
mse(event
.data
.drawId
,
678 i
->first
.address
& mAddressMask
,
679 i
->second
.accessCountRead
,
680 i
->second
.accessCountWrite
,
683 EventHandlerFile::Handle(mse
);
686 mMemoryStats
.trackedMemory
.clear();
689 virtual void Handle(const GSPrimInfo
& event
)
691 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
692 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
693 mGS
.vertsInput
+= event
.data
.vertsInput
;
696 virtual void Handle(const TessPrimCount
& event
) { mTS
.inputPrims
+= event
.data
.primCount
; }
698 virtual void Handle(const RasterTileCount
& event
)
700 rastStats
.rasterTiles
+= event
.data
.rasterTiles
;
703 virtual void Handle(const CullInfoEvent
& event
)
705 mCullStats
.degeneratePrimCount
+= _mm_popcnt_u32(
706 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.degeneratePrimMask
));
707 mCullStats
.backfacePrimCount
+= _mm_popcnt_u32(
708 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.backfacePrimMask
));
711 virtual void Handle(const AlphaInfoEvent
& event
)
713 mAlphaStats
.alphaTestCount
+= event
.data
.alphaTestEnable
;
714 mAlphaStats
.alphaBlendCount
+= event
.data
.alphaBlendEnable
;
720 DepthStencilStats mDSSingleSample
= {};
721 DepthStencilStats mDSSampleRate
= {};
722 DepthStencilStats mDSPixelRate
= {};
723 DepthStencilStats mDSCombined
= {};
724 DepthStencilStats mDSNullPS
= {};
725 DepthStencilStats mDSOmZ
= {};
726 CStats mClipper
= {};
728 GSStateInfo mGS
= {};
729 RastStats rastStats
= {};
730 CullStats mCullStats
= {};
731 AlphaStats mAlphaStats
= {};
733 SWR_SHADER_STATS mShaderStats
[NUM_SHADER_TYPES
];
735 MemoryStats mMemoryStats
= {};
736 uint64_t mAddressMask
= 0;
740 static EventManager
* FromHandle(HANDLE hThreadContext
)
742 return reinterpret_cast<EventManager
*>(hThreadContext
);
745 // Construct an event manager and associate a handler with it.
746 HANDLE
CreateThreadContext(AR_THREAD type
)
748 // Can we assume single threaded here?
749 static std::atomic
<uint32_t> counter(0);
750 uint32_t id
= counter
.fetch_add(1);
752 EventManager
* pManager
= new EventManager();
756 EventHandlerFile
* pHandler
= nullptr;
758 if (type
== AR_THREAD::API
)
760 pHandler
= new EventHandlerApiStats(id
);
761 pManager
->Attach(pHandler
);
762 pHandler
->Handle(ThreadStartApiEvent());
766 pHandler
= new EventHandlerWorkerStats(id
);
767 pManager
->Attach(pHandler
);
768 pHandler
->Handle(ThreadStartWorkerEvent());
771 pHandler
->MarkHeader();
776 SWR_INVALID("Failed to register thread.");
780 void DestroyThreadContext(HANDLE hThreadContext
)
782 EventManager
* pManager
= FromHandle(hThreadContext
);
783 SWR_ASSERT(pManager
!= nullptr);
788 // Dispatch event for this thread.
789 void Dispatch(HANDLE hThreadContext
, const Event
& event
)
791 EventManager
* pManager
= FromHandle(hThreadContext
);
792 SWR_ASSERT(pManager
!= nullptr);
794 pManager
->Dispatch(event
);
797 // Flush for this thread.
798 void FlushDraw(HANDLE hThreadContext
, uint32_t drawId
)
800 EventManager
* pManager
= FromHandle(hThreadContext
);
801 SWR_ASSERT(pManager
!= nullptr);
803 pManager
->FlushDraw(drawId
);
805 } // namespace ArchRast