1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation for archrast.
27 ******************************************************************************/
31 #include "common/os.h"
32 #include "archrast/archrast.h"
33 #include "archrast/eventmanager.h"
34 #include "gen_ar_eventhandlerfile.hpp"
38 //////////////////////////////////////////////////////////////////////////
39 /// @brief struct that keeps track of depth and stencil event information
/// Pass/fail counters for the depth and stencil tests, split into the early
/// and late test stages. Every counter starts at zero.
struct DepthStencilStats
{
    // Depth (Z) test results.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test results.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
// NOTE(review): the struct headers that own the fields below are not visible in
// this view; the groupings are inferred from how the member instances are used.
// Clipper counters: accumulated through mClipper in Handle(const ClipInfoEvent&)
// and reported through ClipperEvent. They carry no in-class initializer.
54 uint32_t trivialRejectCount
;
55 uint32_t trivialAcceptCount
;
56 uint32_t mustClipCount
;
// Tessellation input primitives: Handle(const TessPrimCount&) adds to mTS.inputPrims.
61 uint32_t inputPrims
= 0;
62 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
// Geometry-shader primitive counters: accumulated through mGS in
// Handle(const GSPrimInfo&). They carry no in-class initializer.
67 uint32_t inputPrimCount
;
68 uint32_t primGeneratedCount
;
// Raster tile count: accumulated through rastStats in Handle(const RasterTileCount&).
74 uint32_t rasterTiles
= 0;
// Culling counters: accumulated through mCullStats in Handle(const CullInfoEvent&).
79 uint32_t degeneratePrimCount
= 0;
80 uint32_t backfacePrimCount
= 0;
// Alpha test/blend counters: accumulated through mAlphaStats in Handle(const AlphaInfoEvent&).
85 uint32_t alphaTestCount
= 0;
86 uint32_t alphaBlendCount
= 0;
// Key of the memory-tracking map. TrackMemoryAccess fills its `address` and `mask`
// members and AddressRangeComparator orders keys by (address & mask).
// NOTE(review): the member declarations are not visible in this view.
91 struct MemoryTrackerKey
// Per-range accumulators: number of read/write accesses and total bytes read/written.
// NOTE(review): TrackMemoryAccess also writes a `tscMax` member whose declaration
// is not visible in this view.
97 struct MemoryTrackerData
99 uint32_t accessCountRead
;
100 uint32_t accessCountWrite
;
101 uint32_t totalSizeRead
;
102 uint32_t totalSizeWrite
;
107 struct AddressRangeComparator
109 bool operator()(MemoryTrackerKey a
, MemoryTrackerKey b
) const
111 return (a
.address
& a
.mask
) < (b
.address
& b
.mask
);
115 typedef std::map
<MemoryTrackerKey
, MemoryTrackerData
, AddressRangeComparator
> MemoryTrackerMap
;
116 MemoryTrackerMap trackedMemory
= {};
// Records one memory access in trackedMemory.
//   address     - address of the access; stored in the lookup key.
//   addressMask - mask stored in the key; the comparator orders by (address & mask).
//   isRead      - NOTE(review): presumably selects the read vs. write counters below;
//                 the branch lines are not visible in this view - confirm.
//   tsc         - timestamp stored as tscMax on an existing entry.
//   size        - byte count added to the total size counter.
118 void TrackMemoryAccess(uint64_t address
, uint64_t addressMask
, uint8_t isRead
, uint64_t tsc
, uint32_t size
)
120 MemoryTrackerKey key
;
121 key
.address
= address
;
122 key
.mask
= addressMask
;
// lower_bound yields the first entry not ordered before `key`; the second half of
// the condition checks that `key` is not ordered before that entry either, i.e. the
// two are equivalent under the comparator and the range is already tracked.
124 MemoryTrackerMap::iterator i
= trackedMemory
.lower_bound(key
);
125 if (i
!= trackedMemory
.end() && !(trackedMemory
.key_comp()(key
, i
->first
)))
// Existing entry: bump the read counters.
130 i
->second
.accessCountRead
++;
131 i
->second
.totalSizeRead
+= size
;
// Existing entry: bump the write counters.
135 i
->second
.accessCountWrite
++;
136 i
->second
.totalSizeWrite
+= size
;
// Remember the timestamp of the latest access to this range.
138 i
->second
.tscMax
= tsc
;
// No equivalent entry yet: build a fresh record.
143 MemoryTrackerData data
;
// Initial values for a first access that is a read.
146 data
.accessCountRead
= 1;
147 data
.totalSizeRead
= size
;
148 data
.accessCountWrite
= 0;
149 data
.totalSizeWrite
= 0;
// Initial values for a first access that is a write.
153 data
.accessCountRead
= 0;
154 data
.totalSizeRead
= 0;
155 data
.accessCountWrite
= 1;
156 data
.totalSizeWrite
= size
;
// The lower_bound result is passed as the insertion hint.
160 trackedMemory
.insert(i
, MemoryTrackerMap::value_type(key
, data
));
165 //////////////////////////////////////////////////////////////////////////
166 /// @brief Event handler that handles API thread events. This is shared
167 /// between the API and its caller (e.g. driver shim) but typically
168 /// there is only a single API thread per context. So you can save
169 /// information in the class to be used for other events.
// File-backed event handler for API-thread events. The constructor forwards `id`
// to EventHandlerFile and then tries to copy events.proto next to the output.
170 class EventHandlerApiStats
: public EventHandlerFile
173 EventHandlerApiStats(uint32_t id
) : EventHandlerFile(id
)
176 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
177 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
178 // exists, this will attempt to copy it the first time we get here to package it with
179 // the stats. Otherwise, the user would need to specify the events.proto location when
180 // parsing the stats in post.
// Both paths are built with a backslash separator. std::ends appends a '\0' to each
// stream, so str() carries a trailing NUL; the c_str() consumers below stop at it.
181 std::stringstream eventsProtoSrcFilename
, eventsProtoDstFilename
;
182 eventsProtoSrcFilename
<< KNOB_DEBUG_OUTPUT_DIR
<< "\\events.proto" << std::ends
;
// The destination drops the last character of mOutputDir
// (presumably a trailing path separator - TODO confirm).
183 eventsProtoDstFilename
<< mOutputDir
.substr(0, mOutputDir
.size() - 1)
184 << "\\events.proto" << std::ends
;
186 // If event.proto already exists, we're done; else do the copy
187 struct stat buf
; // Use a Posix stat for file existence check
// NOTE(review): `!` binds tighter than `==`, so this evaluates as (!stat(...)) == 0,
// which is true when stat() returns non-zero (destination missing). It behaves as
// intended, but `stat(...) != 0` would say so directly and avoid compiler warnings.
188 if (!stat(eventsProtoDstFilename
.str().c_str(), &buf
) == 0)
190 // Now check to make sure the events.proto source exists
191 if (stat(eventsProtoSrcFilename
.str().c_str(), &buf
) == 0)
193 std::ifstream srcFile
;
194 srcFile
.open(eventsProtoSrcFilename
.str().c_str(), std::ios::binary
);
195 if (srcFile
.is_open())
197 // Just do a binary buffer copy
// NOTE(review): the result of opening/writing dstFile is not checked in the visible code.
198 std::ofstream dstFile
;
199 dstFile
.open(eventsProtoDstFilename
.str().c_str(), std::ios::binary
);
200 dstFile
<< srcFile
.rdbuf();
// Converts a DrawInstancedEvent into a DrawInfoEvent and forwards it to
// EventHandlerFile::Handle.
// NOTE(review): only part of the DrawInfoEvent argument list is visible in this
// view, so the full field mapping cannot be documented from here.
209 virtual void Handle(const DrawInstancedEvent
& event
)
211 DrawInfoEvent
e(event
.data
.drawId
,
214 event
.data
.numVertices
,
217 event
.data
.startVertex
,
218 event
.data
.numInstances
,
219 event
.data
.startInstance
,
223 event
.data
.soTopology
,
// Hand the converted event to the base-class handler.
226 EventHandlerFile::Handle(e
);
// Converts a DrawIndexedInstancedEvent into a DrawInfoEvent tagged
// ArchRast::IndexedInstanced and forwards it to EventHandlerFile::Handle.
// NOTE(review): only part of the DrawInfoEvent argument list is visible in this
// view, so the full field mapping cannot be documented from here.
229 virtual void Handle(const DrawIndexedInstancedEvent
& event
)
231 DrawInfoEvent
e(event
.data
.drawId
,
232 ArchRast::IndexedInstanced
,
235 event
.data
.numIndices
,
236 event
.data
.indexOffset
,
237 event
.data
.baseVertex
,
238 event
.data
.numInstances
,
239 event
.data
.startInstance
,
243 event
.data
.soTopology
,
// Hand the converted event to the base-class handler.
246 EventHandlerFile::Handle(e
);
250 //////////////////////////////////////////////////////////////////////////
251 /// @brief Event handler that handles worker thread events. There is one
252 /// event handler per thread. The python script will need to sum
253 /// up counters across all of the threads.
// Accumulates per-draw counters from worker-thread events in its members and emits
// them as summary events through the EventHandlerFile base when a draw is flushed.
254 class EventHandlerWorkerStats
: public EventHandlerFile
257 EventHandlerWorkerStats(uint32_t id
) : EventHandlerFile(id
), mNeedFlush(false)
259 memset(mShaderStats
, 0, sizeof(mShaderStats
));
261 // compute address mask for memory tracking
263 uint64_t addressRangeBytes
= 64;
264 while (addressRangeBytes
> 0)
266 mAddressMask
= (mAddressMask
<< 1) | 1;
267 addressRangeBytes
= addressRangeBytes
>> 1;
269 mMemGranularity
= mAddressMask
+ 1;
270 mAddressMask
= ~mAddressMask
;
273 virtual void Handle(const EarlyDepthStencilInfoSingleSample
& event
)
275 // earlyZ test compute
276 mDSSingleSample
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
277 mDSSingleSample
.earlyZTestFailCount
+=
278 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
280 // earlyStencil test compute
281 mDSSingleSample
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
282 mDSSingleSample
.earlyStencilTestFailCount
+=
283 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
285 // earlyZ test single and multi sample
286 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
287 mDSCombined
.earlyZTestFailCount
+=
288 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
290 // earlyStencil test single and multi sample
291 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
292 mDSCombined
.earlyStencilTestFailCount
+=
293 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
298 virtual void Handle(const EarlyDepthStencilInfoSampleRate
& event
)
300 // earlyZ test compute
301 mDSSampleRate
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
302 mDSSampleRate
.earlyZTestFailCount
+=
303 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
305 // earlyStencil test compute
306 mDSSampleRate
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
307 mDSSampleRate
.earlyStencilTestFailCount
+=
308 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
310 // earlyZ test single and multi sample
311 mDSCombined
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
312 mDSCombined
.earlyZTestFailCount
+=
313 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
315 // earlyStencil test single and multi sample
316 mDSCombined
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
317 mDSCombined
.earlyStencilTestFailCount
+=
318 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
323 virtual void Handle(const EarlyDepthStencilInfoNullPS
& event
)
325 // earlyZ test compute
326 mDSNullPS
.earlyZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
327 mDSNullPS
.earlyZTestFailCount
+=
328 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
330 // earlyStencil test compute
331 mDSNullPS
.earlyStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
332 mDSNullPS
.earlyStencilTestFailCount
+=
333 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
337 virtual void Handle(const LateDepthStencilInfoSingleSample
& event
)
339 // lateZ test compute
340 mDSSingleSample
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
341 mDSSingleSample
.lateZTestFailCount
+=
342 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
344 // lateStencil test compute
345 mDSSingleSample
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
346 mDSSingleSample
.lateStencilTestFailCount
+=
347 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
349 // lateZ test single and multi sample
350 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
351 mDSCombined
.lateZTestFailCount
+=
352 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
354 // lateStencil test single and multi sample
355 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
356 mDSCombined
.lateStencilTestFailCount
+=
357 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
362 virtual void Handle(const LateDepthStencilInfoSampleRate
& event
)
364 // lateZ test compute
365 mDSSampleRate
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
366 mDSSampleRate
.lateZTestFailCount
+=
367 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
369 // lateStencil test compute
370 mDSSampleRate
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
371 mDSSampleRate
.lateStencilTestFailCount
+=
372 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
374 // lateZ test single and multi sample
375 mDSCombined
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
376 mDSCombined
.lateZTestFailCount
+=
377 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
379 // lateStencil test single and multi sample
380 mDSCombined
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
381 mDSCombined
.lateStencilTestFailCount
+=
382 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
387 virtual void Handle(const LateDepthStencilInfoNullPS
& event
)
389 // lateZ test compute
390 mDSNullPS
.lateZTestPassCount
+= _mm_popcnt_u32(event
.data
.depthPassMask
);
391 mDSNullPS
.lateZTestFailCount
+=
392 _mm_popcnt_u32((!event
.data
.depthPassMask
) & event
.data
.coverageMask
);
394 // lateStencil test compute
395 mDSNullPS
.lateStencilTestPassCount
+= _mm_popcnt_u32(event
.data
.stencilPassMask
);
396 mDSNullPS
.lateStencilTestFailCount
+=
397 _mm_popcnt_u32((!event
.data
.stencilPassMask
) & event
.data
.coverageMask
);
401 virtual void Handle(const EarlyDepthInfoPixelRate
& event
)
403 // earlyZ test compute
404 mDSPixelRate
.earlyZTestPassCount
+= event
.data
.depthPassCount
;
405 mDSPixelRate
.earlyZTestFailCount
+=
406 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
411 virtual void Handle(const LateDepthInfoPixelRate
& event
)
413 // lateZ test compute
414 mDSPixelRate
.lateZTestPassCount
+= event
.data
.depthPassCount
;
415 mDSPixelRate
.lateZTestFailCount
+=
416 (_mm_popcnt_u32(event
.data
.activeLanes
) - event
.data
.depthPassCount
);
421 virtual void Handle(const ClipInfoEvent
& event
)
423 mClipper
.mustClipCount
+= _mm_popcnt_u32(event
.data
.clipMask
);
424 mClipper
.trivialRejectCount
+=
425 event
.data
.numInvocations
- _mm_popcnt_u32(event
.data
.validMask
);
426 mClipper
.trivialAcceptCount
+=
427 _mm_popcnt_u32(event
.data
.validMask
& ~event
.data
.clipMask
);
430 void UpdateStats(SWR_SHADER_STATS
* pStatTotals
, const SWR_SHADER_STATS
* pStatUpdate
)
432 pStatTotals
->numInstExecuted
+= pStatUpdate
->numInstExecuted
;
433 pStatTotals
->numSampleExecuted
+= pStatUpdate
->numSampleExecuted
;
434 pStatTotals
->numSampleLExecuted
+= pStatUpdate
->numSampleLExecuted
;
435 pStatTotals
->numSampleBExecuted
+= pStatUpdate
->numSampleBExecuted
;
436 pStatTotals
->numSampleCExecuted
+= pStatUpdate
->numSampleCExecuted
;
437 pStatTotals
->numSampleCLZExecuted
+= pStatUpdate
->numSampleCLZExecuted
;
438 pStatTotals
->numSampleCDExecuted
+= pStatUpdate
->numSampleCDExecuted
;
439 pStatTotals
->numGather4Executed
+= pStatUpdate
->numGather4Executed
;
440 pStatTotals
->numGather4CExecuted
+= pStatUpdate
->numGather4CExecuted
;
441 pStatTotals
->numGather4CPOExecuted
+= pStatUpdate
->numGather4CPOExecuted
;
442 pStatTotals
->numGather4CPOCExecuted
+= pStatUpdate
->numGather4CPOCExecuted
;
443 pStatTotals
->numLodExecuted
+= pStatUpdate
->numLodExecuted
;
446 virtual void Handle(const VSStats
& event
)
448 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
449 UpdateStats(&mShaderStats
[SHADER_VERTEX
], pStats
);
452 virtual void Handle(const GSStats
& event
)
454 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
455 UpdateStats(&mShaderStats
[SHADER_GEOMETRY
], pStats
);
458 virtual void Handle(const DSStats
& event
)
460 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
461 UpdateStats(&mShaderStats
[SHADER_DOMAIN
], pStats
);
464 virtual void Handle(const HSStats
& event
)
466 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
467 UpdateStats(&mShaderStats
[SHADER_HULL
], pStats
);
470 virtual void Handle(const PSStats
& event
)
472 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
473 UpdateStats(&mShaderStats
[SHADER_PIXEL
], pStats
);
477 virtual void Handle(const CSStats
& event
)
479 SWR_SHADER_STATS
* pStats
= (SWR_SHADER_STATS
*)event
.data
.hStats
;
480 UpdateStats(&mShaderStats
[SHADER_COMPUTE
], pStats
);
484 // Flush cached events for this draw
// Emits the accumulated per-draw counters as summary events tagged with drawId:
// pixel/compute shader statistics, depth and stencil pass/fail counts for each
// shading mode, raster tiles, alpha and cull counts. Afterwards accumulators are reset.
485 virtual void FlushDraw(uint32_t drawId
)
// NOTE(review): the statement controlled by this `if` is not visible in this view;
// presumably an early return when nothing was recorded - confirm.
487 if (mNeedFlush
== false)
// Pixel shader statistics.
490 EventHandlerFile::Handle(PSInfo(drawId
,
491 mShaderStats
[SHADER_PIXEL
].numInstExecuted
,
492 mShaderStats
[SHADER_PIXEL
].numSampleExecuted
,
493 mShaderStats
[SHADER_PIXEL
].numSampleLExecuted
,
494 mShaderStats
[SHADER_PIXEL
].numSampleBExecuted
,
495 mShaderStats
[SHADER_PIXEL
].numSampleCExecuted
,
496 mShaderStats
[SHADER_PIXEL
].numSampleCLZExecuted
,
497 mShaderStats
[SHADER_PIXEL
].numSampleCDExecuted
,
498 mShaderStats
[SHADER_PIXEL
].numGather4Executed
,
499 mShaderStats
[SHADER_PIXEL
].numGather4CExecuted
,
500 mShaderStats
[SHADER_PIXEL
].numGather4CPOExecuted
,
501 mShaderStats
[SHADER_PIXEL
].numGather4CPOCExecuted
,
502 mShaderStats
[SHADER_PIXEL
].numLodExecuted
));
// Compute shader statistics.
503 EventHandlerFile::Handle(CSInfo(drawId
,
504 mShaderStats
[SHADER_COMPUTE
].numInstExecuted
,
505 mShaderStats
[SHADER_COMPUTE
].numSampleExecuted
,
506 mShaderStats
[SHADER_COMPUTE
].numSampleLExecuted
,
507 mShaderStats
[SHADER_COMPUTE
].numSampleBExecuted
,
508 mShaderStats
[SHADER_COMPUTE
].numSampleCExecuted
,
509 mShaderStats
[SHADER_COMPUTE
].numSampleCLZExecuted
,
510 mShaderStats
[SHADER_COMPUTE
].numSampleCDExecuted
,
511 mShaderStats
[SHADER_COMPUTE
].numGather4Executed
,
512 mShaderStats
[SHADER_COMPUTE
].numGather4CExecuted
,
513 mShaderStats
[SHADER_COMPUTE
].numGather4CPOExecuted
,
514 mShaderStats
[SHADER_COMPUTE
].numGather4CPOCExecuted
,
515 mShaderStats
[SHADER_COMPUTE
].numLodExecuted
));
// Depth/stencil counters gathered for single-sample shading.
518 EventHandlerFile::Handle(EarlyZSingleSample(
519 drawId
, mDSSingleSample
.earlyZTestPassCount
, mDSSingleSample
.earlyZTestFailCount
));
520 EventHandlerFile::Handle(LateZSingleSample(
521 drawId
, mDSSingleSample
.lateZTestPassCount
, mDSSingleSample
.lateZTestFailCount
));
522 EventHandlerFile::Handle(
523 EarlyStencilSingleSample(drawId
,
524 mDSSingleSample
.earlyStencilTestPassCount
,
525 mDSSingleSample
.earlyStencilTestFailCount
));
526 EventHandlerFile::Handle(
527 LateStencilSingleSample(drawId
,
528 mDSSingleSample
.lateStencilTestPassCount
,
529 mDSSingleSample
.lateStencilTestFailCount
));
// Depth/stencil counters gathered for sample-rate shading.
532 EventHandlerFile::Handle(EarlyZSampleRate(
533 drawId
, mDSSampleRate
.earlyZTestPassCount
, mDSSampleRate
.earlyZTestFailCount
));
534 EventHandlerFile::Handle(LateZSampleRate(
535 drawId
, mDSSampleRate
.lateZTestPassCount
, mDSSampleRate
.lateZTestFailCount
));
536 EventHandlerFile::Handle(
537 EarlyStencilSampleRate(drawId
,
538 mDSSampleRate
.earlyStencilTestPassCount
,
539 mDSSampleRate
.earlyStencilTestFailCount
));
540 EventHandlerFile::Handle(LateStencilSampleRate(drawId
,
541 mDSSampleRate
.lateStencilTestPassCount
,
542 mDSSampleRate
.lateStencilTestFailCount
));
// Combined counters (fed by both the single-sample and sample-rate handlers).
545 EventHandlerFile::Handle(
546 EarlyZ(drawId
, mDSCombined
.earlyZTestPassCount
, mDSCombined
.earlyZTestFailCount
));
547 EventHandlerFile::Handle(
548 LateZ(drawId
, mDSCombined
.lateZTestPassCount
, mDSCombined
.lateZTestFailCount
));
549 EventHandlerFile::Handle(EarlyStencil(drawId
,
550 mDSCombined
.earlyStencilTestPassCount
,
551 mDSCombined
.earlyStencilTestFailCount
));
552 EventHandlerFile::Handle(LateStencil(drawId
,
553 mDSCombined
.lateStencilTestPassCount
,
554 mDSCombined
.lateStencilTestFailCount
));
// Depth counters gathered for pixel-rate shading.
557 EventHandlerFile::Handle(EarlyZPixelRate(
558 drawId
, mDSPixelRate
.earlyZTestPassCount
, mDSPixelRate
.earlyZTestFailCount
));
559 EventHandlerFile::Handle(LateZPixelRate(
560 drawId
, mDSPixelRate
.lateZTestPassCount
, mDSPixelRate
.lateZTestFailCount
));
// Null-PS counters. Only the early Z/stencil values are reported by the visible
// statements; the late null-PS counters filled by
// Handle(const LateDepthStencilInfoNullPS&) have no visible event here -
// TODO confirm whether that is intended.
564 EventHandlerFile::Handle(
565 EarlyZNullPS(drawId
, mDSNullPS
.earlyZTestPassCount
, mDSNullPS
.earlyZTestFailCount
));
566 EventHandlerFile::Handle(EarlyStencilNullPS(
567 drawId
, mDSNullPS
.earlyStencilTestPassCount
, mDSNullPS
.earlyStencilTestFailCount
));
569 // Rasterized Subspans
570 EventHandlerFile::Handle(RasterTiles(drawId
, rastStats
.rasterTiles
));
// Alpha test/blend counts.
573 EventHandlerFile::Handle(
574 AlphaEvent(drawId
, mAlphaStats
.alphaTestCount
, mAlphaStats
.alphaBlendCount
));
// Backface and degenerate primitive counts.
577 EventHandlerFile::Handle(
578 CullEvent(drawId
, mCullStats
.backfacePrimCount
, mCullStats
.degeneratePrimCount
));
// Reset accumulators for the next draw.
// NOTE(review): only the resets of mDSSingleSample and of the pixel/compute shader
// stats are visible in this view; resets of the other accumulators are not shown.
580 mDSSingleSample
= {};
590 mShaderStats
[SHADER_PIXEL
] = {};
591 mShaderStats
[SHADER_COMPUTE
] = {};
596 virtual void Handle(const FrontendDrawEndEvent
& event
)
599 EventHandlerFile::Handle(ClipperEvent(event
.data
.drawId
,
600 mClipper
.trivialRejectCount
,
601 mClipper
.trivialAcceptCount
,
602 mClipper
.mustClipCount
));
605 EventHandlerFile::Handle(TessPrims(event
.data
.drawId
, mTS
.inputPrims
));
608 EventHandlerFile::Handle(GSInputPrims(event
.data
.drawId
, mGS
.inputPrimCount
));
609 EventHandlerFile::Handle(GSPrimsGen(event
.data
.drawId
, mGS
.primGeneratedCount
));
610 EventHandlerFile::Handle(GSVertsInput(event
.data
.drawId
, mGS
.vertsInput
));
612 EventHandlerFile::Handle(VSInfo(event
.data
.drawId
,
613 mShaderStats
[SHADER_VERTEX
].numInstExecuted
,
614 mShaderStats
[SHADER_VERTEX
].numSampleExecuted
,
615 mShaderStats
[SHADER_VERTEX
].numSampleLExecuted
,
616 mShaderStats
[SHADER_VERTEX
].numSampleBExecuted
,
617 mShaderStats
[SHADER_VERTEX
].numSampleCExecuted
,
618 mShaderStats
[SHADER_VERTEX
].numSampleCLZExecuted
,
619 mShaderStats
[SHADER_VERTEX
].numSampleCDExecuted
,
620 mShaderStats
[SHADER_VERTEX
].numGather4Executed
,
621 mShaderStats
[SHADER_VERTEX
].numGather4CExecuted
,
622 mShaderStats
[SHADER_VERTEX
].numGather4CPOExecuted
,
623 mShaderStats
[SHADER_VERTEX
].numGather4CPOCExecuted
,
624 mShaderStats
[SHADER_VERTEX
].numLodExecuted
));
625 EventHandlerFile::Handle(HSInfo(event
.data
.drawId
,
626 mShaderStats
[SHADER_HULL
].numInstExecuted
,
627 mShaderStats
[SHADER_HULL
].numSampleExecuted
,
628 mShaderStats
[SHADER_HULL
].numSampleLExecuted
,
629 mShaderStats
[SHADER_HULL
].numSampleBExecuted
,
630 mShaderStats
[SHADER_HULL
].numSampleCExecuted
,
631 mShaderStats
[SHADER_HULL
].numSampleCLZExecuted
,
632 mShaderStats
[SHADER_HULL
].numSampleCDExecuted
,
633 mShaderStats
[SHADER_HULL
].numGather4Executed
,
634 mShaderStats
[SHADER_HULL
].numGather4CExecuted
,
635 mShaderStats
[SHADER_HULL
].numGather4CPOExecuted
,
636 mShaderStats
[SHADER_HULL
].numGather4CPOCExecuted
,
637 mShaderStats
[SHADER_HULL
].numLodExecuted
));
638 EventHandlerFile::Handle(DSInfo(event
.data
.drawId
,
639 mShaderStats
[SHADER_DOMAIN
].numInstExecuted
,
640 mShaderStats
[SHADER_DOMAIN
].numSampleExecuted
,
641 mShaderStats
[SHADER_DOMAIN
].numSampleLExecuted
,
642 mShaderStats
[SHADER_DOMAIN
].numSampleBExecuted
,
643 mShaderStats
[SHADER_DOMAIN
].numSampleCExecuted
,
644 mShaderStats
[SHADER_DOMAIN
].numSampleCLZExecuted
,
645 mShaderStats
[SHADER_DOMAIN
].numSampleCDExecuted
,
646 mShaderStats
[SHADER_DOMAIN
].numGather4Executed
,
647 mShaderStats
[SHADER_DOMAIN
].numGather4CExecuted
,
648 mShaderStats
[SHADER_DOMAIN
].numGather4CPOExecuted
,
649 mShaderStats
[SHADER_DOMAIN
].numGather4CPOCExecuted
,
650 mShaderStats
[SHADER_DOMAIN
].numLodExecuted
));
651 EventHandlerFile::Handle(GSInfo(event
.data
.drawId
,
652 mShaderStats
[SHADER_GEOMETRY
].numInstExecuted
,
653 mShaderStats
[SHADER_GEOMETRY
].numSampleExecuted
,
654 mShaderStats
[SHADER_GEOMETRY
].numSampleLExecuted
,
655 mShaderStats
[SHADER_GEOMETRY
].numSampleBExecuted
,
656 mShaderStats
[SHADER_GEOMETRY
].numSampleCExecuted
,
657 mShaderStats
[SHADER_GEOMETRY
].numSampleCLZExecuted
,
658 mShaderStats
[SHADER_GEOMETRY
].numSampleCDExecuted
,
659 mShaderStats
[SHADER_GEOMETRY
].numGather4Executed
,
660 mShaderStats
[SHADER_GEOMETRY
].numGather4CExecuted
,
661 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOExecuted
,
662 mShaderStats
[SHADER_GEOMETRY
].numGather4CPOCExecuted
,
663 mShaderStats
[SHADER_GEOMETRY
].numLodExecuted
));
665 mShaderStats
[SHADER_VERTEX
] = {};
666 mShaderStats
[SHADER_HULL
] = {};
667 mShaderStats
[SHADER_DOMAIN
] = {};
668 mShaderStats
[SHADER_GEOMETRY
] = {};
670 // Reset Internal Counters
676 virtual void Handle(const MemoryAccessEvent
& event
)
678 uint64_t trackAddr
= event
.data
.ptr
;
679 uint64_t nextAddr
= (trackAddr
& mAddressMask
);
680 uint32_t sizeTracked
= 0;
682 while (sizeTracked
< event
.data
.size
)
684 nextAddr
+= mMemGranularity
;
685 uint32_t size
= nextAddr
- trackAddr
;
686 size
= std::min(event
.data
.size
, size
);
687 mMemoryStats
.TrackMemoryAccess(trackAddr
, mAddressMask
, event
.data
.isRead
, event
.data
.tsc
, size
);
689 trackAddr
= nextAddr
;
// Emits one MemoryStatsEvent per tracked address range, tagged with the event's
// drawId, then empties the tracking map.
// NOTE(review): the trailing constructor arguments and the statement that advances
// `i` are not visible in this view.
693 virtual void Handle(const MemoryStatsEndEvent
& event
)
695 MemoryStats::MemoryTrackerMap::iterator i
= mMemoryStats
.trackedMemory
.begin();
696 while (i
!= mMemoryStats
.trackedMemory
.end())
// The reported address is the key address with mAddressMask applied (the range base).
698 MemoryStatsEvent
mse(event
.data
.drawId
,
699 i
->first
.address
& mAddressMask
,
700 i
->second
.accessCountRead
,
701 i
->second
.accessCountWrite
,
702 i
->second
.totalSizeRead
,
703 i
->second
.totalSizeWrite
,
706 EventHandlerFile::Handle(mse
);
// Drop all tracked ranges once they have been reported.
709 mMemoryStats
.trackedMemory
.clear();
712 virtual void Handle(const GSPrimInfo
& event
)
714 mGS
.inputPrimCount
+= event
.data
.inputPrimCount
;
715 mGS
.primGeneratedCount
+= event
.data
.primGeneratedCount
;
716 mGS
.vertsInput
+= event
.data
.vertsInput
;
719 virtual void Handle(const TessPrimCount
& event
) { mTS
.inputPrims
+= event
.data
.primCount
; }
721 virtual void Handle(const RasterTileCount
& event
)
723 rastStats
.rasterTiles
+= event
.data
.rasterTiles
;
726 virtual void Handle(const CullInfoEvent
& event
)
728 mCullStats
.degeneratePrimCount
+= _mm_popcnt_u32(
729 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.degeneratePrimMask
));
730 mCullStats
.backfacePrimCount
+= _mm_popcnt_u32(
731 event
.data
.validMask
^ (event
.data
.validMask
& ~event
.data
.backfacePrimMask
));
734 virtual void Handle(const AlphaInfoEvent
& event
)
736 mAlphaStats
.alphaTestCount
+= event
.data
.alphaTestEnable
;
737 mAlphaStats
.alphaBlendCount
+= event
.data
.alphaBlendEnable
;
743 DepthStencilStats mDSSingleSample
= {};
744 DepthStencilStats mDSSampleRate
= {};
745 DepthStencilStats mDSPixelRate
= {};
746 DepthStencilStats mDSCombined
= {};
747 DepthStencilStats mDSNullPS
= {};
748 DepthStencilStats mDSOmZ
= {};
749 CStats mClipper
= {};
751 GSStateInfo mGS
= {};
752 RastStats rastStats
= {};
753 CullStats mCullStats
= {};
754 AlphaStats mAlphaStats
= {};
756 SWR_SHADER_STATS mShaderStats
[NUM_SHADER_TYPES
];
758 MemoryStats mMemoryStats
= {};
759 uint64_t mAddressMask
= 0;
760 uint64_t mMemGranularity
= 0;
764 static EventManager
* FromHandle(HANDLE hThreadContext
)
766 return reinterpret_cast<EventManager
*>(hThreadContext
);
769 // Construct an event manager and associate a handler with it.
// Creates an EventManager for a thread, attaches a stats handler chosen by `type`
// (EventHandlerApiStats for AR_THREAD::API, EventHandlerWorkerStats otherwise),
// sends the matching thread-start event and marks the handler's header.
// NOTE(review): the else/return lines are not visible in this view; presumably the
// manager is returned as the HANDLE, since FromHandle casts handles back to
// EventManager* - confirm.
770 HANDLE
CreateThreadContext(AR_THREAD type
)
772 // Can we assume single threaded here?
// The handler id comes from an atomic counter, so ids stay unique across threads.
773 static std::atomic
<uint32_t> counter(0);
774 uint32_t id
= counter
.fetch_add(1);
776 EventManager
* pManager
= new EventManager();
780 EventHandlerFile
* pHandler
= nullptr;
782 if (type
== AR_THREAD::API
)
784 pHandler
= new EventHandlerApiStats(id
);
785 pManager
->Attach(pHandler
);
786 pHandler
->Handle(ThreadStartApiEvent());
// Worker-thread path.
790 pHandler
= new EventHandlerWorkerStats(id
);
791 pManager
->Attach(pHandler
);
792 pHandler
->Handle(ThreadStartWorkerEvent());
795 pHandler
->MarkHeader();
// Failure path.
800 SWR_INVALID("Failed to register thread.");
804 void DestroyThreadContext(HANDLE hThreadContext
)
806 EventManager
* pManager
= FromHandle(hThreadContext
);
807 SWR_ASSERT(pManager
!= nullptr);
812 // Dispatch event for this thread.
813 void Dispatch(HANDLE hThreadContext
, const Event
& event
)
815 EventManager
* pManager
= FromHandle(hThreadContext
);
816 SWR_ASSERT(pManager
!= nullptr);
818 pManager
->Dispatch(event
);
821 // Flush for this thread.
822 void FlushDraw(HANDLE hThreadContext
, uint32_t drawId
)
824 EventManager
* pManager
= FromHandle(hThreadContext
);
825 SWR_ASSERT(pManager
!= nullptr);
827 pManager
->FlushDraw(drawId
);
829 } // namespace ArchRast