swr/rasterizer: enable size accumulation in mem stats
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29 #include <map>
30
31 #include "common/os.h"
32 #include "archrast/archrast.h"
33 #include "archrast/eventmanager.h"
34 #include "gen_ar_eventhandlerfile.hpp"
35
36 namespace ArchRast
37 {
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail counters for the early and late depth (Z) and stencil
///        tests. Every counter starts at zero.
struct DepthStencilStats
{
    // Depth test
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
51
/// @brief Clipper counters.
///
/// Members carry default initializers (like the other stats structs in this
/// file) so a default-constructed instance starts from zero instead of holding
/// indeterminate values.
struct CStats
{
    uint32_t trivialRejectCount = 0;
    uint32_t trivialAcceptCount = 0;
    uint32_t mustClipCount      = 0;
};
58
/// @brief Tessellation counters.
struct TEStats
{
    // @todo Rename to numPatches. This assumes one patch per input primitive;
    //       as long as that holds, the current name is fine.
    uint32_t inputPrims{0};
};
64
/// @brief Geometry shader counters.
///
/// Members carry default initializers (like the other stats structs in this
/// file) so a default-constructed instance starts from zero instead of holding
/// indeterminate values.
struct GSStateInfo
{
    uint32_t inputPrimCount     = 0;
    uint32_t primGeneratedCount = 0;
    uint32_t vertsInput         = 0;
};
71
/// @brief Rasterizer counters.
struct RastStats
{
    uint32_t rasterTiles{0};
};
76
/// @brief Counters for primitives flagged as degenerate or backfacing.
struct CullStats
{
    uint32_t degeneratePrimCount{0};
    uint32_t backfacePrimCount{0};
};
82
/// @brief Alpha test / alpha blend counters.
struct AlphaStats
{
    uint32_t alphaTestCount{0};
    uint32_t alphaBlendCount{0};
};
88
/// @brief Records memory accesses bucketed by masked address.
///
/// Two accesses land in the same bucket when (address & mask) is equal. Each
/// bucket keeps read/write access counts, read/write byte totals and the
/// smallest and largest timestamp seen.
struct MemoryStats
{
    /// Map key: an address plus the mask that selects its bucket.
    struct MemoryTrackerKey
    {
        uint64_t address;
        uint64_t mask;
    };

    /// Per-bucket accumulated data.
    struct MemoryTrackerData
    {
        uint32_t accessCountRead;
        uint32_t accessCountWrite;
        uint32_t totalSizeRead;
        uint32_t totalSizeWrite;
        uint64_t tscMin;
        uint64_t tscMax;
    };

    /// Orders keys by masked address, so all addresses of one bucket compare equivalent.
    struct AddressRangeComparator
    {
        bool operator()(MemoryTrackerKey a, MemoryTrackerKey b) const
        {
            return (a.address & a.mask) < (b.address & b.mask);
        }
    };

    typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap;
    MemoryTrackerMap trackedMemory = {};

    /// @brief Account one memory access to the bucket containing @p address.
    /// @param address      address of the access
    /// @param addressMask  mask applied to the address to select the bucket
    /// @param isRead       non-zero for a read, zero for a write
    /// @param tsc          timestamp of the access
    /// @param size         number of bytes to add to the read or write total
    void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size)
    {
        MemoryTrackerKey key;
        key.address = address;
        key.mask    = addressMask;

        // lower_bound gives either the matching bucket or the insertion hint for a new one.
        MemoryTrackerMap::iterator pos = trackedMemory.lower_bound(key);
        const bool                 found =
            (pos != trackedMemory.end()) && !(trackedMemory.key_comp()(key, pos->first));

        if (!found)
        {
            // New bucket: zero the counters and seed both timestamps with this access.
            MemoryTrackerData fresh = {};
            fresh.tscMin            = tsc;
            fresh.tscMax            = tsc;
            pos = trackedMemory.insert(pos, MemoryTrackerMap::value_type(key, fresh));
        }

        MemoryTrackerData& data = pos->second;

        // Keep true extremes so the result does not depend on the order in
        // which accesses are recorded.
        if (tsc < data.tscMin)
        {
            data.tscMin = tsc;
        }
        if (tsc > data.tscMax)
        {
            data.tscMax = tsc;
        }

        if (isRead)
        {
            ++data.accessCountRead;
            data.totalSizeRead += size;
        }
        else
        {
            ++data.accessCountWrite;
            data.totalSizeWrite += size;
        }
    }
};
164
165 //////////////////////////////////////////////////////////////////////////
166 /// @brief Event handler that handles API thread events. This is shared
167 /// between the API and its caller (e.g. driver shim) but typically
168 /// there is only a single API thread per context. So you can save
169 /// information in the class to be used for other events.
170 class EventHandlerApiStats : public EventHandlerFile
171 {
172 public:
173 EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
174 {
175 #if defined(_WIN32)
176 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
177 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
178 // exists, this will attempt to copy it the first time we get here to package it with
179 // the stats. Otherwise, the user would need to specify the events.proto location when
180 // parsing the stats in post.
181 std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
182 eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
183 eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
184 << "\\events.proto" << std::ends;
185
186 // If event.proto already exists, we're done; else do the copy
187 struct stat buf; // Use a Posix stat for file existence check
188 if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)
189 {
190 // Now check to make sure the events.proto source exists
191 if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
192 {
193 std::ifstream srcFile;
194 srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
195 if (srcFile.is_open())
196 {
197 // Just do a binary buffer copy
198 std::ofstream dstFile;
199 dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);
200 dstFile << srcFile.rdbuf();
201 dstFile.close();
202 }
203 srcFile.close();
204 }
205 }
206 #endif
207 }
208
209 virtual void Handle(const DrawInstancedEvent& event)
210 {
211 DrawInfoEvent e(event.data.drawId,
212 ArchRast::Instanced,
213 event.data.topology,
214 event.data.numVertices,
215 0,
216 0,
217 event.data.startVertex,
218 event.data.numInstances,
219 event.data.startInstance,
220 event.data.tsEnable,
221 event.data.gsEnable,
222 event.data.soEnable,
223 event.data.soTopology,
224 event.data.splitId);
225
226 EventHandlerFile::Handle(e);
227 }
228
229 virtual void Handle(const DrawIndexedInstancedEvent& event)
230 {
231 DrawInfoEvent e(event.data.drawId,
232 ArchRast::IndexedInstanced,
233 event.data.topology,
234 0,
235 event.data.numIndices,
236 event.data.indexOffset,
237 event.data.baseVertex,
238 event.data.numInstances,
239 event.data.startInstance,
240 event.data.tsEnable,
241 event.data.gsEnable,
242 event.data.soEnable,
243 event.data.soTopology,
244 event.data.splitId);
245
246 EventHandlerFile::Handle(e);
247 }
248 };
249
250 //////////////////////////////////////////////////////////////////////////
251 /// @brief Event handler that handles worker thread events. There is one
252 /// event handler per thread. The python script will need to sum
253 /// up counters across all of the threads.
254 class EventHandlerWorkerStats : public EventHandlerFile
255 {
256 public:
257 EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
258 {
259 memset(mShaderStats, 0, sizeof(mShaderStats));
260
261 // compute address mask for memory tracking
262 mAddressMask = 0;
263 uint64_t addressRangeBytes = 64;
264 while (addressRangeBytes > 0)
265 {
266 mAddressMask = (mAddressMask << 1) | 1;
267 addressRangeBytes = addressRangeBytes >> 1;
268 }
269 mMemGranularity = mAddressMask + 1;
270 mAddressMask = ~mAddressMask;
271 }
272
273 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
274 {
275 // earlyZ test compute
276 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
277 mDSSingleSample.earlyZTestFailCount +=
278 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
279
280 // earlyStencil test compute
281 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
282 mDSSingleSample.earlyStencilTestFailCount +=
283 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
284
285 // earlyZ test single and multi sample
286 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
287 mDSCombined.earlyZTestFailCount +=
288 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
289
290 // earlyStencil test single and multi sample
291 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
292 mDSCombined.earlyStencilTestFailCount +=
293 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
294
295 mNeedFlush = true;
296 }
297
298 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
299 {
300 // earlyZ test compute
301 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
302 mDSSampleRate.earlyZTestFailCount +=
303 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
304
305 // earlyStencil test compute
306 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
307 mDSSampleRate.earlyStencilTestFailCount +=
308 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
309
310 // earlyZ test single and multi sample
311 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
312 mDSCombined.earlyZTestFailCount +=
313 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
314
315 // earlyStencil test single and multi sample
316 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
317 mDSCombined.earlyStencilTestFailCount +=
318 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
319
320 mNeedFlush = true;
321 }
322
323 virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
324 {
325 // earlyZ test compute
326 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
327 mDSNullPS.earlyZTestFailCount +=
328 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
329
330 // earlyStencil test compute
331 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
332 mDSNullPS.earlyStencilTestFailCount +=
333 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
334 mNeedFlush = true;
335 }
336
337 virtual void Handle(const LateDepthStencilInfoSingleSample& event)
338 {
339 // lateZ test compute
340 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
341 mDSSingleSample.lateZTestFailCount +=
342 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
343
344 // lateStencil test compute
345 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
346 mDSSingleSample.lateStencilTestFailCount +=
347 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
348
349 // lateZ test single and multi sample
350 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
351 mDSCombined.lateZTestFailCount +=
352 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
353
354 // lateStencil test single and multi sample
355 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
356 mDSCombined.lateStencilTestFailCount +=
357 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
358
359 mNeedFlush = true;
360 }
361
362 virtual void Handle(const LateDepthStencilInfoSampleRate& event)
363 {
364 // lateZ test compute
365 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
366 mDSSampleRate.lateZTestFailCount +=
367 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
368
369 // lateStencil test compute
370 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
371 mDSSampleRate.lateStencilTestFailCount +=
372 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
373
374 // lateZ test single and multi sample
375 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
376 mDSCombined.lateZTestFailCount +=
377 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
378
379 // lateStencil test single and multi sample
380 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
381 mDSCombined.lateStencilTestFailCount +=
382 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
383
384 mNeedFlush = true;
385 }
386
387 virtual void Handle(const LateDepthStencilInfoNullPS& event)
388 {
389 // lateZ test compute
390 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
391 mDSNullPS.lateZTestFailCount +=
392 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
393
394 // lateStencil test compute
395 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
396 mDSNullPS.lateStencilTestFailCount +=
397 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
398 mNeedFlush = true;
399 }
400
401 virtual void Handle(const EarlyDepthInfoPixelRate& event)
402 {
403 // earlyZ test compute
404 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
405 mDSPixelRate.earlyZTestFailCount +=
406 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
407 mNeedFlush = true;
408 }
409
410
411 virtual void Handle(const LateDepthInfoPixelRate& event)
412 {
413 // lateZ test compute
414 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
415 mDSPixelRate.lateZTestFailCount +=
416 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
417 mNeedFlush = true;
418 }
419
420
421 virtual void Handle(const ClipInfoEvent& event)
422 {
423 mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
424 mClipper.trivialRejectCount +=
425 event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
426 mClipper.trivialAcceptCount +=
427 _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
428 }
429
430 void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
431 {
432 pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
433 pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
434 pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
435 pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
436 pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
437 pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
438 pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
439 pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
440 pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
441 pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
442 pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
443 pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
444 }
445
446 virtual void Handle(const VSStats& event)
447 {
448 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
449 UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
450 }
451
452 virtual void Handle(const GSStats& event)
453 {
454 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
455 UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
456 }
457
458 virtual void Handle(const DSStats& event)
459 {
460 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
461 UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
462 }
463
464 virtual void Handle(const HSStats& event)
465 {
466 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
467 UpdateStats(&mShaderStats[SHADER_HULL], pStats);
468 }
469
470 virtual void Handle(const PSStats& event)
471 {
472 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
473 UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
474 mNeedFlush = true;
475 }
476
477 virtual void Handle(const CSStats& event)
478 {
479 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
480 UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
481 mNeedFlush = true;
482 }
483
484 // Flush cached events for this draw
485 virtual void FlushDraw(uint32_t drawId)
486 {
487 if (mNeedFlush == false)
488 return;
489
490 EventHandlerFile::Handle(PSInfo(drawId,
491 mShaderStats[SHADER_PIXEL].numInstExecuted,
492 mShaderStats[SHADER_PIXEL].numSampleExecuted,
493 mShaderStats[SHADER_PIXEL].numSampleLExecuted,
494 mShaderStats[SHADER_PIXEL].numSampleBExecuted,
495 mShaderStats[SHADER_PIXEL].numSampleCExecuted,
496 mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
497 mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
498 mShaderStats[SHADER_PIXEL].numGather4Executed,
499 mShaderStats[SHADER_PIXEL].numGather4CExecuted,
500 mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
501 mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
502 mShaderStats[SHADER_PIXEL].numLodExecuted));
503 EventHandlerFile::Handle(CSInfo(drawId,
504 mShaderStats[SHADER_COMPUTE].numInstExecuted,
505 mShaderStats[SHADER_COMPUTE].numSampleExecuted,
506 mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
507 mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
508 mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
509 mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
510 mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
511 mShaderStats[SHADER_COMPUTE].numGather4Executed,
512 mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
513 mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
514 mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
515 mShaderStats[SHADER_COMPUTE].numLodExecuted));
516
517 // singleSample
518 EventHandlerFile::Handle(EarlyZSingleSample(
519 drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
520 EventHandlerFile::Handle(LateZSingleSample(
521 drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
522 EventHandlerFile::Handle(
523 EarlyStencilSingleSample(drawId,
524 mDSSingleSample.earlyStencilTestPassCount,
525 mDSSingleSample.earlyStencilTestFailCount));
526 EventHandlerFile::Handle(
527 LateStencilSingleSample(drawId,
528 mDSSingleSample.lateStencilTestPassCount,
529 mDSSingleSample.lateStencilTestFailCount));
530
531 // sampleRate
532 EventHandlerFile::Handle(EarlyZSampleRate(
533 drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
534 EventHandlerFile::Handle(LateZSampleRate(
535 drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
536 EventHandlerFile::Handle(
537 EarlyStencilSampleRate(drawId,
538 mDSSampleRate.earlyStencilTestPassCount,
539 mDSSampleRate.earlyStencilTestFailCount));
540 EventHandlerFile::Handle(LateStencilSampleRate(drawId,
541 mDSSampleRate.lateStencilTestPassCount,
542 mDSSampleRate.lateStencilTestFailCount));
543
544 // combined
545 EventHandlerFile::Handle(
546 EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
547 EventHandlerFile::Handle(
548 LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
549 EventHandlerFile::Handle(EarlyStencil(drawId,
550 mDSCombined.earlyStencilTestPassCount,
551 mDSCombined.earlyStencilTestFailCount));
552 EventHandlerFile::Handle(LateStencil(drawId,
553 mDSCombined.lateStencilTestPassCount,
554 mDSCombined.lateStencilTestFailCount));
555
556 // pixelRate
557 EventHandlerFile::Handle(EarlyZPixelRate(
558 drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
559 EventHandlerFile::Handle(LateZPixelRate(
560 drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
561
562
563 // NullPS
564 EventHandlerFile::Handle(
565 EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
566 EventHandlerFile::Handle(EarlyStencilNullPS(
567 drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
568
569 // Rasterized Subspans
570 EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
571
572 // Alpha Subspans
573 EventHandlerFile::Handle(
574 AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
575
576 // Primitive Culling
577 EventHandlerFile::Handle(
578 CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
579
580 mDSSingleSample = {};
581 mDSSampleRate = {};
582 mDSCombined = {};
583 mDSPixelRate = {};
584 mDSNullPS = {};
585
586 rastStats = {};
587 mCullStats = {};
588 mAlphaStats = {};
589
590 mShaderStats[SHADER_PIXEL] = {};
591 mShaderStats[SHADER_COMPUTE] = {};
592
593 mNeedFlush = false;
594 }
595
596 virtual void Handle(const FrontendDrawEndEvent& event)
597 {
598 // Clipper
599 EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
600 mClipper.trivialRejectCount,
601 mClipper.trivialAcceptCount,
602 mClipper.mustClipCount));
603
604 // Tesselator
605 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
606
607 // Geometry Shader
608 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
609 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
610 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
611
612 EventHandlerFile::Handle(VSInfo(event.data.drawId,
613 mShaderStats[SHADER_VERTEX].numInstExecuted,
614 mShaderStats[SHADER_VERTEX].numSampleExecuted,
615 mShaderStats[SHADER_VERTEX].numSampleLExecuted,
616 mShaderStats[SHADER_VERTEX].numSampleBExecuted,
617 mShaderStats[SHADER_VERTEX].numSampleCExecuted,
618 mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
619 mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
620 mShaderStats[SHADER_VERTEX].numGather4Executed,
621 mShaderStats[SHADER_VERTEX].numGather4CExecuted,
622 mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
623 mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
624 mShaderStats[SHADER_VERTEX].numLodExecuted));
625 EventHandlerFile::Handle(HSInfo(event.data.drawId,
626 mShaderStats[SHADER_HULL].numInstExecuted,
627 mShaderStats[SHADER_HULL].numSampleExecuted,
628 mShaderStats[SHADER_HULL].numSampleLExecuted,
629 mShaderStats[SHADER_HULL].numSampleBExecuted,
630 mShaderStats[SHADER_HULL].numSampleCExecuted,
631 mShaderStats[SHADER_HULL].numSampleCLZExecuted,
632 mShaderStats[SHADER_HULL].numSampleCDExecuted,
633 mShaderStats[SHADER_HULL].numGather4Executed,
634 mShaderStats[SHADER_HULL].numGather4CExecuted,
635 mShaderStats[SHADER_HULL].numGather4CPOExecuted,
636 mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
637 mShaderStats[SHADER_HULL].numLodExecuted));
638 EventHandlerFile::Handle(DSInfo(event.data.drawId,
639 mShaderStats[SHADER_DOMAIN].numInstExecuted,
640 mShaderStats[SHADER_DOMAIN].numSampleExecuted,
641 mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
642 mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
643 mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
644 mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
645 mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
646 mShaderStats[SHADER_DOMAIN].numGather4Executed,
647 mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
648 mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
649 mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
650 mShaderStats[SHADER_DOMAIN].numLodExecuted));
651 EventHandlerFile::Handle(GSInfo(event.data.drawId,
652 mShaderStats[SHADER_GEOMETRY].numInstExecuted,
653 mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
654 mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
655 mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
656 mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
657 mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
658 mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
659 mShaderStats[SHADER_GEOMETRY].numGather4Executed,
660 mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
661 mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
662 mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
663 mShaderStats[SHADER_GEOMETRY].numLodExecuted));
664
665 mShaderStats[SHADER_VERTEX] = {};
666 mShaderStats[SHADER_HULL] = {};
667 mShaderStats[SHADER_DOMAIN] = {};
668 mShaderStats[SHADER_GEOMETRY] = {};
669
670 // Reset Internal Counters
671 mClipper = {};
672 mTS = {};
673 mGS = {};
674 }
675
676 virtual void Handle(const MemoryAccessEvent& event)
677 {
678 uint64_t trackAddr = event.data.ptr;
679 uint64_t nextAddr = (trackAddr & mAddressMask);
680 uint32_t sizeTracked = 0;
681
682 while (sizeTracked < event.data.size)
683 {
684 nextAddr += mMemGranularity;
685 uint32_t size = nextAddr - trackAddr;
686 size = std::min(event.data.size, size);
687 mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size);
688 sizeTracked += size;
689 trackAddr = nextAddr;
690 }
691 }
692
693 virtual void Handle(const MemoryStatsEndEvent& event)
694 {
695 MemoryStats::MemoryTrackerMap::iterator i = mMemoryStats.trackedMemory.begin();
696 while (i != mMemoryStats.trackedMemory.end())
697 {
698 MemoryStatsEvent mse(event.data.drawId,
699 i->first.address & mAddressMask,
700 i->second.accessCountRead,
701 i->second.accessCountWrite,
702 i->second.totalSizeRead,
703 i->second.totalSizeWrite,
704 i->second.tscMin,
705 i->second.tscMax);
706 EventHandlerFile::Handle(mse);
707 i++;
708 }
709 mMemoryStats.trackedMemory.clear();
710 }
711
712 virtual void Handle(const GSPrimInfo& event)
713 {
714 mGS.inputPrimCount += event.data.inputPrimCount;
715 mGS.primGeneratedCount += event.data.primGeneratedCount;
716 mGS.vertsInput += event.data.vertsInput;
717 }
718
719 virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
720
721 virtual void Handle(const RasterTileCount& event)
722 {
723 rastStats.rasterTiles += event.data.rasterTiles;
724 }
725
726 virtual void Handle(const CullInfoEvent& event)
727 {
728 mCullStats.degeneratePrimCount += _mm_popcnt_u32(
729 event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
730 mCullStats.backfacePrimCount += _mm_popcnt_u32(
731 event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
732 }
733
734 virtual void Handle(const AlphaInfoEvent& event)
735 {
736 mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
737 mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
738 }
739
740 protected:
741 bool mNeedFlush;
742 // Per draw stats
743 DepthStencilStats mDSSingleSample = {};
744 DepthStencilStats mDSSampleRate = {};
745 DepthStencilStats mDSPixelRate = {};
746 DepthStencilStats mDSCombined = {};
747 DepthStencilStats mDSNullPS = {};
748 DepthStencilStats mDSOmZ = {};
749 CStats mClipper = {};
750 TEStats mTS = {};
751 GSStateInfo mGS = {};
752 RastStats rastStats = {};
753 CullStats mCullStats = {};
754 AlphaStats mAlphaStats = {};
755
756 SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
757
758 MemoryStats mMemoryStats = {};
759 uint64_t mAddressMask = 0;
760 uint64_t mMemGranularity = 0;
761
762 };
763
764 static EventManager* FromHandle(HANDLE hThreadContext)
765 {
766 return reinterpret_cast<EventManager*>(hThreadContext);
767 }
768
769 // Construct an event manager and associate a handler with it.
770 HANDLE CreateThreadContext(AR_THREAD type)
771 {
772 // Can we assume single threaded here?
773 static std::atomic<uint32_t> counter(0);
774 uint32_t id = counter.fetch_add(1);
775
776 EventManager* pManager = new EventManager();
777
778 if (pManager)
779 {
780 EventHandlerFile* pHandler = nullptr;
781
782 if (type == AR_THREAD::API)
783 {
784 pHandler = new EventHandlerApiStats(id);
785 pManager->Attach(pHandler);
786 pHandler->Handle(ThreadStartApiEvent());
787 }
788 else
789 {
790 pHandler = new EventHandlerWorkerStats(id);
791 pManager->Attach(pHandler);
792 pHandler->Handle(ThreadStartWorkerEvent());
793 }
794
795 pHandler->MarkHeader();
796
797 return pManager;
798 }
799
800 SWR_INVALID("Failed to register thread.");
801 return nullptr;
802 }
803
804 void DestroyThreadContext(HANDLE hThreadContext)
805 {
806 EventManager* pManager = FromHandle(hThreadContext);
807 SWR_ASSERT(pManager != nullptr);
808
809 delete pManager;
810 }
811
812 // Dispatch event for this thread.
813 void Dispatch(HANDLE hThreadContext, const Event& event)
814 {
815 EventManager* pManager = FromHandle(hThreadContext);
816 SWR_ASSERT(pManager != nullptr);
817
818 pManager->Dispatch(event);
819 }
820
821 // Flush for this thread.
822 void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
823 {
824 EventManager* pManager = FromHandle(hThreadContext);
825 SWR_ASSERT(pManager != nullptr);
826
827 pManager->FlushDraw(drawId);
828 }
829 } // namespace ArchRast