06e0f616f707e18d8018a9b91134f5c1b9cc54d5
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29 #include <map>
30
31 #include "common/os.h"
32 #include "archrast/archrast.h"
33 #include "archrast/eventmanager.h"
34 #include "gen_ar_eventhandlerfile.hpp"
35
36 namespace ArchRast
37 {
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail counters for depth and stencil tests, split by whether
///        the test ran early or late. Every counter starts at zero.
struct DepthStencilStats
{
    // Depth (Z) test results.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test results.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
51
/// @brief Clipper counters: how many primitives were trivially rejected,
///        trivially accepted, or had to be clipped.
///
/// Every counter is zero-initialized, matching the other stat structs in this
/// file, so a default-constructed instance never holds indeterminate values.
struct CStats
{
    uint32_t trivialRejectCount = 0;
    uint32_t trivialAcceptCount = 0;
    uint32_t mustClipCount      = 0;
};
58
/// @brief Tessellation counter: number of primitives fed to the tessellator.
struct TEStats
{
    uint32_t inputPrims{0};
    // @todo Rename to numPatches. This currently assumes one patch per
    //       primitive; as long as that holds, the count is correct.
};
64
/// @brief Geometry shader counters: primitives consumed, primitives
///        generated, and vertices consumed.
///
/// Every counter is zero-initialized, matching the other stat structs in this
/// file, so a default-constructed instance never holds indeterminate values.
struct GSStateInfo
{
    uint32_t inputPrimCount     = 0;
    uint32_t primGeneratedCount = 0;
    uint32_t vertsInput         = 0;
};
71
/// @brief Rasterizer counter: number of raster tiles accumulated.
struct RastStats
{
    uint32_t rasterTiles{0};
};
76
/// @brief Primitive culling counters: degenerate and back-facing primitives.
struct CullStats
{
    uint32_t degeneratePrimCount{0};
    uint32_t backfacePrimCount{0};
};
82
/// @brief Alpha counters: how often alpha test and alpha blend were enabled.
struct AlphaStats
{
    uint32_t alphaTestCount{0};
    uint32_t alphaBlendCount{0};
};
88
/// @brief Tracks read/write access counts and first/last timestamps per
///        masked address range.
struct MemoryStats
{
    /// Map key: an address together with the mask that selects its range.
    struct MemoryTrackerKey
    {
        uint64_t address;
        uint64_t mask;
    };

    /// Per-range payload: access counters plus the tsc recorded when the
    /// range was first inserted (tscMin) and most recently touched (tscMax).
    struct MemoryTrackerData
    {
        uint32_t accessCountRead;
        uint32_t accessCountWrite;
        uint64_t tscMin;
        uint64_t tscMax;
    };

    /// Orders keys by their masked address, so two addresses that fall in the
    /// same masked range compare as equivalent.
    struct AddressRangeComparator
    {
        bool operator()(MemoryTrackerKey a, MemoryTrackerKey b) const
        {
            const uint64_t lhsBase = a.address & a.mask;
            const uint64_t rhsBase = b.address & b.mask;
            return lhsBase < rhsBase;
        }
    };

    using MemoryTrackerMap = std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator>;
    MemoryTrackerMap trackedMemory = {};

    /// @brief Record one memory access.
    /// @param address     Address that was accessed.
    /// @param addressMask Mask applied to the address to pick its range.
    /// @param isRead      Non-zero counts a read, zero counts a write.
    /// @param tsc         Timestamp stored for this access.
    void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc)
    {
        const MemoryTrackerKey key = {address, addressMask};

        // lower_bound yields the first entry that is not ordered before key;
        // it is an existing match only if key is not ordered before it either.
        const MemoryTrackerMap::iterator pos = trackedMemory.lower_bound(key);
        const bool isNewRange =
            (pos == trackedMemory.end()) || trackedMemory.key_comp()(key, pos->first);

        if (isNewRange)
        {
            MemoryTrackerData fresh;
            fresh.accessCountRead  = isRead ? 1 : 0;
            fresh.accessCountWrite = isRead ? 0 : 1;
            fresh.tscMin           = tsc;
            fresh.tscMax           = tsc;
            // pos doubles as the insertion hint.
            trackedMemory.insert(pos, MemoryTrackerMap::value_type(key, fresh));
            return;
        }

        // Known range: bump the matching counter and refresh the last-seen tsc.
        uint32_t& counter = isRead ? pos->second.accessCountRead : pos->second.accessCountWrite;
        ++counter;
        pos->second.tscMax = tsc;
    }
};
156
157 //////////////////////////////////////////////////////////////////////////
158 /// @brief Event handler that handles API thread events. This is shared
159 /// between the API and its caller (e.g. driver shim) but typically
160 /// there is only a single API thread per context. So you can save
161 /// information in the class to be used for other events.
162 class EventHandlerApiStats : public EventHandlerFile
163 {
164 public:
165 EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
166 {
167 #if defined(_WIN32)
168 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
169 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
170 // exists, this will attempt to copy it the first time we get here to package it with
171 // the stats. Otherwise, the user would need to specify the events.proto location when
172 // parsing the stats in post.
173 std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
174 eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
175 eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
176 << "\\events.proto" << std::ends;
177
178 // If event.proto already exists, we're done; else do the copy
179 struct stat buf; // Use a Posix stat for file existence check
180 if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)
181 {
182 // Now check to make sure the events.proto source exists
183 if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
184 {
185 std::ifstream srcFile;
186 srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
187 if (srcFile.is_open())
188 {
189 // Just do a binary buffer copy
190 std::ofstream dstFile;
191 dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);
192 dstFile << srcFile.rdbuf();
193 dstFile.close();
194 }
195 srcFile.close();
196 }
197 }
198 #endif
199 }
200
201 virtual void Handle(const DrawInstancedEvent& event)
202 {
203 DrawInfoEvent e(event.data.drawId,
204 ArchRast::Instanced,
205 event.data.topology,
206 event.data.numVertices,
207 0,
208 0,
209 event.data.startVertex,
210 event.data.numInstances,
211 event.data.startInstance,
212 event.data.tsEnable,
213 event.data.gsEnable,
214 event.data.soEnable,
215 event.data.soTopology,
216 event.data.splitId);
217
218 EventHandlerFile::Handle(e);
219 }
220
221 virtual void Handle(const DrawIndexedInstancedEvent& event)
222 {
223 DrawInfoEvent e(event.data.drawId,
224 ArchRast::IndexedInstanced,
225 event.data.topology,
226 0,
227 event.data.numIndices,
228 event.data.indexOffset,
229 event.data.baseVertex,
230 event.data.numInstances,
231 event.data.startInstance,
232 event.data.tsEnable,
233 event.data.gsEnable,
234 event.data.soEnable,
235 event.data.soTopology,
236 event.data.splitId);
237
238 EventHandlerFile::Handle(e);
239 }
240 };
241
242 //////////////////////////////////////////////////////////////////////////
243 /// @brief Event handler that handles worker thread events. There is one
244 /// event handler per thread. The python script will need to sum
245 /// up counters across all of the threads.
246 class EventHandlerWorkerStats : public EventHandlerFile
247 {
248 public:
249 EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
250 {
251 memset(mShaderStats, 0, sizeof(mShaderStats));
252
253 // compute address mask for memory tracking
254 mAddressMask = 0;
255 uint64_t addressRangeBytes = 64;
256 while (addressRangeBytes > 0)
257 {
258 mAddressMask = (mAddressMask << 1) | 1;
259 addressRangeBytes = addressRangeBytes >> 1;
260 }
261 mAddressMask = ~mAddressMask;
262 }
263
264 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
265 {
266 // earlyZ test compute
267 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
268 mDSSingleSample.earlyZTestFailCount +=
269 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
270
271 // earlyStencil test compute
272 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
273 mDSSingleSample.earlyStencilTestFailCount +=
274 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
275
276 // earlyZ test single and multi sample
277 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
278 mDSCombined.earlyZTestFailCount +=
279 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
280
281 // earlyStencil test single and multi sample
282 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
283 mDSCombined.earlyStencilTestFailCount +=
284 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
285
286 mNeedFlush = true;
287 }
288
289 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
290 {
291 // earlyZ test compute
292 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
293 mDSSampleRate.earlyZTestFailCount +=
294 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
295
296 // earlyStencil test compute
297 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
298 mDSSampleRate.earlyStencilTestFailCount +=
299 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
300
301 // earlyZ test single and multi sample
302 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
303 mDSCombined.earlyZTestFailCount +=
304 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
305
306 // earlyStencil test single and multi sample
307 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
308 mDSCombined.earlyStencilTestFailCount +=
309 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
310
311 mNeedFlush = true;
312 }
313
314 virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
315 {
316 // earlyZ test compute
317 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
318 mDSNullPS.earlyZTestFailCount +=
319 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
320
321 // earlyStencil test compute
322 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
323 mDSNullPS.earlyStencilTestFailCount +=
324 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
325 mNeedFlush = true;
326 }
327
328 virtual void Handle(const LateDepthStencilInfoSingleSample& event)
329 {
330 // lateZ test compute
331 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
332 mDSSingleSample.lateZTestFailCount +=
333 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
334
335 // lateStencil test compute
336 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
337 mDSSingleSample.lateStencilTestFailCount +=
338 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
339
340 // lateZ test single and multi sample
341 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
342 mDSCombined.lateZTestFailCount +=
343 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
344
345 // lateStencil test single and multi sample
346 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
347 mDSCombined.lateStencilTestFailCount +=
348 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
349
350 mNeedFlush = true;
351 }
352
353 virtual void Handle(const LateDepthStencilInfoSampleRate& event)
354 {
355 // lateZ test compute
356 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
357 mDSSampleRate.lateZTestFailCount +=
358 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
359
360 // lateStencil test compute
361 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
362 mDSSampleRate.lateStencilTestFailCount +=
363 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
364
365 // lateZ test single and multi sample
366 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
367 mDSCombined.lateZTestFailCount +=
368 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
369
370 // lateStencil test single and multi sample
371 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
372 mDSCombined.lateStencilTestFailCount +=
373 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
374
375 mNeedFlush = true;
376 }
377
378 virtual void Handle(const LateDepthStencilInfoNullPS& event)
379 {
380 // lateZ test compute
381 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
382 mDSNullPS.lateZTestFailCount +=
383 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
384
385 // lateStencil test compute
386 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
387 mDSNullPS.lateStencilTestFailCount +=
388 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
389 mNeedFlush = true;
390 }
391
392 virtual void Handle(const EarlyDepthInfoPixelRate& event)
393 {
394 // earlyZ test compute
395 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
396 mDSPixelRate.earlyZTestFailCount +=
397 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
398 mNeedFlush = true;
399 }
400
401
402 virtual void Handle(const LateDepthInfoPixelRate& event)
403 {
404 // lateZ test compute
405 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
406 mDSPixelRate.lateZTestFailCount +=
407 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
408 mNeedFlush = true;
409 }
410
411
412 virtual void Handle(const ClipInfoEvent& event)
413 {
414 mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
415 mClipper.trivialRejectCount +=
416 event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
417 mClipper.trivialAcceptCount +=
418 _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
419 }
420
421 void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
422 {
423 pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
424 pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
425 pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
426 pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
427 pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
428 pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
429 pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
430 pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
431 pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
432 pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
433 pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
434 pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
435 }
436
437 virtual void Handle(const VSStats& event)
438 {
439 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
440 UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
441 }
442
443 virtual void Handle(const GSStats& event)
444 {
445 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
446 UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
447 }
448
449 virtual void Handle(const DSStats& event)
450 {
451 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
452 UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
453 }
454
455 virtual void Handle(const HSStats& event)
456 {
457 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
458 UpdateStats(&mShaderStats[SHADER_HULL], pStats);
459 }
460
461 virtual void Handle(const PSStats& event)
462 {
463 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
464 UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
465 mNeedFlush = true;
466 }
467
468 virtual void Handle(const CSStats& event)
469 {
470 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
471 UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
472 mNeedFlush = true;
473 }
474
475 // Flush cached events for this draw
476 virtual void FlushDraw(uint32_t drawId)
477 {
478 if (mNeedFlush == false)
479 return;
480
481 EventHandlerFile::Handle(PSInfo(drawId,
482 mShaderStats[SHADER_PIXEL].numInstExecuted,
483 mShaderStats[SHADER_PIXEL].numSampleExecuted,
484 mShaderStats[SHADER_PIXEL].numSampleLExecuted,
485 mShaderStats[SHADER_PIXEL].numSampleBExecuted,
486 mShaderStats[SHADER_PIXEL].numSampleCExecuted,
487 mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
488 mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
489 mShaderStats[SHADER_PIXEL].numGather4Executed,
490 mShaderStats[SHADER_PIXEL].numGather4CExecuted,
491 mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
492 mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
493 mShaderStats[SHADER_PIXEL].numLodExecuted));
494 EventHandlerFile::Handle(CSInfo(drawId,
495 mShaderStats[SHADER_COMPUTE].numInstExecuted,
496 mShaderStats[SHADER_COMPUTE].numSampleExecuted,
497 mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
498 mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
499 mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
500 mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
501 mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
502 mShaderStats[SHADER_COMPUTE].numGather4Executed,
503 mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
504 mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
505 mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
506 mShaderStats[SHADER_COMPUTE].numLodExecuted));
507
508 // singleSample
509 EventHandlerFile::Handle(EarlyZSingleSample(
510 drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
511 EventHandlerFile::Handle(LateZSingleSample(
512 drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
513 EventHandlerFile::Handle(
514 EarlyStencilSingleSample(drawId,
515 mDSSingleSample.earlyStencilTestPassCount,
516 mDSSingleSample.earlyStencilTestFailCount));
517 EventHandlerFile::Handle(
518 LateStencilSingleSample(drawId,
519 mDSSingleSample.lateStencilTestPassCount,
520 mDSSingleSample.lateStencilTestFailCount));
521
522 // sampleRate
523 EventHandlerFile::Handle(EarlyZSampleRate(
524 drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
525 EventHandlerFile::Handle(LateZSampleRate(
526 drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
527 EventHandlerFile::Handle(
528 EarlyStencilSampleRate(drawId,
529 mDSSampleRate.earlyStencilTestPassCount,
530 mDSSampleRate.earlyStencilTestFailCount));
531 EventHandlerFile::Handle(LateStencilSampleRate(drawId,
532 mDSSampleRate.lateStencilTestPassCount,
533 mDSSampleRate.lateStencilTestFailCount));
534
535 // combined
536 EventHandlerFile::Handle(
537 EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
538 EventHandlerFile::Handle(
539 LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
540 EventHandlerFile::Handle(EarlyStencil(drawId,
541 mDSCombined.earlyStencilTestPassCount,
542 mDSCombined.earlyStencilTestFailCount));
543 EventHandlerFile::Handle(LateStencil(drawId,
544 mDSCombined.lateStencilTestPassCount,
545 mDSCombined.lateStencilTestFailCount));
546
547 // pixelRate
548 EventHandlerFile::Handle(EarlyZPixelRate(
549 drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
550 EventHandlerFile::Handle(LateZPixelRate(
551 drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
552
553
554 // NullPS
555 EventHandlerFile::Handle(
556 EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
557 EventHandlerFile::Handle(EarlyStencilNullPS(
558 drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
559
560 // Rasterized Subspans
561 EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
562
563 // Alpha Subspans
564 EventHandlerFile::Handle(
565 AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
566
567 // Primitive Culling
568 EventHandlerFile::Handle(
569 CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
570
571 mDSSingleSample = {};
572 mDSSampleRate = {};
573 mDSCombined = {};
574 mDSPixelRate = {};
575 mDSNullPS = {};
576
577 rastStats = {};
578 mCullStats = {};
579 mAlphaStats = {};
580
581 mShaderStats[SHADER_PIXEL] = {};
582 mShaderStats[SHADER_COMPUTE] = {};
583
584 mNeedFlush = false;
585 }
586
587 virtual void Handle(const FrontendDrawEndEvent& event)
588 {
589 // Clipper
590 EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
591 mClipper.trivialRejectCount,
592 mClipper.trivialAcceptCount,
593 mClipper.mustClipCount));
594
595 // Tesselator
596 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
597
598 // Geometry Shader
599 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
600 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
601 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
602
603 EventHandlerFile::Handle(VSInfo(event.data.drawId,
604 mShaderStats[SHADER_VERTEX].numInstExecuted,
605 mShaderStats[SHADER_VERTEX].numSampleExecuted,
606 mShaderStats[SHADER_VERTEX].numSampleLExecuted,
607 mShaderStats[SHADER_VERTEX].numSampleBExecuted,
608 mShaderStats[SHADER_VERTEX].numSampleCExecuted,
609 mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
610 mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
611 mShaderStats[SHADER_VERTEX].numGather4Executed,
612 mShaderStats[SHADER_VERTEX].numGather4CExecuted,
613 mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
614 mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
615 mShaderStats[SHADER_VERTEX].numLodExecuted));
616 EventHandlerFile::Handle(HSInfo(event.data.drawId,
617 mShaderStats[SHADER_HULL].numInstExecuted,
618 mShaderStats[SHADER_HULL].numSampleExecuted,
619 mShaderStats[SHADER_HULL].numSampleLExecuted,
620 mShaderStats[SHADER_HULL].numSampleBExecuted,
621 mShaderStats[SHADER_HULL].numSampleCExecuted,
622 mShaderStats[SHADER_HULL].numSampleCLZExecuted,
623 mShaderStats[SHADER_HULL].numSampleCDExecuted,
624 mShaderStats[SHADER_HULL].numGather4Executed,
625 mShaderStats[SHADER_HULL].numGather4CExecuted,
626 mShaderStats[SHADER_HULL].numGather4CPOExecuted,
627 mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
628 mShaderStats[SHADER_HULL].numLodExecuted));
629 EventHandlerFile::Handle(DSInfo(event.data.drawId,
630 mShaderStats[SHADER_DOMAIN].numInstExecuted,
631 mShaderStats[SHADER_DOMAIN].numSampleExecuted,
632 mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
633 mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
634 mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
635 mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
636 mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
637 mShaderStats[SHADER_DOMAIN].numGather4Executed,
638 mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
639 mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
640 mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
641 mShaderStats[SHADER_DOMAIN].numLodExecuted));
642 EventHandlerFile::Handle(GSInfo(event.data.drawId,
643 mShaderStats[SHADER_GEOMETRY].numInstExecuted,
644 mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
645 mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
646 mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
647 mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
648 mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
649 mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
650 mShaderStats[SHADER_GEOMETRY].numGather4Executed,
651 mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
652 mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
653 mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
654 mShaderStats[SHADER_GEOMETRY].numLodExecuted));
655
656 mShaderStats[SHADER_VERTEX] = {};
657 mShaderStats[SHADER_HULL] = {};
658 mShaderStats[SHADER_DOMAIN] = {};
659 mShaderStats[SHADER_GEOMETRY] = {};
660
661 // Reset Internal Counters
662 mClipper = {};
663 mTS = {};
664 mGS = {};
665 }
666
667 virtual void Handle(const MemoryAccessEvent& event)
668 {
669 mMemoryStats.TrackMemoryAccess(event.data.ptr, mAddressMask, event.data.isRead, event.data.tsc);
670 }
671
672 virtual void Handle(const MemoryStatsEndEvent& event)
673 {
674 MemoryStats::MemoryTrackerMap::iterator i = mMemoryStats.trackedMemory.begin();
675 while (i != mMemoryStats.trackedMemory.end())
676 {
677 MemoryStatsEvent mse(event.data.drawId,
678 i->first.address & mAddressMask,
679 i->second.accessCountRead,
680 i->second.accessCountWrite,
681 i->second.tscMin,
682 i->second.tscMax);
683 EventHandlerFile::Handle(mse);
684 i++;
685 }
686 mMemoryStats.trackedMemory.clear();
687 }
688
689 virtual void Handle(const GSPrimInfo& event)
690 {
691 mGS.inputPrimCount += event.data.inputPrimCount;
692 mGS.primGeneratedCount += event.data.primGeneratedCount;
693 mGS.vertsInput += event.data.vertsInput;
694 }
695
696 virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
697
698 virtual void Handle(const RasterTileCount& event)
699 {
700 rastStats.rasterTiles += event.data.rasterTiles;
701 }
702
703 virtual void Handle(const CullInfoEvent& event)
704 {
705 mCullStats.degeneratePrimCount += _mm_popcnt_u32(
706 event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
707 mCullStats.backfacePrimCount += _mm_popcnt_u32(
708 event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
709 }
710
711 virtual void Handle(const AlphaInfoEvent& event)
712 {
713 mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
714 mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
715 }
716
717 protected:
718 bool mNeedFlush;
719 // Per draw stats
720 DepthStencilStats mDSSingleSample = {};
721 DepthStencilStats mDSSampleRate = {};
722 DepthStencilStats mDSPixelRate = {};
723 DepthStencilStats mDSCombined = {};
724 DepthStencilStats mDSNullPS = {};
725 DepthStencilStats mDSOmZ = {};
726 CStats mClipper = {};
727 TEStats mTS = {};
728 GSStateInfo mGS = {};
729 RastStats rastStats = {};
730 CullStats mCullStats = {};
731 AlphaStats mAlphaStats = {};
732
733 SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
734
735 MemoryStats mMemoryStats = {};
736 uint64_t mAddressMask = 0;
737
738 };
739
740 static EventManager* FromHandle(HANDLE hThreadContext)
741 {
742 return reinterpret_cast<EventManager*>(hThreadContext);
743 }
744
745 // Construct an event manager and associate a handler with it.
746 HANDLE CreateThreadContext(AR_THREAD type)
747 {
748 // Can we assume single threaded here?
749 static std::atomic<uint32_t> counter(0);
750 uint32_t id = counter.fetch_add(1);
751
752 EventManager* pManager = new EventManager();
753
754 if (pManager)
755 {
756 EventHandlerFile* pHandler = nullptr;
757
758 if (type == AR_THREAD::API)
759 {
760 pHandler = new EventHandlerApiStats(id);
761 pManager->Attach(pHandler);
762 pHandler->Handle(ThreadStartApiEvent());
763 }
764 else
765 {
766 pHandler = new EventHandlerWorkerStats(id);
767 pManager->Attach(pHandler);
768 pHandler->Handle(ThreadStartWorkerEvent());
769 }
770
771 pHandler->MarkHeader();
772
773 return pManager;
774 }
775
776 SWR_INVALID("Failed to register thread.");
777 return nullptr;
778 }
779
780 void DestroyThreadContext(HANDLE hThreadContext)
781 {
782 EventManager* pManager = FromHandle(hThreadContext);
783 SWR_ASSERT(pManager != nullptr);
784
785 delete pManager;
786 }
787
788 // Dispatch event for this thread.
789 void Dispatch(HANDLE hThreadContext, const Event& event)
790 {
791 EventManager* pManager = FromHandle(hThreadContext);
792 SWR_ASSERT(pManager != nullptr);
793
794 pManager->Dispatch(event);
795 }
796
797 // Flush for this thread.
798 void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
799 {
800 EventManager* pManager = FromHandle(hThreadContext);
801 SWR_ASSERT(pManager != nullptr);
802
803 pManager->FlushDraw(drawId);
804 }
805 } // namespace ArchRast