swr: fix build with mingw
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <sys/stat.h>
29
30 #include <atomic>
31 #include <map>
32
33 #include "common/os.h"
34 #include "archrast/archrast.h"
35 #include "archrast/eventmanager.h"
36 #include "gen_ar_event.hpp"
37 #include "gen_ar_eventhandlerfile.hpp"
38
39 namespace ArchRast
40 {
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail tallies for depth and stencil tests, kept separately
///        for the early and late variants of each test. Every counter
///        starts at zero.
struct DepthStencilStats
{
    // Depth (Z) test results.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test results.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
54
/// @brief Clipper tallies: primitives trivially rejected, trivially
///        accepted, and those that had to be clipped.
///
/// Members are zero-initialized so that a default-constructed instance is
/// well defined, matching the other stats structs in this file.
struct CStats
{
    uint32_t trivialRejectCount = 0;
    uint32_t trivialAcceptCount = 0;
    uint32_t mustClipCount      = 0;
};
61
/// @brief Tessellation tally: number of input primitives, starting at zero.
struct TEStats
{
    //@todo:: Rename to numPatches. This assumes one patch per prim; fine as long as that holds.
    uint32_t inputPrims{0};
};
67
/// @brief Geometry shader tallies: input primitives, generated primitives
///        and input vertices.
///
/// Members are zero-initialized so that a default-constructed instance is
/// well defined, matching the other stats structs in this file.
struct GSStateInfo
{
    uint32_t inputPrimCount     = 0;
    uint32_t primGeneratedCount = 0;
    uint32_t vertsInput         = 0;
};
74
/// @brief Rasterizer tally: number of raster tiles, starting at zero.
struct RastStats
{
    uint32_t rasterTiles{0};
};
79
/// @brief Primitive culling tallies: degenerate and back-facing primitives.
///        Both counters start at zero.
struct CullStats
{
    uint32_t degeneratePrimCount{0};
    uint32_t backfacePrimCount{0};
};
85
/// @brief Alpha tallies: how often alpha test and alpha blend were enabled.
///        Both counters start at zero.
struct AlphaStats
{
    uint32_t alphaTestCount{0};
    uint32_t alphaBlendCount{0};
};
91
92
93 //////////////////////////////////////////////////////////////////////////
94 /// @brief Event handler that handles API thread events. This is shared
95 /// between the API and its caller (e.g. driver shim) but typically
96 /// there is only a single API thread per context. So you can save
97 /// information in the class to be used for other events.
98 class EventHandlerApiStats : public EventHandlerFile
99 {
100 public:
101 EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
102 {
103 #if defined(_WIN32)
104 // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
105 // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
106 // exists, this will attempt to copy it the first time we get here to package it with
107 // the stats. Otherwise, the user would need to specify the events.proto location when
108 // parsing the stats in post.
109 std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
110 eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
111 eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
112 << "\\events.proto" << std::ends;
113
114 // If event.proto already exists, we're done; else do the copy
115 struct stat buf; // Use a Posix stat for file existence check
116 if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)
117 {
118 // Now check to make sure the events.proto source exists
119 if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
120 {
121 std::ifstream srcFile;
122 srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
123 if (srcFile.is_open())
124 {
125 // Just do a binary buffer copy
126 std::ofstream dstFile;
127 dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);
128 dstFile << srcFile.rdbuf();
129 dstFile.close();
130 }
131 srcFile.close();
132 }
133 }
134 #endif
135 }
136
137 virtual void Handle(const DrawInstancedEvent& event)
138 {
139 DrawInfoEvent e(event.data.drawId,
140 ArchRast::Instanced,
141 event.data.topology,
142 event.data.numVertices,
143 0,
144 0,
145 event.data.startVertex,
146 event.data.numInstances,
147 event.data.startInstance,
148 event.data.tsEnable,
149 event.data.gsEnable,
150 event.data.soEnable,
151 event.data.soTopology,
152 event.data.splitId);
153
154 EventHandlerFile::Handle(e);
155 }
156
157 virtual void Handle(const DrawIndexedInstancedEvent& event)
158 {
159 DrawInfoEvent e(event.data.drawId,
160 ArchRast::IndexedInstanced,
161 event.data.topology,
162 0,
163 event.data.numIndices,
164 event.data.indexOffset,
165 event.data.baseVertex,
166 event.data.numInstances,
167 event.data.startInstance,
168 event.data.tsEnable,
169 event.data.gsEnable,
170 event.data.soEnable,
171 event.data.soTopology,
172 event.data.splitId);
173
174 EventHandlerFile::Handle(e);
175 }
176 };
177
178 //////////////////////////////////////////////////////////////////////////
179 /// @brief Event handler that handles worker thread events. There is one
180 /// event handler per thread. The python script will need to sum
181 /// up counters across all of the threads.
182 class EventHandlerWorkerStats : public EventHandlerFile
183 {
184 public:
185 EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
186 {
187 memset(mShaderStats, 0, sizeof(mShaderStats));
188 }
189
190 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
191 {
192 // earlyZ test compute
193 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
194 mDSSingleSample.earlyZTestFailCount +=
195 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
196
197 // earlyStencil test compute
198 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
199 mDSSingleSample.earlyStencilTestFailCount +=
200 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
201
202 // earlyZ test single and multi sample
203 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
204 mDSCombined.earlyZTestFailCount +=
205 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
206
207 // earlyStencil test single and multi sample
208 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
209 mDSCombined.earlyStencilTestFailCount +=
210 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
211
212 mNeedFlush = true;
213 }
214
215 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
216 {
217 // earlyZ test compute
218 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
219 mDSSampleRate.earlyZTestFailCount +=
220 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
221
222 // earlyStencil test compute
223 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
224 mDSSampleRate.earlyStencilTestFailCount +=
225 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
226
227 // earlyZ test single and multi sample
228 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
229 mDSCombined.earlyZTestFailCount +=
230 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
231
232 // earlyStencil test single and multi sample
233 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
234 mDSCombined.earlyStencilTestFailCount +=
235 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
236
237 mNeedFlush = true;
238 }
239
240 virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
241 {
242 // earlyZ test compute
243 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
244 mDSNullPS.earlyZTestFailCount +=
245 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
246
247 // earlyStencil test compute
248 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
249 mDSNullPS.earlyStencilTestFailCount +=
250 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
251 mNeedFlush = true;
252 }
253
254 virtual void Handle(const LateDepthStencilInfoSingleSample& event)
255 {
256 // lateZ test compute
257 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
258 mDSSingleSample.lateZTestFailCount +=
259 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
260
261 // lateStencil test compute
262 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
263 mDSSingleSample.lateStencilTestFailCount +=
264 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
265
266 // lateZ test single and multi sample
267 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
268 mDSCombined.lateZTestFailCount +=
269 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
270
271 // lateStencil test single and multi sample
272 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
273 mDSCombined.lateStencilTestFailCount +=
274 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
275
276 mNeedFlush = true;
277 }
278
279 virtual void Handle(const LateDepthStencilInfoSampleRate& event)
280 {
281 // lateZ test compute
282 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
283 mDSSampleRate.lateZTestFailCount +=
284 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
285
286 // lateStencil test compute
287 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
288 mDSSampleRate.lateStencilTestFailCount +=
289 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
290
291 // lateZ test single and multi sample
292 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
293 mDSCombined.lateZTestFailCount +=
294 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
295
296 // lateStencil test single and multi sample
297 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
298 mDSCombined.lateStencilTestFailCount +=
299 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
300
301 mNeedFlush = true;
302 }
303
304 virtual void Handle(const LateDepthStencilInfoNullPS& event)
305 {
306 // lateZ test compute
307 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
308 mDSNullPS.lateZTestFailCount +=
309 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
310
311 // lateStencil test compute
312 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
313 mDSNullPS.lateStencilTestFailCount +=
314 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
315 mNeedFlush = true;
316 }
317
318 virtual void Handle(const EarlyDepthInfoPixelRate& event)
319 {
320 // earlyZ test compute
321 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
322 mDSPixelRate.earlyZTestFailCount +=
323 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
324 mNeedFlush = true;
325 }
326
327
328 virtual void Handle(const LateDepthInfoPixelRate& event)
329 {
330 // lateZ test compute
331 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
332 mDSPixelRate.lateZTestFailCount +=
333 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
334 mNeedFlush = true;
335 }
336
337
338 virtual void Handle(const ClipInfoEvent& event)
339 {
340 mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
341 mClipper.trivialRejectCount +=
342 event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
343 mClipper.trivialAcceptCount +=
344 _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
345 }
346
347 void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
348 {
349 pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
350 pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
351 pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
352 pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
353 pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
354 pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
355 pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
356 pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
357 pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
358 pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
359 pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
360 pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
361 }
362
363 virtual void Handle(const VSStats& event)
364 {
365 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
366 UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
367 }
368
369 virtual void Handle(const GSStats& event)
370 {
371 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
372 UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
373 }
374
375 virtual void Handle(const DSStats& event)
376 {
377 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
378 UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
379 }
380
381 virtual void Handle(const HSStats& event)
382 {
383 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
384 UpdateStats(&mShaderStats[SHADER_HULL], pStats);
385 }
386
387 virtual void Handle(const PSStats& event)
388 {
389 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
390 UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
391 mNeedFlush = true;
392 }
393
394 virtual void Handle(const CSStats& event)
395 {
396 SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
397 UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
398 mNeedFlush = true;
399 }
400
401 // Flush cached events for this draw
402 virtual void FlushDraw(uint32_t drawId)
403 {
404 if (mNeedFlush == false)
405 return;
406
407 EventHandlerFile::Handle(PSInfo(drawId,
408 mShaderStats[SHADER_PIXEL].numInstExecuted,
409 mShaderStats[SHADER_PIXEL].numSampleExecuted,
410 mShaderStats[SHADER_PIXEL].numSampleLExecuted,
411 mShaderStats[SHADER_PIXEL].numSampleBExecuted,
412 mShaderStats[SHADER_PIXEL].numSampleCExecuted,
413 mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
414 mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
415 mShaderStats[SHADER_PIXEL].numGather4Executed,
416 mShaderStats[SHADER_PIXEL].numGather4CExecuted,
417 mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
418 mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
419 mShaderStats[SHADER_PIXEL].numLodExecuted));
420 EventHandlerFile::Handle(CSInfo(drawId,
421 mShaderStats[SHADER_COMPUTE].numInstExecuted,
422 mShaderStats[SHADER_COMPUTE].numSampleExecuted,
423 mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
424 mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
425 mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
426 mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
427 mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
428 mShaderStats[SHADER_COMPUTE].numGather4Executed,
429 mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
430 mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
431 mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
432 mShaderStats[SHADER_COMPUTE].numLodExecuted));
433
434 // singleSample
435 EventHandlerFile::Handle(EarlyZSingleSample(
436 drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
437 EventHandlerFile::Handle(LateZSingleSample(
438 drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
439 EventHandlerFile::Handle(
440 EarlyStencilSingleSample(drawId,
441 mDSSingleSample.earlyStencilTestPassCount,
442 mDSSingleSample.earlyStencilTestFailCount));
443 EventHandlerFile::Handle(
444 LateStencilSingleSample(drawId,
445 mDSSingleSample.lateStencilTestPassCount,
446 mDSSingleSample.lateStencilTestFailCount));
447
448 // sampleRate
449 EventHandlerFile::Handle(EarlyZSampleRate(
450 drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
451 EventHandlerFile::Handle(LateZSampleRate(
452 drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
453 EventHandlerFile::Handle(
454 EarlyStencilSampleRate(drawId,
455 mDSSampleRate.earlyStencilTestPassCount,
456 mDSSampleRate.earlyStencilTestFailCount));
457 EventHandlerFile::Handle(LateStencilSampleRate(drawId,
458 mDSSampleRate.lateStencilTestPassCount,
459 mDSSampleRate.lateStencilTestFailCount));
460
461 // combined
462 EventHandlerFile::Handle(
463 EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
464 EventHandlerFile::Handle(
465 LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
466 EventHandlerFile::Handle(EarlyStencil(drawId,
467 mDSCombined.earlyStencilTestPassCount,
468 mDSCombined.earlyStencilTestFailCount));
469 EventHandlerFile::Handle(LateStencil(drawId,
470 mDSCombined.lateStencilTestPassCount,
471 mDSCombined.lateStencilTestFailCount));
472
473 // pixelRate
474 EventHandlerFile::Handle(EarlyZPixelRate(
475 drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
476 EventHandlerFile::Handle(LateZPixelRate(
477 drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
478
479
480 // NullPS
481 EventHandlerFile::Handle(
482 EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
483 EventHandlerFile::Handle(EarlyStencilNullPS(
484 drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
485
486 // Rasterized Subspans
487 EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
488
489 // Alpha Subspans
490 EventHandlerFile::Handle(
491 AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
492
493 // Primitive Culling
494 EventHandlerFile::Handle(
495 CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
496
497 mDSSingleSample = {};
498 mDSSampleRate = {};
499 mDSCombined = {};
500 mDSPixelRate = {};
501 mDSNullPS = {};
502
503 rastStats = {};
504 mCullStats = {};
505 mAlphaStats = {};
506
507 mShaderStats[SHADER_PIXEL] = {};
508 mShaderStats[SHADER_COMPUTE] = {};
509
510 mNeedFlush = false;
511 }
512
513 virtual void Handle(const FrontendDrawEndEvent& event)
514 {
515 // Clipper
516 EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
517 mClipper.trivialRejectCount,
518 mClipper.trivialAcceptCount,
519 mClipper.mustClipCount));
520
521 // Tesselator
522 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
523
524 // Geometry Shader
525 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
526 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
527 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
528
529 EventHandlerFile::Handle(VSInfo(event.data.drawId,
530 mShaderStats[SHADER_VERTEX].numInstExecuted,
531 mShaderStats[SHADER_VERTEX].numSampleExecuted,
532 mShaderStats[SHADER_VERTEX].numSampleLExecuted,
533 mShaderStats[SHADER_VERTEX].numSampleBExecuted,
534 mShaderStats[SHADER_VERTEX].numSampleCExecuted,
535 mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
536 mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
537 mShaderStats[SHADER_VERTEX].numGather4Executed,
538 mShaderStats[SHADER_VERTEX].numGather4CExecuted,
539 mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
540 mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
541 mShaderStats[SHADER_VERTEX].numLodExecuted));
542 EventHandlerFile::Handle(HSInfo(event.data.drawId,
543 mShaderStats[SHADER_HULL].numInstExecuted,
544 mShaderStats[SHADER_HULL].numSampleExecuted,
545 mShaderStats[SHADER_HULL].numSampleLExecuted,
546 mShaderStats[SHADER_HULL].numSampleBExecuted,
547 mShaderStats[SHADER_HULL].numSampleCExecuted,
548 mShaderStats[SHADER_HULL].numSampleCLZExecuted,
549 mShaderStats[SHADER_HULL].numSampleCDExecuted,
550 mShaderStats[SHADER_HULL].numGather4Executed,
551 mShaderStats[SHADER_HULL].numGather4CExecuted,
552 mShaderStats[SHADER_HULL].numGather4CPOExecuted,
553 mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
554 mShaderStats[SHADER_HULL].numLodExecuted));
555 EventHandlerFile::Handle(DSInfo(event.data.drawId,
556 mShaderStats[SHADER_DOMAIN].numInstExecuted,
557 mShaderStats[SHADER_DOMAIN].numSampleExecuted,
558 mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
559 mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
560 mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
561 mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
562 mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
563 mShaderStats[SHADER_DOMAIN].numGather4Executed,
564 mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
565 mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
566 mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
567 mShaderStats[SHADER_DOMAIN].numLodExecuted));
568 EventHandlerFile::Handle(GSInfo(event.data.drawId,
569 mShaderStats[SHADER_GEOMETRY].numInstExecuted,
570 mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
571 mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
572 mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
573 mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
574 mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
575 mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
576 mShaderStats[SHADER_GEOMETRY].numGather4Executed,
577 mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
578 mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
579 mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
580 mShaderStats[SHADER_GEOMETRY].numLodExecuted));
581
582 mShaderStats[SHADER_VERTEX] = {};
583 mShaderStats[SHADER_HULL] = {};
584 mShaderStats[SHADER_DOMAIN] = {};
585 mShaderStats[SHADER_GEOMETRY] = {};
586
587 // Reset Internal Counters
588 mClipper = {};
589 mTS = {};
590 mGS = {};
591 }
592
593 virtual void Handle(const GSPrimInfo& event)
594 {
595 mGS.inputPrimCount += event.data.inputPrimCount;
596 mGS.primGeneratedCount += event.data.primGeneratedCount;
597 mGS.vertsInput += event.data.vertsInput;
598 }
599
600 virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
601
602 virtual void Handle(const RasterTileCount& event)
603 {
604 rastStats.rasterTiles += event.data.rasterTiles;
605 }
606
607 virtual void Handle(const CullInfoEvent& event)
608 {
609 mCullStats.degeneratePrimCount += _mm_popcnt_u32(
610 event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
611 mCullStats.backfacePrimCount += _mm_popcnt_u32(
612 event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
613 }
614
615 virtual void Handle(const AlphaInfoEvent& event)
616 {
617 mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
618 mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
619 }
620
621 protected:
622 bool mNeedFlush;
623 // Per draw stats
624 DepthStencilStats mDSSingleSample = {};
625 DepthStencilStats mDSSampleRate = {};
626 DepthStencilStats mDSPixelRate = {};
627 DepthStencilStats mDSCombined = {};
628 DepthStencilStats mDSNullPS = {};
629 DepthStencilStats mDSOmZ = {};
630 CStats mClipper = {};
631 TEStats mTS = {};
632 GSStateInfo mGS = {};
633 RastStats rastStats = {};
634 CullStats mCullStats = {};
635 AlphaStats mAlphaStats = {};
636
637 SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
638
639 };
640
641 static EventManager* FromHandle(HANDLE hThreadContext)
642 {
643 return reinterpret_cast<EventManager*>(hThreadContext);
644 }
645
646 // Construct an event manager and associate a handler with it.
647 HANDLE CreateThreadContext(AR_THREAD type)
648 {
649 // Can we assume single threaded here?
650 static std::atomic<uint32_t> counter(0);
651 uint32_t id = counter.fetch_add(1);
652
653 EventManager* pManager = new EventManager();
654
655 if (pManager)
656 {
657 EventHandlerFile* pHandler = nullptr;
658
659 if (type == AR_THREAD::API)
660 {
661 pHandler = new EventHandlerApiStats(id);
662 pManager->Attach(pHandler);
663 pHandler->Handle(ThreadStartApiEvent());
664 }
665 else
666 {
667 pHandler = new EventHandlerWorkerStats(id);
668 pManager->Attach(pHandler);
669 pHandler->Handle(ThreadStartWorkerEvent());
670 }
671
672 pHandler->MarkHeader();
673
674 return pManager;
675 }
676
677 SWR_INVALID("Failed to register thread.");
678 return nullptr;
679 }
680
681 void DestroyThreadContext(HANDLE hThreadContext)
682 {
683 EventManager* pManager = FromHandle(hThreadContext);
684 SWR_ASSERT(pManager != nullptr);
685
686 delete pManager;
687 }
688
689 // Dispatch event for this thread.
690 void Dispatch(HANDLE hThreadContext, const Event& event)
691 {
692 if (event.IsEnabled())
693 {
694 EventManager* pManager = reinterpret_cast<EventManager*>(hThreadContext);
695 SWR_ASSERT(pManager != nullptr);
696 pManager->Dispatch(event);
697 }
698 }
699
700 // Flush for this thread.
701 void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
702 {
703 EventManager* pManager = FromHandle(hThreadContext);
704 SWR_ASSERT(pManager != nullptr);
705
706 pManager->FlushDraw(drawId);
707 }
708 } // namespace ArchRast