swr: [rasterizer archrast] Fix performance issue with archrast stats
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Definitions for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.h"
34
35 namespace ArchRast
36 {
//////////////////////////////////////////////////////////////////////////
/// @brief Counters for depth (Z) and stencil test outcomes, kept separately
///        for the early and late test stages. Every counter starts at zero.
struct DepthStencilStats
{
    // Depth test: lanes that passed / failed.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test: lanes that passed / failed.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};

    // Total number of lanes tested (pass + fail) per stage and test kind.
    uint32_t earlyZTestCount{0};
    uint32_t lateZTestCount{0};
    uint32_t earlyStencilTestCount{0};
    uint32_t lateStencilTestCount{0};
};
54
/// @brief Clipper counters.
struct CStats
{
    uint32_t clippedVerts{0}; ///< Running count of clipped vertices; starts at zero.
};
59
/// @brief Tessellator counters.
struct TEStats
{
    /// Running count of primitives fed to the tessellator; starts at zero.
    /// @todo Rename to numPatches. This currently assumes one patch per
    ///       primitive; the count is only correct while that holds.
    uint32_t inputPrims{0};
};
65
/// @brief Geometry shader counters.
///
/// Each member carries a zero default initializer, like the other stats
/// structs in this file, so a default-initialized GSStats is never read
/// uninitialized.
struct GSStats
{
    uint32_t inputPrimCount     = 0; ///< Primitives input to the geometry shader.
    uint32_t primGeneratedCount = 0; ///< Primitives generated by the geometry shader.
    uint32_t vertsInput         = 0; ///< Vertices input to the geometry shader.
};
72
73 //////////////////////////////////////////////////////////////////////////
74 /// @brief Event handler that saves stat events to event files. This
75 /// handler filters out unwanted events.
76 class EventHandlerStatsFile : public EventHandlerFile
77 {
78 public:
79 DepthStencilStats DSSingleSample = {};
80 DepthStencilStats DSSampleRate = {};
81 DepthStencilStats DSPixelRate = {};
82 DepthStencilStats DSNullPS = {};
83 DepthStencilStats DSOmZ = {};
84 CStats CS = {};
85 TEStats TS = {};
86 GSStats GS = {};
87
88 EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
89
90 // These are events that we're not interested in saving in stats event files.
91 virtual void Handle(Start event) {}
92 virtual void Handle(End event) {}
93
94 virtual void Handle(EarlyDepthStencilInfoSingleSample event)
95 {
96 //earlyZ test compute
97 DSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
98 DSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
99 DSSingleSample.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
100
101 //earlyStencil test compute
102 DSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
103 DSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
104 DSSingleSample.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
105
106 //outputerMerger test compute
107 DSOmZ.earlyZTestPassCount += DSSingleSample.earlyZTestPassCount;
108 DSOmZ.earlyZTestFailCount += DSSingleSample.earlyZTestFailCount;
109 DSOmZ.earlyZTestCount += DSSingleSample.earlyZTestCount;
110 DSOmZ.earlyStencilTestPassCount += DSSingleSample.earlyStencilTestPassCount;
111 DSOmZ.earlyStencilTestFailCount += DSSingleSample.earlyStencilTestFailCount;
112 DSOmZ.earlyStencilTestCount += DSSingleSample.earlyStencilTestCount;
113 }
114
115 virtual void Handle(EarlyDepthStencilInfoSampleRate event)
116 {
117 //earlyZ test compute
118 DSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
119 DSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
120 DSSampleRate.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
121
122 //earlyStencil test compute
123 DSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
124 DSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
125 DSSampleRate.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
126
127 //outputerMerger test compute
128 DSOmZ.earlyZTestPassCount += DSSampleRate.earlyZTestPassCount;
129 DSOmZ.earlyZTestFailCount += DSSampleRate.earlyZTestFailCount;
130 DSOmZ.earlyZTestCount += DSSampleRate.earlyZTestCount;
131 DSOmZ.earlyStencilTestPassCount += DSSampleRate.earlyStencilTestPassCount;
132 DSOmZ.earlyStencilTestFailCount += DSSampleRate.earlyStencilTestFailCount;
133 DSOmZ.earlyStencilTestCount += DSSampleRate.earlyStencilTestCount;
134 }
135
136 virtual void Handle(EarlyDepthStencilInfoNullPS event)
137 {
138 //earlyZ test compute
139 DSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
140 DSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
141 DSNullPS.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
142
143 //earlyStencil test compute
144 DSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
145 DSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
146 DSNullPS.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
147
148 //outputerMerger test compute
149 DSOmZ.earlyZTestPassCount += DSNullPS.earlyZTestPassCount;
150 DSOmZ.earlyZTestFailCount += DSNullPS.earlyZTestFailCount;
151 DSOmZ.earlyZTestCount += DSNullPS.earlyZTestCount;
152 DSOmZ.earlyStencilTestPassCount += DSNullPS.earlyStencilTestPassCount;
153 DSOmZ.earlyStencilTestFailCount += DSNullPS.earlyStencilTestFailCount;
154 DSOmZ.earlyStencilTestCount += DSNullPS.earlyStencilTestCount;
155 }
156
157 virtual void Handle(LateDepthStencilInfoSingleSample event)
158 {
159 //lateZ test compute
160 DSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
161 DSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
162 DSSingleSample.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
163
164 //lateStencil test compute
165 DSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
166 DSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
167 DSSingleSample.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
168
169 //outputerMerger test compute
170 DSOmZ.lateZTestPassCount += DSSingleSample.lateZTestPassCount;
171 DSOmZ.lateZTestFailCount += DSSingleSample.lateZTestFailCount;
172 DSOmZ.lateZTestCount += DSSingleSample.lateZTestCount;
173 DSOmZ.lateStencilTestPassCount += DSSingleSample.lateStencilTestPassCount;
174 DSOmZ.lateStencilTestFailCount += DSSingleSample.lateStencilTestFailCount;
175 DSOmZ.lateStencilTestCount += DSSingleSample.lateStencilTestCount;
176 }
177
178 virtual void Handle(LateDepthStencilInfoSampleRate event)
179 {
180 //lateZ test compute
181 DSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
182 DSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
183 DSSampleRate.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
184
185 //lateStencil test compute
186 DSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
187 DSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
188 DSSampleRate.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
189
190 //outputerMerger test compute
191 DSOmZ.lateZTestPassCount += DSSampleRate.lateZTestPassCount;
192 DSOmZ.lateZTestFailCount += DSSampleRate.lateZTestFailCount;
193 DSOmZ.lateZTestCount += DSSampleRate.lateZTestCount;
194 DSOmZ.lateStencilTestPassCount += DSSampleRate.lateStencilTestPassCount;
195 DSOmZ.lateStencilTestFailCount += DSSampleRate.lateStencilTestFailCount;
196 DSOmZ.lateStencilTestCount += DSSampleRate.lateStencilTestCount;
197 }
198
199 virtual void Handle(LateDepthStencilInfoNullPS event)
200 {
201 //lateZ test compute
202 DSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
203 DSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
204 DSNullPS.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
205
206 //lateStencil test compute
207 DSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
208 DSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
209 DSNullPS.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
210
211 //outputerMerger test compute
212 DSOmZ.lateZTestPassCount += DSNullPS.lateZTestPassCount;
213 DSOmZ.lateZTestFailCount += DSNullPS.lateZTestFailCount;
214 DSOmZ.lateZTestCount += DSNullPS.lateZTestCount;
215 DSOmZ.lateStencilTestPassCount += DSNullPS.lateStencilTestPassCount;
216 DSOmZ.lateStencilTestFailCount += DSNullPS.lateStencilTestFailCount;
217 DSOmZ.lateStencilTestCount += DSNullPS.lateStencilTestCount;
218 }
219
220 virtual void Handle(EarlyDepthInfoPixelRate event)
221 {
222 //earlyZ test compute
223 DSPixelRate.earlyZTestCount += _mm_popcnt_u32(event.data.activeLanes);
224 DSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
225 DSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
226
227 //outputerMerger test compute
228 DSOmZ.earlyZTestPassCount += DSPixelRate.earlyZTestPassCount;
229 DSOmZ.earlyZTestFailCount += DSPixelRate.earlyZTestFailCount;
230 DSOmZ.earlyZTestCount += DSPixelRate.earlyZTestCount;
231 }
232
233
234 virtual void Handle(LateDepthInfoPixelRate event)
235 {
236 //lateZ test compute
237 DSPixelRate.lateZTestCount += _mm_popcnt_u32(event.data.activeLanes);
238 DSPixelRate.lateZTestPassCount += event.data.depthPassCount;
239 DSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
240
241 //outputerMerger test compute
242 DSOmZ.lateZTestPassCount += DSPixelRate.lateZTestPassCount;
243 DSOmZ.lateZTestFailCount += DSPixelRate.lateZTestFailCount;
244 DSOmZ.lateZTestCount += DSPixelRate.lateZTestCount;
245
246 }
247
248
249 virtual void Handle(BackendDrawEndEvent event)
250 {
251 //singleSample
252 EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, DSSingleSample.earlyZTestPassCount, DSSingleSample.earlyZTestFailCount, DSSingleSample.earlyZTestCount));
253 EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, DSSingleSample.lateZTestPassCount, DSSingleSample.lateZTestFailCount, DSSingleSample.lateZTestCount));
254 EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, DSSingleSample.earlyStencilTestPassCount, DSSingleSample.earlyStencilTestFailCount, DSSingleSample.earlyStencilTestCount));
255 EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, DSSingleSample.lateStencilTestPassCount, DSSingleSample.lateStencilTestFailCount, DSSingleSample.lateStencilTestCount));
256
257 //sampleRate
258 EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, DSSampleRate.earlyZTestPassCount, DSSampleRate.earlyZTestFailCount, DSSampleRate.earlyZTestCount));
259 EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, DSSampleRate.lateZTestPassCount, DSSampleRate.lateZTestFailCount, DSSampleRate.lateZTestCount));
260 EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, DSSampleRate.earlyStencilTestPassCount, DSSampleRate.earlyStencilTestFailCount, DSSampleRate.earlyStencilTestCount));
261 EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, DSSampleRate.lateStencilTestPassCount, DSSampleRate.lateStencilTestFailCount, DSSampleRate.lateStencilTestCount));
262
263 //pixelRate
264 EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, DSPixelRate.earlyZTestPassCount, DSPixelRate.earlyZTestFailCount, DSPixelRate.earlyZTestCount));
265 EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, DSPixelRate.lateZTestPassCount, DSPixelRate.lateZTestFailCount, DSPixelRate.lateZTestCount));
266
267
268 //NullPS
269 EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, DSNullPS.earlyZTestPassCount, DSNullPS.earlyZTestFailCount, DSNullPS.earlyZTestCount));
270 EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, DSNullPS.earlyStencilTestPassCount, DSNullPS.earlyStencilTestFailCount, DSNullPS.earlyStencilTestCount));
271
272 //OmZ
273 EventHandlerFile::Handle(EarlyOmZ(event.data.drawId, DSOmZ.earlyZTestPassCount, DSOmZ.earlyZTestFailCount, DSOmZ.earlyZTestCount));
274 EventHandlerFile::Handle(EarlyOmStencil(event.data.drawId, DSOmZ.earlyStencilTestPassCount, DSOmZ.earlyStencilTestFailCount, DSOmZ.earlyStencilTestCount));
275 EventHandlerFile::Handle(LateOmZ(event.data.drawId, DSOmZ.lateZTestPassCount, DSOmZ.lateZTestFailCount, DSOmZ.lateZTestCount));
276 EventHandlerFile::Handle(LateOmStencil(event.data.drawId, DSOmZ.lateStencilTestPassCount, DSOmZ.lateStencilTestFailCount, DSOmZ.lateStencilTestCount));
277
278 //Reset Internal Counters
279 DSSingleSample = {};
280 DSSampleRate = {};
281 DSPixelRate = {};
282 DSNullPS = {};
283 DSOmZ = {};
284 }
285
286 virtual void Handle(FrontendDrawEndEvent event)
287 {
288 //Clipper
289 EventHandlerFile::Handle(VertsClipped(event.data.drawId, CS.clippedVerts));
290
291 //Tesselator
292 EventHandlerFile::Handle(TessPrims(event.data.drawId, TS.inputPrims));
293
294 //Geometry Shader
295 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, GS.inputPrimCount));
296 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, GS.primGeneratedCount));
297 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, GS.vertsInput));
298
299 //Reset Internal Counters
300 CS = {};
301 TS = {};
302 GS = {};
303 }
304
305 virtual void Handle(GSPrimInfo event)
306 {
307 GS.inputPrimCount += event.data.inputPrimCount;
308 GS.primGeneratedCount += event.data.primGeneratedCount;
309 GS.vertsInput += event.data.vertsInput;
310 }
311
312 virtual void Handle(ClipVertexCount event)
313 {
314 CS.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
315 }
316
317 virtual void Handle(TessPrimCount event)
318 {
319 TS.inputPrims += event.data.primCount;
320 }
321 };
322
323 static EventManager* FromHandle(HANDLE hThreadContext)
324 {
325 return reinterpret_cast<EventManager*>(hThreadContext);
326 }
327
328 // Construct an event manager and associate a handler with it.
329 HANDLE CreateThreadContext(AR_THREAD type)
330 {
331 // Can we assume single threaded here?
332 static std::atomic<uint32_t> counter(0);
333 uint32_t id = counter.fetch_add(1);
334
335 EventManager* pManager = new EventManager();
336 EventHandlerFile* pHandler = new EventHandlerStatsFile(id);
337
338 if (pManager && pHandler)
339 {
340 pManager->Attach(pHandler);
341
342 if (type == AR_THREAD::API)
343 {
344 pHandler->Handle(ThreadStartApiEvent());
345 }
346 else
347 {
348 pHandler->Handle(ThreadStartWorkerEvent());
349 }
350 pHandler->MarkHeader();
351
352 return pManager;
353 }
354
355 SWR_ASSERT(0, "Failed to register thread.");
356 return nullptr;
357 }
358
359 void DestroyThreadContext(HANDLE hThreadContext)
360 {
361 EventManager* pManager = FromHandle(hThreadContext);
362 SWR_ASSERT(pManager != nullptr);
363
364 delete pManager;
365 }
366
367 // Dispatch event for this thread.
368 void Dispatch(HANDLE hThreadContext, Event& event)
369 {
370 EventManager* pManager = FromHandle(hThreadContext);
371 SWR_ASSERT(pManager != nullptr);
372
373 pManager->Dispatch(event);
374 }
375 }