swr/rast: Refactor api and worker event handlers.
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.hpp"
34
35 namespace ArchRast
36 {
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail tallies for the depth (Z) and stencil tests, kept
///        separately for the early and the late test stage. Every
///        counter starts at zero.
struct DepthStencilStats
{
    // Depth test, early stage
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};

    // Depth test, late stage
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test, early stage
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};

    // Stencil test, late stage
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
50
/// @brief Clipper counters: how many primitives were trivially rejected,
///        trivially accepted, or had to be clipped.
///
/// Members are zero-initialized by default so that a default-constructed
/// instance is always valid, matching the other stats structs in this file.
struct CStats
{
    uint32_t trivialRejectCount = 0;
    uint32_t trivialAcceptCount = 0;
    uint32_t mustClipCount      = 0;
};
57
/// @brief Tessellation counter; starts at zero.
struct TEStats
{
    //@todo: Rename to numPatches. This assumes one patch per prim; the
    //       count is fine as long as that assumption holds.
    uint32_t inputPrims{0};
};
63
/// @brief Geometry shader counters: input primitives, generated primitives
///        and input vertices.
///
/// Members are zero-initialized by default so that a default-constructed
/// instance is always valid, matching the other stats structs in this file.
struct GSStats
{
    uint32_t inputPrimCount     = 0;
    uint32_t primGeneratedCount = 0;
    uint32_t vertsInput         = 0;
};
70
/// @brief Rasterizer counter: number of raster tiles; starts at zero.
struct RastStats
{
    uint32_t rasterTiles{0};
};
75
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Event handler that handles API thread events. This is shared
78 /// between the API and its caller (e.g. driver shim) but typically
79 /// there is only a single API thread per context. So you can save
80 /// information in the class to be used for other events.
81 class EventHandlerApiStats : public EventHandlerFile
82 {
83 public:
84 EventHandlerApiStats(uint32_t id) : EventHandlerFile(id) {}
85
86 virtual void Handle(const DrawInstancedEvent& event)
87 {
88 DrawInfoEvent e(event.data.drawId, ArchRast::Instanced, event.data.topology, event.data.numVertices, 0, 0, event.data.startVertex, event.data.numInstances, event.data.startInstance);
89
90 EventHandlerFile::Handle(e);
91 }
92
93 virtual void Handle(const DrawIndexedInstancedEvent& event)
94 {
95 DrawInfoEvent e(event.data.drawId, ArchRast::IndexedInstanced, event.data.topology, 0, event.data.numIndices, event.data.indexOffset, event.data.baseVertex, event.data.numInstances, event.data.startInstance);
96
97 EventHandlerFile::Handle(e);
98 }
99
100 virtual void Handle(const DrawInstancedSplitEvent& event)
101 {
102 DrawInfoEvent e(event.data.drawId, ArchRast::InstancedSplit, 0, 0, 0, 0, 0, 0, 0);
103
104 EventHandlerFile::Handle(e);
105 }
106
107 virtual void Handle(const DrawIndexedInstancedSplitEvent& event)
108 {
109 DrawInfoEvent e(event.data.drawId, ArchRast::IndexedInstancedSplit, 0, 0, 0, 0, 0, 0, 0);
110
111 EventHandlerFile::Handle(e);
112 }
113 };
114
115 //////////////////////////////////////////////////////////////////////////
116 /// @brief Event handler that handles worker thread events. There is one
117 /// event handler per thread. The python script will need to sum
118 /// up counters across all of the threads.
119 class EventHandlerWorkerStats : public EventHandlerFile
120 {
121 public:
122 EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}
123
124 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
125 {
126 //earlyZ test compute
127 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
128 mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
129
130 //earlyStencil test compute
131 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
132 mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
133
134 //earlyZ test single and multi sample
135 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
136 mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
137
138 //earlyStencil test single and multi sample
139 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
140 mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
141
142 mNeedFlush = true;
143 }
144
145 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
146 {
147 //earlyZ test compute
148 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
149 mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
150
151 //earlyStencil test compute
152 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
153 mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
154
155 //earlyZ test single and multi sample
156 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
157 mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
158
159 //earlyStencil test single and multi sample
160 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
161 mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
162
163 mNeedFlush = true;
164 }
165
166 virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
167 {
168 //earlyZ test compute
169 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
170 mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
171
172 //earlyStencil test compute
173 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
174 mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
175 mNeedFlush = true;
176 }
177
178 virtual void Handle(const LateDepthStencilInfoSingleSample& event)
179 {
180 //lateZ test compute
181 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
182 mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
183
184 //lateStencil test compute
185 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
186 mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
187
188 //lateZ test single and multi sample
189 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
190 mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
191
192 //lateStencil test single and multi sample
193 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
194 mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
195
196 mNeedFlush = true;
197 }
198
199 virtual void Handle(const LateDepthStencilInfoSampleRate& event)
200 {
201 //lateZ test compute
202 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
203 mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
204
205 //lateStencil test compute
206 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
207 mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
208
209
210 //lateZ test single and multi sample
211 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
212 mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
213
214 //lateStencil test single and multi sample
215 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
216 mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
217
218 mNeedFlush = true;
219 }
220
221 virtual void Handle(const LateDepthStencilInfoNullPS& event)
222 {
223 //lateZ test compute
224 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
225 mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
226
227 //lateStencil test compute
228 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
229 mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
230 mNeedFlush = true;
231 }
232
233 virtual void Handle(const EarlyDepthInfoPixelRate& event)
234 {
235 //earlyZ test compute
236 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
237 mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
238 mNeedFlush = true;
239 }
240
241
242 virtual void Handle(const LateDepthInfoPixelRate& event)
243 {
244 //lateZ test compute
245 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
246 mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
247 mNeedFlush = true;
248 }
249
250
251 virtual void Handle(const ClipInfoEvent& event)
252 {
253 mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
254 mClipper.trivialRejectCount += event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
255 mClipper.trivialAcceptCount += _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
256 }
257
258 // Flush cached events for this draw
259 virtual void FlushDraw(uint32_t drawId)
260 {
261 if (mNeedFlush == false) return;
262
263 //singleSample
264 EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
265 EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
266 EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
267 EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
268
269 //sampleRate
270 EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
271 EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
272 EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
273 EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
274
275 //combined
276 EventHandlerFile::Handle(EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
277 EventHandlerFile::Handle(LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
278 EventHandlerFile::Handle(EarlyStencil(drawId, mDSCombined.earlyStencilTestPassCount, mDSCombined.earlyStencilTestFailCount));
279 EventHandlerFile::Handle(LateStencil(drawId, mDSCombined.lateStencilTestPassCount, mDSCombined.lateStencilTestFailCount));
280
281 //pixelRate
282 EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
283 EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
284
285
286 //NullPS
287 EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
288 EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
289
290 // Rasterized Subspans
291 EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
292
293 //Reset Internal Counters
294 mDSSingleSample = {};
295 mDSSampleRate = {};
296 mDSCombined = {};
297 mDSPixelRate = {};
298 mDSNullPS = {};
299
300 rastStats = {};
301 mNeedFlush = false;
302 }
303
304 virtual void Handle(const FrontendDrawEndEvent& event)
305 {
306 //Clipper
307 EventHandlerFile::Handle(ClipperEvent(event.data.drawId, mClipper.trivialRejectCount, mClipper.trivialAcceptCount, mClipper.mustClipCount));
308
309 //Tesselator
310 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
311
312 //Geometry Shader
313 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
314 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
315 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
316
317 //Reset Internal Counters
318 mClipper = {};
319 mTS = {};
320 mGS = {};
321 }
322
323 virtual void Handle(const GSPrimInfo& event)
324 {
325 mGS.inputPrimCount += event.data.inputPrimCount;
326 mGS.primGeneratedCount += event.data.primGeneratedCount;
327 mGS.vertsInput += event.data.vertsInput;
328 }
329
330 virtual void Handle(const TessPrimCount& event)
331 {
332 mTS.inputPrims += event.data.primCount;
333 }
334
335 virtual void Handle(const RasterTileCount& event)
336 {
337 rastStats.rasterTiles += event.data.rasterTiles;
338 }
339
340 protected:
341 bool mNeedFlush;
342 // Per draw stats
343 DepthStencilStats mDSSingleSample = {};
344 DepthStencilStats mDSSampleRate = {};
345 DepthStencilStats mDSPixelRate = {};
346 DepthStencilStats mDSCombined = {};
347 DepthStencilStats mDSNullPS = {};
348 DepthStencilStats mDSOmZ = {};
349 CStats mClipper = {};
350 TEStats mTS = {};
351 GSStats mGS = {};
352 RastStats rastStats = {};
353
354 };
355
356 static EventManager* FromHandle(HANDLE hThreadContext)
357 {
358 return reinterpret_cast<EventManager*>(hThreadContext);
359 }
360
361 // Construct an event manager and associate a handler with it.
362 HANDLE CreateThreadContext(AR_THREAD type)
363 {
364 // Can we assume single threaded here?
365 static std::atomic<uint32_t> counter(0);
366 uint32_t id = counter.fetch_add(1);
367
368 EventManager* pManager = new EventManager();
369
370 if (pManager)
371 {
372 EventHandlerFile* pHandler = nullptr;
373
374 if (type == AR_THREAD::API)
375 {
376 pHandler = new EventHandlerApiStats(id);
377 pManager->Attach(pHandler);
378 pHandler->Handle(ThreadStartApiEvent());
379 }
380 else
381 {
382 pHandler = new EventHandlerWorkerStats(id);
383 pManager->Attach(pHandler);
384 pHandler->Handle(ThreadStartWorkerEvent());
385 }
386
387 pHandler->MarkHeader();
388
389 return pManager;
390 }
391
392 SWR_INVALID("Failed to register thread.");
393 return nullptr;
394 }
395
396 void DestroyThreadContext(HANDLE hThreadContext)
397 {
398 EventManager* pManager = FromHandle(hThreadContext);
399 SWR_ASSERT(pManager != nullptr);
400
401 delete pManager;
402 }
403
404 // Dispatch event for this thread.
405 void Dispatch(HANDLE hThreadContext, const Event& event)
406 {
407 EventManager* pManager = FromHandle(hThreadContext);
408 SWR_ASSERT(pManager != nullptr);
409
410 pManager->Dispatch(event);
411 }
412
413 // Flush for this thread.
414 void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
415 {
416 EventManager* pManager = FromHandle(hThreadContext);
417 SWR_ASSERT(pManager != nullptr);
418
419 pManager->FlushDraw(drawId);
420 }
421 }