swr/rast: Add tracking for stream out topology
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.hpp"
34
35 namespace ArchRast
36 {
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail counters for the depth and stencil tests, kept
///        separately for the early and the late test stage. All counters
///        start at zero.
struct DepthStencilStats
{
    // Depth (Z) test
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
50
//////////////////////////////////////////////////////////////////////////
/// @brief Clipper counters accumulated per draw.
///
/// Every member carries a default initializer (matching the other stats
/// structs in this file) so that a default-constructed instance starts at
/// zero instead of holding indeterminate values.
struct CStats
{
    uint32_t trivialRejectCount = 0;
    uint32_t trivialAcceptCount = 0;
    uint32_t mustClipCount      = 0;
};
57
//////////////////////////////////////////////////////////////////////////
/// @brief Tessellation counters accumulated per draw.
struct TEStats
{
    uint32_t inputPrims{0};
    //@todo: Rename this to numPatches. Assumes one patch per prim; as long as
    //       that holds, the current name is fine.
};
63
//////////////////////////////////////////////////////////////////////////
/// @brief Geometry shader counters accumulated per draw.
///
/// Every member carries a default initializer (matching the other stats
/// structs in this file) so that a default-constructed instance starts at
/// zero instead of holding indeterminate values.
struct GSStats
{
    uint32_t inputPrimCount     = 0;
    uint32_t primGeneratedCount = 0;
    uint32_t vertsInput         = 0;
};
70
//////////////////////////////////////////////////////////////////////////
/// @brief Rasterizer counters accumulated per draw.
struct RastStats
{
    uint32_t rasterTiles{0};
};
75
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Event handler that handles API thread events. This is shared
78 /// between the API and its caller (e.g. driver shim) but typically
79 /// there is only a single API thread per context. So you can save
80 /// information in the class to be used for other events.
81 class EventHandlerApiStats : public EventHandlerFile
82 {
83 public:
84 EventHandlerApiStats(uint32_t id) : EventHandlerFile(id) {}
85
86 virtual void Handle(const DrawInstancedEvent& event)
87 {
88 DrawInfoEvent e(event.data.drawId, ArchRast::Instanced, event.data.topology,
89 event.data.numVertices, 0, 0, event.data.startVertex, event.data.numInstances,
90 event.data.startInstance, event.data.tsEnable, event.data.gsEnable, event.data.soEnable, event.data.soTopology, event.data.splitId);
91
92 EventHandlerFile::Handle(e);
93 }
94
95 virtual void Handle(const DrawIndexedInstancedEvent& event)
96 {
97 DrawInfoEvent e(event.data.drawId, ArchRast::IndexedInstanced, event.data.topology, 0,
98 event.data.numIndices, event.data.indexOffset, event.data.baseVertex, event.data.numInstances,
99 event.data.startInstance, event.data.tsEnable, event.data.gsEnable, event.data.soEnable, event.data.soTopology, event.data.splitId);
100
101 EventHandlerFile::Handle(e);
102 }
103 };
104
105 //////////////////////////////////////////////////////////////////////////
106 /// @brief Event handler that handles worker thread events. There is one
107 /// event handler per thread. The python script will need to sum
108 /// up counters across all of the threads.
109 class EventHandlerWorkerStats : public EventHandlerFile
110 {
111 public:
112 EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}
113
114 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
115 {
116 //earlyZ test compute
117 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
118 mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
119
120 //earlyStencil test compute
121 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
122 mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
123
124 //earlyZ test single and multi sample
125 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
126 mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
127
128 //earlyStencil test single and multi sample
129 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
130 mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
131
132 mNeedFlush = true;
133 }
134
135 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
136 {
137 //earlyZ test compute
138 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
139 mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
140
141 //earlyStencil test compute
142 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
143 mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
144
145 //earlyZ test single and multi sample
146 mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
147 mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
148
149 //earlyStencil test single and multi sample
150 mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
151 mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
152
153 mNeedFlush = true;
154 }
155
156 virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
157 {
158 //earlyZ test compute
159 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
160 mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
161
162 //earlyStencil test compute
163 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
164 mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
165 mNeedFlush = true;
166 }
167
168 virtual void Handle(const LateDepthStencilInfoSingleSample& event)
169 {
170 //lateZ test compute
171 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
172 mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
173
174 //lateStencil test compute
175 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
176 mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
177
178 //lateZ test single and multi sample
179 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
180 mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
181
182 //lateStencil test single and multi sample
183 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
184 mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
185
186 mNeedFlush = true;
187 }
188
189 virtual void Handle(const LateDepthStencilInfoSampleRate& event)
190 {
191 //lateZ test compute
192 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
193 mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
194
195 //lateStencil test compute
196 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
197 mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
198
199
200 //lateZ test single and multi sample
201 mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
202 mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
203
204 //lateStencil test single and multi sample
205 mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
206 mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
207
208 mNeedFlush = true;
209 }
210
211 virtual void Handle(const LateDepthStencilInfoNullPS& event)
212 {
213 //lateZ test compute
214 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
215 mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
216
217 //lateStencil test compute
218 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
219 mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
220 mNeedFlush = true;
221 }
222
223 virtual void Handle(const EarlyDepthInfoPixelRate& event)
224 {
225 //earlyZ test compute
226 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
227 mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
228 mNeedFlush = true;
229 }
230
231
232 virtual void Handle(const LateDepthInfoPixelRate& event)
233 {
234 //lateZ test compute
235 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
236 mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
237 mNeedFlush = true;
238 }
239
240
241 virtual void Handle(const ClipInfoEvent& event)
242 {
243 mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
244 mClipper.trivialRejectCount += event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
245 mClipper.trivialAcceptCount += _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
246 }
247
248 // Flush cached events for this draw
249 virtual void FlushDraw(uint32_t drawId)
250 {
251 if (mNeedFlush == false) return;
252
253 //singleSample
254 EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
255 EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
256 EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
257 EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
258
259 //sampleRate
260 EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
261 EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
262 EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
263 EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
264
265 //combined
266 EventHandlerFile::Handle(EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
267 EventHandlerFile::Handle(LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
268 EventHandlerFile::Handle(EarlyStencil(drawId, mDSCombined.earlyStencilTestPassCount, mDSCombined.earlyStencilTestFailCount));
269 EventHandlerFile::Handle(LateStencil(drawId, mDSCombined.lateStencilTestPassCount, mDSCombined.lateStencilTestFailCount));
270
271 //pixelRate
272 EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
273 EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
274
275
276 //NullPS
277 EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
278 EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
279
280 // Rasterized Subspans
281 EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
282
283 //Reset Internal Counters
284 mDSSingleSample = {};
285 mDSSampleRate = {};
286 mDSCombined = {};
287 mDSPixelRate = {};
288 mDSNullPS = {};
289
290 rastStats = {};
291 mNeedFlush = false;
292 }
293
294 virtual void Handle(const FrontendDrawEndEvent& event)
295 {
296 //Clipper
297 EventHandlerFile::Handle(ClipperEvent(event.data.drawId, mClipper.trivialRejectCount, mClipper.trivialAcceptCount, mClipper.mustClipCount));
298
299 //Tesselator
300 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
301
302 //Geometry Shader
303 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
304 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
305 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
306
307 //Reset Internal Counters
308 mClipper = {};
309 mTS = {};
310 mGS = {};
311 }
312
313 virtual void Handle(const GSPrimInfo& event)
314 {
315 mGS.inputPrimCount += event.data.inputPrimCount;
316 mGS.primGeneratedCount += event.data.primGeneratedCount;
317 mGS.vertsInput += event.data.vertsInput;
318 }
319
320 virtual void Handle(const TessPrimCount& event)
321 {
322 mTS.inputPrims += event.data.primCount;
323 }
324
325 virtual void Handle(const RasterTileCount& event)
326 {
327 rastStats.rasterTiles += event.data.rasterTiles;
328 }
329
330 protected:
331 bool mNeedFlush;
332 // Per draw stats
333 DepthStencilStats mDSSingleSample = {};
334 DepthStencilStats mDSSampleRate = {};
335 DepthStencilStats mDSPixelRate = {};
336 DepthStencilStats mDSCombined = {};
337 DepthStencilStats mDSNullPS = {};
338 DepthStencilStats mDSOmZ = {};
339 CStats mClipper = {};
340 TEStats mTS = {};
341 GSStats mGS = {};
342 RastStats rastStats = {};
343
344 };
345
346 static EventManager* FromHandle(HANDLE hThreadContext)
347 {
348 return reinterpret_cast<EventManager*>(hThreadContext);
349 }
350
351 // Construct an event manager and associate a handler with it.
352 HANDLE CreateThreadContext(AR_THREAD type)
353 {
354 // Can we assume single threaded here?
355 static std::atomic<uint32_t> counter(0);
356 uint32_t id = counter.fetch_add(1);
357
358 EventManager* pManager = new EventManager();
359
360 if (pManager)
361 {
362 EventHandlerFile* pHandler = nullptr;
363
364 if (type == AR_THREAD::API)
365 {
366 pHandler = new EventHandlerApiStats(id);
367 pManager->Attach(pHandler);
368 pHandler->Handle(ThreadStartApiEvent());
369 }
370 else
371 {
372 pHandler = new EventHandlerWorkerStats(id);
373 pManager->Attach(pHandler);
374 pHandler->Handle(ThreadStartWorkerEvent());
375 }
376
377 pHandler->MarkHeader();
378
379 return pManager;
380 }
381
382 SWR_INVALID("Failed to register thread.");
383 return nullptr;
384 }
385
386 void DestroyThreadContext(HANDLE hThreadContext)
387 {
388 EventManager* pManager = FromHandle(hThreadContext);
389 SWR_ASSERT(pManager != nullptr);
390
391 delete pManager;
392 }
393
394 // Dispatch event for this thread.
395 void Dispatch(HANDLE hThreadContext, const Event& event)
396 {
397 EventManager* pManager = FromHandle(hThreadContext);
398 SWR_ASSERT(pManager != nullptr);
399
400 pManager->Dispatch(event);
401 }
402
403 // Flush for this thread.
404 void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
405 {
406 EventManager* pManager = FromHandle(hThreadContext);
407 SWR_ASSERT(pManager != nullptr);
408
409 pManager->FlushDraw(drawId);
410 }
411 }