swr: [rasterizer archrast/scripts] Further archrast cleanups
[mesa.git] / src / gallium / drivers / swr / rasterizer / archrast / archrast.cpp
1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.h"
34
35 namespace ArchRast
36 {
//////////////////////////////////////////////////////////////////////////
/// @brief Per-draw depth/stencil test counters. Every counter starts at
///        zero; the owning handler accumulates into them and resets the
///        whole struct by assigning `{}`.
struct DepthStencilStats
{
    // Depth test outcomes (pass / fail), early then late.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test outcomes (pass / fail), early then late.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};

    // Totals of tests performed (pass + fail) for each stage.
    uint32_t earlyZTestCount{0};
    uint32_t lateZTestCount{0};
    uint32_t earlyStencilTestCount{0};
    uint32_t lateStencilTestCount{0};
};
54
/// @brief Per-draw clipper counters.
struct CStats
{
    uint32_t clippedVerts{0}; ///< running total of clipped vertices
};
59
/// @brief Per-draw tessellator counters.
struct TEStats
{
    /// Running total of input primitives.
    /// @todo Change this to numPatches. This assumes one patch per prim;
    ///       if that holds, the current counter is fine.
    uint32_t inputPrims{0};
};
65
/// @brief Per-draw geometry shader counters.
///
/// Every member carries a default initializer, matching the other stats
/// structs in this file, so that a default-initialized instance starts
/// from zero instead of holding indeterminate values.
struct GSStats
{
    uint32_t inputPrimCount     = 0; ///< running total of GS input primitives
    uint32_t primGeneratedCount = 0; ///< running total of primitives generated
    uint32_t vertsInput         = 0; ///< running total of GS input vertices
};
72
73 //////////////////////////////////////////////////////////////////////////
74 /// @brief Event handler that saves stat events to event files. This
75 /// handler filters out unwanted events.
76 class EventHandlerStatsFile : public EventHandlerFile
77 {
78 public:
79 EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
80
81 // These are events that we're not interested in saving in stats event files.
82 virtual void Handle(const Start& event) {}
83 virtual void Handle(const End& event) {}
84
85 virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
86 {
87 //earlyZ test compute
88 mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
89 mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
90 mDSSingleSample.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
91
92 //earlyStencil test compute
93 mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
94 mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
95 mDSSingleSample.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
96 }
97
98 virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
99 {
100 //earlyZ test compute
101 mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
102 mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
103 mDSSampleRate.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
104
105 //earlyStencil test compute
106 mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
107 mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
108 mDSSampleRate.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
109 }
110
111 virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
112 {
113 //earlyZ test compute
114 mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
115 mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
116 mDSNullPS.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
117
118 //earlyStencil test compute
119 mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
120 mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
121 mDSNullPS.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
122 }
123
124 virtual void Handle(const LateDepthStencilInfoSingleSample& event)
125 {
126 //lateZ test compute
127 mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
128 mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
129 mDSSingleSample.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
130
131 //lateStencil test compute
132 mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
133 mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
134 mDSSingleSample.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
135 }
136
137 virtual void Handle(const LateDepthStencilInfoSampleRate& event)
138 {
139 //lateZ test compute
140 mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
141 mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
142 mDSSampleRate.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
143
144 //lateStencil test compute
145 mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
146 mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
147 mDSSampleRate.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
148 }
149
150 virtual void Handle(const LateDepthStencilInfoNullPS& event)
151 {
152 //lateZ test compute
153 mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
154 mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
155 mDSNullPS.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
156
157 //lateStencil test compute
158 mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
159 mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
160 mDSNullPS.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
161 }
162
163 virtual void Handle(const EarlyDepthInfoPixelRate& event)
164 {
165 //earlyZ test compute
166 mDSPixelRate.earlyZTestCount += _mm_popcnt_u32(event.data.activeLanes);
167 mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
168 mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
169 }
170
171
172 virtual void Handle(const LateDepthInfoPixelRate& event)
173 {
174 //lateZ test compute
175 mDSPixelRate.lateZTestCount += _mm_popcnt_u32(event.data.activeLanes);
176 mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
177 mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
178
179 }
180
181
182 virtual void Handle(const BackendDrawEndEvent& event)
183 {
184 //singleSample
185 EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount, mDSSingleSample.earlyZTestCount));
186 EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount, mDSSingleSample.lateZTestCount));
187 EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount, mDSSingleSample.earlyStencilTestCount));
188 EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount, mDSSingleSample.lateStencilTestCount));
189
190 //sampleRate
191 EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount, mDSSampleRate.earlyZTestCount));
192 EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount, mDSSampleRate.lateZTestCount));
193 EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount, mDSSampleRate.earlyStencilTestCount));
194 EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount, mDSSampleRate.lateStencilTestCount));
195
196 //pixelRate
197 EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount, mDSPixelRate.earlyZTestCount));
198 EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount, mDSPixelRate.lateZTestCount));
199
200
201 //NullPS
202 EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount, mDSNullPS.earlyZTestCount));
203 EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount, mDSNullPS.earlyStencilTestCount));
204
205 //Reset Internal Counters
206 mDSSingleSample = {};
207 mDSSampleRate = {};
208 mDSPixelRate = {};
209 mDSNullPS = {};
210 }
211
212 virtual void Handle(const FrontendDrawEndEvent& event)
213 {
214 //Clipper
215 EventHandlerFile::Handle(VertsClipped(event.data.drawId, mClipper.clippedVerts));
216
217 //Tesselator
218 EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
219
220 //Geometry Shader
221 EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
222 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
223 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
224
225 //Reset Internal Counters
226 mClipper = {};
227 mTS = {};
228 mGS = {};
229 }
230
231 virtual void Handle(const GSPrimInfo& event)
232 {
233 mGS.inputPrimCount += event.data.inputPrimCount;
234 mGS.primGeneratedCount += event.data.primGeneratedCount;
235 mGS.vertsInput += event.data.vertsInput;
236 }
237
238 virtual void Handle(const ClipVertexCount& event)
239 {
240 mClipper.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
241 }
242
243 virtual void Handle(const TessPrimCount& event)
244 {
245 mTS.inputPrims += event.data.primCount;
246 }
247
248 protected:
249
250 // Per draw stats
251 DepthStencilStats mDSSingleSample = {};
252 DepthStencilStats mDSSampleRate = {};
253 DepthStencilStats mDSPixelRate = {};
254 DepthStencilStats mDSNullPS = {};
255 DepthStencilStats mDSOmZ = {};
256 CStats mClipper = {};
257 TEStats mTS = {};
258 GSStats mGS = {};
259
260 };
261
262 static EventManager* FromHandle(HANDLE hThreadContext)
263 {
264 return reinterpret_cast<EventManager*>(hThreadContext);
265 }
266
267 // Construct an event manager and associate a handler with it.
268 HANDLE CreateThreadContext(AR_THREAD type)
269 {
270 // Can we assume single threaded here?
271 static std::atomic<uint32_t> counter(0);
272 uint32_t id = counter.fetch_add(1);
273
274 EventManager* pManager = new EventManager();
275 EventHandlerFile* pHandler = new EventHandlerStatsFile(id);
276
277 if (pManager && pHandler)
278 {
279 pManager->Attach(pHandler);
280
281 if (type == AR_THREAD::API)
282 {
283 pHandler->Handle(ThreadStartApiEvent());
284 }
285 else
286 {
287 pHandler->Handle(ThreadStartWorkerEvent());
288 }
289 pHandler->MarkHeader();
290
291 return pManager;
292 }
293
294 SWR_ASSERT(0, "Failed to register thread.");
295 return nullptr;
296 }
297
298 void DestroyThreadContext(HANDLE hThreadContext)
299 {
300 EventManager* pManager = FromHandle(hThreadContext);
301 SWR_ASSERT(pManager != nullptr);
302
303 delete pManager;
304 }
305
306 // Dispatch event for this thread.
307 void Dispatch(HANDLE hThreadContext, Event& event)
308 {
309 EventManager* pManager = FromHandle(hThreadContext);
310 SWR_ASSERT(pManager != nullptr);
311
312 pManager->Dispatch(event);
313 }
314
315 }