swr/rast: Removed unused variable
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / backend_sample.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file backend_sample.cpp
24 *
25 * @brief Backend handles rasterization, pixel shading and output merger
26 * operations.
27 *
28 ******************************************************************************/
29
30 #include <smmintrin.h>
31
32 #include "backend.h"
33 #include "backend_impl.h"
34 #include "tilemgr.h"
35 #include "memory/tilingtraits.h"
36 #include "core/multisample.h"
37
38 #include <algorithm>
39
40 template<typename T>
41 void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
42 {
43 RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
44 RDTSC_BEGIN(BESetup, pDC->drawId);
45
46 const API_STATE &state = GetApiState(pDC);
47
48 BarycentricCoeffs coeffs;
49 SetupBarycentricCoeffs(&coeffs, work);
50
51 SWR_PS_CONTEXT psContext;
52 const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
53 SetupPixelShaderContext<T>(&psContext, samplePos, work);
54
55 uint8_t *pDepthBuffer, *pStencilBuffer;
56 SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
57
58 RDTSC_END(BESetup, 0);
59
60 psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
61 psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
62
63 const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
64
65 for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
66 {
67 psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
68 psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
69
70 const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
71
72 for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
73 {
74 #if USE_8x2_TILE_BACKEND
75 const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
76 #endif
77 if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
78 {
79 const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
80
81 generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
82 }
83
84 RDTSC_BEGIN(BEBarycentric, pDC->drawId);
85
86 CalcPixelBarycentrics(coeffs, psContext);
87
88 CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
89
90 RDTSC_END(BEBarycentric, 0);
91
92 for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
93 {
94 simdmask coverageMask = work.coverageMask[sample] & MASK;
95
96 if (coverageMask)
97 {
98 // offset depth/stencil buffers current sample
99 uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
100 uint8_t *pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
101
102 if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
103 {
104 static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
105
106 const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
107
108 const float minz = state.depthBoundsState.depthBoundsTestMinValue;
109 const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
110
111 coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
112 }
113
114 RDTSC_BEGIN(BEBarycentric, pDC->drawId);
115
116 // calculate per sample positions
117 psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
118 psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
119
120 CalcSampleBarycentrics(coeffs, psContext);
121
122 // interpolate and quantize z
123 psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
124 psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
125
126 RDTSC_END(BEBarycentric, 0);
127
128 // interpolate user clip distance if available
129 if (state.backendState.clipDistanceMask)
130 {
131 coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
132 }
133
134 simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
135 simdscalar depthPassMask = vCoverageMask;
136 simdscalar stencilPassMask = vCoverageMask;
137
138 // Early-Z?
139 if (T::bCanEarlyZ)
140 {
141 RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
142 depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
143 psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
144 AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
145 RDTSC_END(BEEarlyDepthTest, 0);
146
147 // early-exit if no samples passed depth or earlyZ is forced on.
148 if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
149 {
150 DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
151 pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
152
153 if (!_simd_movemask_ps(depthPassMask))
154 {
155 work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
156 continue;
157 }
158 }
159 }
160
161 psContext.sampleIndex = sample;
162 psContext.activeMask = _simd_castps_si(vCoverageMask);
163
164 // execute pixel shader
165 RDTSC_BEGIN(BEPixelShader, pDC->drawId);
166 UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
167 state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
168 RDTSC_END(BEPixelShader, 0);
169
170 vCoverageMask = _simd_castsi_ps(psContext.activeMask);
171
172 // late-Z
173 if (!T::bCanEarlyZ)
174 {
175 RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
176 depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
177 psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
178 AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
179 RDTSC_END(BELateDepthTest, 0);
180
181 if (!_simd_movemask_ps(depthPassMask))
182 {
183 // need to call depth/stencil write for stencil write
184 DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
185 pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
186
187 work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
188 continue;
189 }
190 }
191
192 uint32_t statMask = _simd_movemask_ps(depthPassMask);
193 uint32_t statCount = _mm_popcnt_u32(statMask);
194 UPDATE_STAT_BE(DepthPassCount, statCount);
195
196 // output merger
197 RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
198 #if USE_8x2_TILE_BACKEND
199 OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
200 #else
201 OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
202 #endif
203
204 // do final depth write after all pixel kills
205 if (!state.psState.forceEarlyZ)
206 {
207 DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
208 pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
209 }
210 RDTSC_END(BEOutputMerger, 0);
211 }
212 work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
213 }
214
215 Endtile:
216 ATTR_UNUSED;
217
218 RDTSC_BEGIN(BEEndTile, pDC->drawId);
219
220 if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
221 {
222 work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
223 }
224
225 #if USE_8x2_TILE_BACKEND
226 if (useAlternateOffset)
227 {
228 DWORD rt;
229 uint32_t rtMask = state.colorHottileEnable;
230 while (_BitScanForward(&rt, rtMask))
231 {
232 rtMask &= ~(1 << rt);
233 psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
234 }
235 }
236 #else
237 DWORD rt;
238 uint32_t rtMask = state.colorHottileEnable;
239 while (_BitScanForward(&rt, rtMask))
240 {
241 rtMask &= ~(1 << rt);
242 psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
243 }
244 #endif
245 pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
246 pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
247
248 RDTSC_END(BEEndTile, 0);
249
250 psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
251 psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
252 }
253
254 psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
255 psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
256 }
257
258 RDTSC_END(BESampleRateBackend, 0);
259 }
260
261 // Recursive template used to auto-nest conditionals. Converts dynamic enum function
262 // arguments to static template arguments.
263 template <uint32_t... ArgsT>
264 struct BEChooserSampleRate
265 {
266 // Last Arg Terminator
267 static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
268 {
269 switch (tArg)
270 {
271 case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
272 case SWR_BACKEND_SINGLE_SAMPLE:
273 case SWR_BACKEND_MSAA_PIXEL_RATE:
274 SWR_ASSERT(0 && "Invalid backend func\n");
275 return nullptr;
276 break;
277 default:
278 SWR_ASSERT(0 && "Invalid backend func\n");
279 return nullptr;
280 break;
281 }
282 }
283
284 // Recursively parse args
285 template <typename... TArgsT>
286 static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
287 {
288 switch (tArg)
289 {
290 case SWR_INPUT_COVERAGE_NONE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
291 case SWR_INPUT_COVERAGE_NORMAL: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
292 case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
293 default:
294 SWR_ASSERT(0 && "Invalid sample pattern\n");
295 return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
296 break;
297 }
298 }
299
300 // Recursively parse args
301 template <typename... TArgsT>
302 static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
303 {
304 switch (tArg)
305 {
306 case SWR_MULTISAMPLE_1X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
307 case SWR_MULTISAMPLE_2X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
308 case SWR_MULTISAMPLE_4X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
309 case SWR_MULTISAMPLE_8X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
310 case SWR_MULTISAMPLE_16X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
311 default:
312 SWR_ASSERT(0 && "Invalid sample count\n");
313 return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
314 break;
315 }
316 }
317
318 // Recursively parse args
319 template <typename... TArgsT>
320 static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
321 {
322 if (tArg == true)
323 {
324 return BEChooserSampleRate<ArgsT..., 1>::GetFunc(remainingArgs...);
325 }
326
327 return BEChooserSampleRate<ArgsT..., 0>::GetFunc(remainingArgs...);
328 }
329 };
330
331 void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
332 {
333 for (uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
334 {
335 for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
336 {
337 for (uint32_t centroid = 0; centroid < 2; centroid++)
338 {
339 for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
340 {
341 table[sampleCount][inputCoverage][centroid][canEarlyZ] =
342 BEChooserSampleRate<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
343 (centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
344 }
345 }
346 }
347 }
348 }