swr/rasterizer: modernize thread TLB
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / depthstencil.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file depthstencil.h
24 *
25 * @brief Implements depth/stencil functionality
26 *
27 ******************************************************************************/
28 #pragma once
29 #include "common/os.h"
30 #include "format_conversion.h"
31
32 INLINE
33 void StencilOp(SWR_STENCILOP op,
34 simdscalar const& mask,
35 simdscalar const& stencilRefps,
36 simdscalar& stencilps)
37 {
38 simdscalari stencil = _simd_castps_si(stencilps);
39
40 switch (op)
41 {
42 case STENCILOP_KEEP:
43 break;
44 case STENCILOP_ZERO:
45 stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask);
46 break;
47 case STENCILOP_REPLACE:
48 stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask);
49 break;
50 case STENCILOP_INCRSAT:
51 {
52 simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
53 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
54 break;
55 }
56 case STENCILOP_DECRSAT:
57 {
58 simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
59 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
60 break;
61 }
62 case STENCILOP_INCR:
63 {
64 simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
65 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
66 break;
67 }
68 case STENCILOP_DECR:
69 {
70 simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
71 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
72 break;
73 }
74 case STENCILOP_INVERT:
75 {
76 simdscalar stencilinvert =
77 _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
78 stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
79 break;
80 }
81 default:
82 break;
83 }
84 }
85
86 template <SWR_FORMAT depthFormatT>
87 simdscalar QuantizeDepth(simdscalar const& depth)
88 {
89 SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
90 uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
91
92 if (depthType == SWR_TYPE_FLOAT)
93 {
94 // assume only 32bit float depth supported
95 SWR_ASSERT(depthBpc == 32);
96
97 // matches shader precision, no quantizing needed
98 return depth;
99 }
100
101 // should be unorm depth if not float
102 SWR_ASSERT(depthType == SWR_TYPE_UNORM);
103
104 float quantize = (float)((1 << depthBpc) - 1);
105 simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
106 result = _simd_add_ps(result, _simd_set1_ps(0.5f));
107 result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
108
109 if (depthBpc > 16)
110 {
111 result = _simd_div_ps(result, _simd_set1_ps(quantize));
112 }
113 else
114 {
115 result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize));
116 }
117
118 return result;
119 }
120
121 INLINE
122 simdscalar DepthStencilTest(const API_STATE* pState,
123 bool frontFacing,
124 uint32_t viewportIndex,
125 simdscalar const& iZ,
126 uint8_t* pDepthBase,
127 simdscalar const& coverageMask,
128 uint8_t* pStencilBase,
129 simdscalar* pStencilMask)
130 {
131 static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
132 static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
133
134 const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
135 const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
136
137 simdscalar depthResult = _simd_set1_ps(-1.0f);
138 simdscalar zbuf;
139
140 // clamp Z to viewport [minZ..maxZ]
141 simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
142 simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
143 simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
144
145 if (pDSState->depthTestEnable)
146 {
147 switch (pDSState->depthTestFunc)
148 {
149 case ZFUNC_NEVER:
150 depthResult = _simd_setzero_ps();
151 break;
152 case ZFUNC_ALWAYS:
153 break;
154 default:
155 zbuf = _simd_load_ps((const float*)pDepthBase);
156 }
157
158 switch (pDSState->depthTestFunc)
159 {
160 case ZFUNC_LE:
161 depthResult = _simd_cmple_ps(interpZ, zbuf);
162 break;
163 case ZFUNC_LT:
164 depthResult = _simd_cmplt_ps(interpZ, zbuf);
165 break;
166 case ZFUNC_GT:
167 depthResult = _simd_cmpgt_ps(interpZ, zbuf);
168 break;
169 case ZFUNC_GE:
170 depthResult = _simd_cmpge_ps(interpZ, zbuf);
171 break;
172 case ZFUNC_EQ:
173 depthResult = _simd_cmpeq_ps(interpZ, zbuf);
174 break;
175 case ZFUNC_NE:
176 depthResult = _simd_cmpneq_ps(interpZ, zbuf);
177 break;
178 }
179 }
180
181 simdscalar stencilMask = _simd_set1_ps(-1.0f);
182
183 if (pDSState->stencilTestEnable)
184 {
185 uint8_t stencilRefValue;
186 uint32_t stencilTestFunc;
187 uint8_t stencilTestMask;
188 if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
189 {
190 stencilRefValue = pDSState->stencilRefValue;
191 stencilTestFunc = pDSState->stencilTestFunc;
192 stencilTestMask = pDSState->stencilTestMask;
193 }
194 else
195 {
196 stencilRefValue = pDSState->backfaceStencilRefValue;
197 stencilTestFunc = pDSState->backfaceStencilTestFunc;
198 stencilTestMask = pDSState->backfaceStencilTestMask;
199 }
200
201 simdvector sbuf;
202 simdscalar stencilWithMask;
203 simdscalar stencilRef;
204 switch (stencilTestFunc)
205 {
206 case ZFUNC_NEVER:
207 stencilMask = _simd_setzero_ps();
208 break;
209 case ZFUNC_ALWAYS:
210 break;
211 default:
212 LoadSOA<R8_UINT>(pStencilBase, sbuf);
213
214 // apply stencil read mask
215 stencilWithMask = _simd_castsi_ps(
216 _simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
217
218 // do stencil compare in float to avoid simd integer emulation in AVX1
219 stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
220
221 stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));
222 break;
223 }
224
225 switch (stencilTestFunc)
226 {
227 case ZFUNC_LE:
228 stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask);
229 break;
230 case ZFUNC_LT:
231 stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask);
232 break;
233 case ZFUNC_GT:
234 stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask);
235 break;
236 case ZFUNC_GE:
237 stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask);
238 break;
239 case ZFUNC_EQ:
240 stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask);
241 break;
242 case ZFUNC_NE:
243 stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask);
244 break;
245 }
246 }
247
248 simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
249 depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
250
251 *pStencilMask = stencilMask;
252 return depthWriteMask;
253 }
254
255 INLINE
256 void DepthStencilWrite(const SWR_VIEWPORT* pViewport,
257 const SWR_DEPTH_STENCIL_STATE* pDSState,
258 bool frontFacing,
259 simdscalar const& iZ,
260 uint8_t* pDepthBase,
261 const simdscalar& depthMask,
262 const simdscalar& coverageMask,
263 uint8_t* pStencilBase,
264 const simdscalar& stencilMask)
265 {
266 if (pDSState->depthWriteEnable)
267 {
268 // clamp Z to viewport [minZ..maxZ]
269 simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
270 simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
271 simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
272
273 simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
274 _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ);
275 }
276
277 if (pDSState->stencilWriteEnable)
278 {
279 simdvector sbuf;
280 LoadSOA<R8_UINT>(pStencilBase, sbuf);
281 simdscalar stencilbuf = sbuf.v[0];
282
283 uint8_t stencilRefValue;
284 uint32_t stencilFailOp;
285 uint32_t stencilPassDepthPassOp;
286 uint32_t stencilPassDepthFailOp;
287 uint8_t stencilWriteMask;
288 if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
289 {
290 stencilRefValue = pDSState->stencilRefValue;
291 stencilFailOp = pDSState->stencilFailOp;
292 stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
293 stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
294 stencilWriteMask = pDSState->stencilWriteMask;
295 }
296 else
297 {
298 stencilRefValue = pDSState->backfaceStencilRefValue;
299 stencilFailOp = pDSState->backfaceStencilFailOp;
300 stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
301 stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
302 stencilWriteMask = pDSState->backfaceStencilWriteMask;
303 }
304
305 simdscalar stencilps = stencilbuf;
306 simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
307
308 simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
309 simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
310 simdscalar stencilPassDepthFailMask =
311 _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
312
313 simdscalar origStencil = stencilps;
314
315 StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
316 StencilOp((SWR_STENCILOP)stencilPassDepthFailOp,
317 stencilPassDepthFailMask,
318 stencilRefps,
319 stencilps);
320 StencilOp((SWR_STENCILOP)stencilPassDepthPassOp,
321 stencilPassDepthPassMask,
322 stencilRefps,
323 stencilps);
324
325 // apply stencil write mask
326 simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
327 stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
328 stencilps =
329 _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
330
331 simdvector stencilResult;
332 stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
333 StoreSOA<R8_UINT>(stencilResult, pStencilBase);
334 }
335 }