1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file depthstencil.h
25 * @brief Implements depth/stencil functionality
27 ******************************************************************************/
29 #include "common/os.h"
30 #include "format_conversion.h"
// Applies the stencil operation `op` to the SIMD stencil values in `stencilps`,
// updating `stencilps` in place.
//   op           - which stencil operation to perform.
//   mask         - per-lane selector passed to every _simd_blendv_ps below.
//   stencilRefps - reference value used by the REPLACE branch.
//   stencilps    - in/out stencil values.
// Every visible branch has the same shape: compute a candidate value, then
// merge it into stencilps with _simd_blendv_ps(stencilps, candidate, mask)
// (assuming the usual AVX blendv convention, lanes selected by `mask` take the
// candidate and the others keep their current value).
// NOTE(review): in this view of the file the `switch` header, braces, `break`
// statements and several case labels are not visible; the comments below
// describe only the statements that are.
33 void StencilOp(SWR_STENCILOP op
,
34 simdscalar
const& mask
,
35 simdscalar
const& stencilRefps
,
36 simdscalar
& stencilps
)
// Integer reinterpretation of the incoming stencil bits, used by the
// increment/decrement branches.
38 simdscalari stencil
= _simd_castps_si(stencilps
);
// Candidate is all zeros (case label not visible here - presumably the
// "zero" stencil op; confirm against the full file).
45 stencilps
= _simd_blendv_ps(stencilps
, _simd_setzero_ps(), mask
);
// Candidate is the reference value.
47 case STENCILOP_REPLACE
:
48 stencilps
= _simd_blendv_ps(stencilps
, stencilRefps
, mask
);
// Candidate is stencil + 1 via _simd_adds_epu8 (by its name, an unsigned
// saturating 8-bit add). The addend is 1 broadcast per 32-bit lane, so only
// the lowest byte of each lane receives a non-zero addend.
50 case STENCILOP_INCRSAT
:
52 simdscalari stencilincr
= _simd_adds_epu8(stencil
, _simd_set1_epi32(1));
53 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencilincr
), mask
);
// Candidate is stencil - 1 via _simd_subs_epu8 (by its name, an unsigned
// saturating 8-bit subtract), again affecting only the lowest byte per lane.
56 case STENCILOP_DECRSAT
:
58 simdscalari stencildecr
= _simd_subs_epu8(stencil
, _simd_set1_epi32(1));
59 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencildecr
), mask
);
// Same as the INCRSAT branch but using the plain 8-bit add _simd_add_epi8
// (case label not visible - presumably the wrapping increment op).
64 simdscalari stencilincr
= _simd_add_epi8(stencil
, _simd_set1_epi32(1));
65 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencilincr
), mask
);
// Adds (-1) & 0xff == 0xff to the lowest byte of each lane with the plain
// 8-bit add; modulo 256 that equals subtracting 1 (case label not visible -
// presumably the wrapping decrement op).
70 simdscalari stencildecr
= _simd_add_epi8(stencil
, _simd_set1_epi32((-1) & 0xff));
71 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencildecr
), mask
);
// Candidate is the bitwise complement of stencilps: cmpeq(0, 0) yields an
// all-ones value, and andnot(a, b) computes (~a) & b under the usual AVX
// andnot convention.
74 case STENCILOP_INVERT
:
76 simdscalar stencilinvert
=
77 _simd_andnot_ps(stencilps
, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
78 stencilps
= _simd_blendv_ps(stencilps
, stencilinvert
, mask
);
// Quantizes SIMD depth values to the precision of the depth format given by
// the template parameter `depthFormatT`.
//   depth - input depth values.
// The component type and bits-per-component of component 0 are read from
// FormatTraits<depthFormatT>. For float formats the existing comments state
// that no quantizing is needed; for the other (asserted UNORM) case the value
// is scaled by (2^bpc - 1), rounded, and scaled back.
// NOTE(review): braces, the `return` statements and the condition that selects
// between the divide and multiply rescale paths are not visible in this view.
86 template <SWR_FORMAT depthFormatT
>
87 simdscalar
QuantizeDepth(simdscalar
const& depth
)
// Query the format description for component 0.
89 SWR_TYPE depthType
= FormatTraits
<depthFormatT
>::GetType(0);
90 uint32_t depthBpc
= FormatTraits
<depthFormatT
>::GetBPC(0);
92 if (depthType
== SWR_TYPE_FLOAT
)
94 // assume only 32bit float depth supported
95 SWR_ASSERT(depthBpc
== 32);
97 // matches shader precision, no quantizing needed
101 // should be unorm depth if not float
102 SWR_ASSERT(depthType
== SWR_TYPE_UNORM
);
// Largest integer representable in depthBpc bits, as a float scale factor.
104 float quantize
= (float)((1 << depthBpc
) - 1);
// Scale up, add 0.5 and round toward zero: round-to-nearest for the
// non-negative values this is presumably applied to.
105 simdscalar result
= _simd_mul_ps(depth
, _simd_set1_ps(quantize
));
106 result
= _simd_add_ps(result
, _simd_set1_ps(0.5f
));
107 result
= _simd_round_ps(result
, _MM_FROUND_TO_ZERO
);
// Rescale back by an exact divide ...
111 result
= _simd_div_ps(result
, _simd_set1_ps(quantize
));
// ... or by multiplying with the reciprocal. The condition choosing between
// these two paths is not visible here - TODO confirm against the full file.
115 result
= _simd_mul_ps(result
, _simd_set1_ps(1.0f
/ quantize
));
// Performs the depth and stencil tests for one SIMD group of samples.
//   pState        - API state; depthStencilState and vp[viewportIndex] are read.
//   viewportIndex - index into pState->vp used for the Z clamp.
//   iZ            - interpolated depth values to test.
//   coverageMask  - coverage mask, ANDed into the returned mask.
//   pStencilBase  - address the stencil values are loaded from.
//   pStencilMask  - out: receives the stencil test result.
// Returns depthResult & stencilMask & coverageMask.
// NOTE(review): `frontFacing`, `pDepthBase`, `zbuf` and `sbuf` are used below
// but their declarations are not visible in this view; braces, case labels and
// `break` statements of the switches are also not visible.
122 simdscalar
DepthStencilTest(const API_STATE
* pState
,
124 uint32_t viewportIndex
,
125 simdscalar
const& iZ
,
127 simdscalar
const& coverageMask
,
128 uint8_t* pStencilBase
,
129 simdscalar
* pStencilMask
)
// The code below loads depth as 32-bit floats and stencil as R8_UINT, so the
// hot tile formats are pinned at compile time.
131 static_assert(KNOB_DEPTH_HOT_TILE_FORMAT
== R32_FLOAT
, "Unsupported depth hot tile format");
132 static_assert(KNOB_STENCIL_HOT_TILE_FORMAT
== R8_UINT
, "Unsupported stencil hot tile format");
134 const SWR_DEPTH_STENCIL_STATE
* pDSState
= &pState
->depthStencilState
;
135 const SWR_VIEWPORT
* pViewport
= &pState
->vp
[viewportIndex
];
// Default depth result used when the depth test is disabled: -1.0f in every
// lane (sign bit set).
137 simdscalar depthResult
= _simd_set1_ps(-1.0f
);
140 // clamp Z to viewport [minZ..maxZ]
141 simdscalar vMinZ
= _simd_broadcast_ss(&pViewport
->minZ
);
142 simdscalar vMaxZ
= _simd_broadcast_ss(&pViewport
->maxZ
);
143 simdscalar interpZ
= _simd_min_ps(vMaxZ
, _simd_max_ps(vMinZ
, iZ
));
145 if (pDSState
->depthTestEnable
)
// First switch on the depth function: the only visible branch clears
// depthResult to zero (case label not visible; presumably the branches here
// are the ones that do not need the depth buffer contents).
147 switch (pDSState
->depthTestFunc
)
150 depthResult
= _simd_setzero_ps();
// Load the current depth values from pDepthBase.
155 zbuf
= _simd_load_ps((const float*)pDepthBase
);
// Second switch on the depth function: compare the clamped interpolated Z
// (left operand) against the loaded depth values (right operand). The case
// label for each comparison is not visible here.
158 switch (pDSState
->depthTestFunc
)
161 depthResult
= _simd_cmple_ps(interpZ
, zbuf
);
164 depthResult
= _simd_cmplt_ps(interpZ
, zbuf
);
167 depthResult
= _simd_cmpgt_ps(interpZ
, zbuf
);
170 depthResult
= _simd_cmpge_ps(interpZ
, zbuf
);
173 depthResult
= _simd_cmpeq_ps(interpZ
, zbuf
);
176 depthResult
= _simd_cmpneq_ps(interpZ
, zbuf
);
// Default stencil result used when the stencil test is disabled.
181 simdscalar stencilMask
= _simd_set1_ps(-1.0f
);
183 if (pDSState
->stencilTestEnable
)
185 uint8_t stencilRefValue
;
186 uint32_t stencilTestFunc
;
187 uint8_t stencilTestMask
;
// Use the front-face stencil state unless the primitive is back facing and
// double-sided stencil testing is enabled.
188 if (frontFacing
|| !pDSState
->doubleSidedStencilTestEnable
)
190 stencilRefValue
= pDSState
->stencilRefValue
;
191 stencilTestFunc
= pDSState
->stencilTestFunc
;
192 stencilTestMask
= pDSState
->stencilTestMask
;
// Otherwise use the back-face stencil state.
196 stencilRefValue
= pDSState
->backfaceStencilRefValue
;
197 stencilTestFunc
= pDSState
->backfaceStencilTestFunc
;
198 stencilTestMask
= pDSState
->backfaceStencilTestMask
;
202 simdscalar stencilWithMask
;
203 simdscalar stencilRef
;
// First switch on the stencil function: the only visible branch clears
// stencilMask to zero (case label not visible).
204 switch (stencilTestFunc
)
207 stencilMask
= _simd_setzero_ps();
// Load the current stencil values from pStencilBase into sbuf.
212 LoadSOA
<R8_UINT
>(pStencilBase
, sbuf
);
214 // apply stencil read mask
215 stencilWithMask
= _simd_castsi_ps(
216 _simd_and_si(_simd_castps_si(sbuf
.v
[0]), _simd_set1_epi32(stencilTestMask
)));
218 // do stencil compare in float to avoid simd integer emulation in AVX1
219 stencilWithMask
= _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask
));
// The reference value gets the same read mask before conversion to float.
221 stencilRef
= _simd_set1_ps((float)(stencilRefValue
& stencilTestMask
));
// Second switch on the stencil function: the masked reference value is the
// LEFT operand and the masked stored stencil the RIGHT operand of every
// comparison. The case label for each comparison is not visible here.
225 switch (stencilTestFunc
)
228 stencilMask
= _simd_cmple_ps(stencilRef
, stencilWithMask
);
231 stencilMask
= _simd_cmplt_ps(stencilRef
, stencilWithMask
);
234 stencilMask
= _simd_cmpgt_ps(stencilRef
, stencilWithMask
);
237 stencilMask
= _simd_cmpge_ps(stencilRef
, stencilWithMask
);
240 stencilMask
= _simd_cmpeq_ps(stencilRef
, stencilWithMask
);
243 stencilMask
= _simd_cmpneq_ps(stencilRef
, stencilWithMask
);
// A lane is kept only if it passed the depth test, passed the stencil test
// and is covered.
248 simdscalar depthWriteMask
= _simd_and_ps(depthResult
, stencilMask
);
249 depthWriteMask
= _simd_and_ps(depthWriteMask
, coverageMask
);
// Hand the raw stencil result back to the caller.
251 *pStencilMask
= stencilMask
;
252 return depthWriteMask
;
// Writes depth and/or stencil results for one SIMD group of samples.
//   pViewport    - viewport supplying minZ/maxZ for the Z clamp.
//   pDSState     - depth/stencil state (write enables, stencil ops and masks).
//   iZ           - interpolated depth values.
//   depthMask    - depth mask; ANDed with coverageMask for the depth store and
//                  used to split stencil-pass lanes into depth-pass/depth-fail.
//   coverageMask - coverage mask.
//   pStencilBase - address stencil values are loaded from and stored to.
//   stencilMask  - stencil test result.
// NOTE(review): `frontFacing`, `pDepthBase` and `sbuf` are used below but their
// declarations are not visible in this view; braces and some continuation
// lines are also not visible.
256 void DepthStencilWrite(const SWR_VIEWPORT
* pViewport
,
257 const SWR_DEPTH_STENCIL_STATE
* pDSState
,
259 simdscalar
const& iZ
,
261 const simdscalar
& depthMask
,
262 const simdscalar
& coverageMask
,
263 uint8_t* pStencilBase
,
264 const simdscalar
& stencilMask
)
266 if (pDSState
->depthWriteEnable
)
268 // clamp Z to viewport [minZ..maxZ]
269 simdscalar vMinZ
= _simd_broadcast_ss(&pViewport
->minZ
);
270 simdscalar vMaxZ
= _simd_broadcast_ss(&pViewport
->maxZ
);
271 simdscalar interpZ
= _simd_min_ps(vMaxZ
, _simd_max_ps(vMinZ
, iZ
));
// Masked store of the clamped Z to pDepthBase, using depthMask & coverageMask
// as the store mask.
273 simdscalar vMask
= _simd_and_ps(depthMask
, coverageMask
);
274 _simd_maskstore_ps((float*)pDepthBase
, _simd_castps_si(vMask
), interpZ
);
277 if (pDSState
->stencilWriteEnable
)
// Load the current stencil values from pStencilBase.
280 LoadSOA
<R8_UINT
>(pStencilBase
, sbuf
);
281 simdscalar stencilbuf
= sbuf
.v
[0];
283 uint8_t stencilRefValue
;
284 uint32_t stencilFailOp
;
285 uint32_t stencilPassDepthPassOp
;
286 uint32_t stencilPassDepthFailOp
;
287 uint8_t stencilWriteMask
;
// Use the front-face stencil state unless the primitive is back facing and
// double-sided stencil testing is enabled.
288 if (frontFacing
|| !pDSState
->doubleSidedStencilTestEnable
)
290 stencilRefValue
= pDSState
->stencilRefValue
;
291 stencilFailOp
= pDSState
->stencilFailOp
;
292 stencilPassDepthPassOp
= pDSState
->stencilPassDepthPassOp
;
293 stencilPassDepthFailOp
= pDSState
->stencilPassDepthFailOp
;
294 stencilWriteMask
= pDSState
->stencilWriteMask
;
// Otherwise use the back-face stencil state.
298 stencilRefValue
= pDSState
->backfaceStencilRefValue
;
299 stencilFailOp
= pDSState
->backfaceStencilFailOp
;
300 stencilPassDepthPassOp
= pDSState
->backfaceStencilPassDepthPassOp
;
301 stencilPassDepthFailOp
= pDSState
->backfaceStencilPassDepthFailOp
;
302 stencilWriteMask
= pDSState
->backfaceStencilWriteMask
;
// Working copy of the stencil values and the reference value broadcast as an
// integer bit pattern.
305 simdscalar stencilps
= stencilbuf
;
306 simdscalar stencilRefps
= _simd_castsi_ps(_simd_set1_epi32(stencilRefValue
));
// Split the lanes into the three stencil outcomes:
//   stencil fail            = ~stencilMask & coverageMask
//   stencil pass/depth pass =  stencilMask & depthMask
//   stencil pass/depth fail =  stencilMask & ~depthMask
308 simdscalar stencilFailMask
= _simd_andnot_ps(stencilMask
, coverageMask
);
309 simdscalar stencilPassDepthPassMask
= _simd_and_ps(stencilMask
, depthMask
);
310 simdscalar stencilPassDepthFailMask
=
311 _simd_and_ps(stencilMask
, _simd_andnot_ps(depthMask
, _simd_set1_ps(-1)));
// Remember the unmodified values for the write-mask and coverage merges below.
313 simdscalar origStencil
= stencilps
;
// Apply the configured op for each outcome to its lanes.
// NOTE(review): the trailing arguments of the second and third calls are on
// lines not visible in this view.
315 StencilOp((SWR_STENCILOP
)stencilFailOp
, stencilFailMask
, stencilRefps
, stencilps
);
316 StencilOp((SWR_STENCILOP
)stencilPassDepthFailOp
,
317 stencilPassDepthFailMask
,
320 StencilOp((SWR_STENCILOP
)stencilPassDepthPassOp
,
321 stencilPassDepthPassMask
,
325 // apply stencil write mask
326 simdscalari vWriteMask
= _simd_set1_epi32(stencilWriteMask
);
327 stencilps
= _simd_and_ps(stencilps
, _simd_castsi_ps(vWriteMask
));
// Combine the original bits outside the write mask with the new bits inside
// it: (origStencil & ~writeMask) | stencilps.
// NOTE(review): no assignment target for this expression is visible in this
// view; confirm against the full file that the result is stored (presumably
// back into stencilps).
329 _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask
), origStencil
), stencilps
);
// Lanes selected by coverageMask take the updated stencil and the others keep
// the original value (assuming the usual AVX blendv convention); then store.
331 simdvector stencilResult
;
332 stencilResult
.v
[0] = _simd_blendv_ps(origStencil
, stencilps
, coverageMask
);
333 StoreSOA
<R8_UINT
>(stencilResult
, pStencilBase
);