1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file depthstencil.h
25 * @brief Implements depth/stencil functionality
27 ******************************************************************************/
29 #include "common/os.h"
30 #include "format_conversion.h"
// StencilOp: applies the stencil operation `op` to `stencilps`, which is taken
// by reference and updated in place. Every visible branch writes its result
// through _simd_blendv_ps(stencilps, <new value>, mask), so `mask` selects the
// lanes that receive the new value.
//   op           - stencil operation selector (SWR_STENCILOP)
//   mask         - per-lane selector handed to the blend
//   stencilRefps - reference value, used by the REPLACE branch
//   stencilps    - [in/out] current stencil values
// NOTE(review): this text appears to be a lossy extraction -- braces, the
// switch header and several case labels are not present here. Confirm the
// structure against the real header before relying on these notes.
33 void StencilOp(SWR_STENCILOP op
, simdscalar mask
, simdscalar stencilRefps
, simdscalar
&stencilps
)
// Integer reinterpretation of the incoming stencil values; this is the operand
// of the integer add/sub branches below.
35 simdscalari stencil
= _simd_castps_si(stencilps
);
// NOTE(review): the case label for this branch is not visible. The blend
// source is _simd_setzero_ps(), i.e. the selected lanes are zeroed.
42 stencilps
= _simd_blendv_ps(stencilps
, _simd_setzero_ps(), mask
);
// REPLACE: the blend source is the reference value.
44 case STENCILOP_REPLACE
:
45 stencilps
= _simd_blendv_ps(stencilps
, stencilRefps
, mask
);
// INCRSAT: uses _simd_adds_epu8 (presumably a saturating unsigned 8-bit add).
// The addend is 1 per 32-bit element, so presumably only the least-significant
// byte of each element changes -- confirm.
47 case STENCILOP_INCRSAT
:
49 simdscalari stencilincr
= _simd_adds_epu8(stencil
, _simd_set1_epi32(1));
50 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencilincr
), mask
);
// DECRSAT: same shape as INCRSAT but with _simd_subs_epu8 (presumably a
// saturating unsigned 8-bit subtract).
53 case STENCILOP_DECRSAT
:
55 simdscalari stencildecr
= _simd_subs_epu8(stencil
, _simd_set1_epi32(1));
56 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencildecr
), mask
);
// NOTE(review): case label not visible. Adds 1 with _simd_add_epi8, i.e. the
// non-"adds" variant -- presumably a wrapping increment.
61 simdscalari stencilincr
= _simd_add_epi8(stencil
, _simd_set1_epi32(1));
62 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencilincr
), mask
);
// NOTE(review): case label not visible. (-1) & 0xff is 0xff, so this adds 255
// to the low byte of each element with _simd_add_epi8 -- presumably a wrapping
// decrement expressed as an add.
67 simdscalari stencildecr
= _simd_add_epi8(stencil
, _simd_set1_epi32((-1) & 0xff));
68 stencilps
= _simd_blendv_ps(stencilps
, _simd_castsi_ps(stencildecr
), mask
);
// INVERT: _simd_cmpeq_ps(0, 0) is used to build the second andnot operand
// (presumably an all-ones mask, making the andnot a bitwise NOT of stencilps).
71 case STENCILOP_INVERT
:
73 simdscalar stencilinvert
= _simd_andnot_ps(stencilps
, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
74 stencilps
= _simd_blendv_ps(stencilps
, stencilinvert
, mask
);
// QuantizeDepth: quantizes a SIMD vector of depth values for the depth format
// given by the template parameter `depthFormatT`. The component type and
// bits-per-component are looked up through FormatTraits<depthFormatT> for
// component 0.
//   depth - input depth values
// NOTE(review): this text appears to be a lossy extraction -- braces, the
// return statements and the condition that selects between the two final
// scaling statements are not present here.
83 template<SWR_FORMAT depthFormatT
>
84 simdscalar
QuantizeDepth(simdscalar depth
)
86 SWR_TYPE depthType
= FormatTraits
<depthFormatT
>::GetType(0);
87 uint32_t depthBpc
= FormatTraits
<depthFormatT
>::GetBPC(0);
// Float depth path: only asserts on the bit count; per the comment below no
// quantization is applied.
89 if (depthType
== SWR_TYPE_FLOAT
)
91 // assume only 32bit float depth supported
92 SWR_ASSERT(depthBpc
== 32);
94 // matches shader precision, no quantizing needed
98 // should be unorm depth if not float
99 SWR_ASSERT(depthType
== SWR_TYPE_UNORM
);
// Largest value representable in depthBpc bits, as a float scale factor.
// NOTE(review): `1 << depthBpc` is an int shift, which is undefined when
// depthBpc reaches 32 -- confirm this statement is only reached for narrower
// unorm formats.
101 float quantize
= (float)((1 << depthBpc
) - 1);
// Scale into the integer range, add 0.5 and round toward zero.
102 simdscalar result
= _simd_mul_ps(depth
, _simd_set1_ps(quantize
));
103 result
= _simd_add_ps(result
, _simd_set1_ps(0.5f
));
104 result
= _simd_round_ps(result
, _MM_FROUND_TO_ZERO
);
// Scale back by dividing by `quantize`.
// NOTE(review): this and the next statement are alternatives (divide vs.
// multiply by the reciprocal); the condition choosing between them is not
// visible in this text.
108 result
= _simd_div_ps(result
, _simd_set1_ps(quantize
));
// Scale back by multiplying with 1.0f / quantize.
112 result
= _simd_mul_ps(result
, _simd_set1_ps(1.0f
/ quantize
));
// DepthStencilTest: runs the depth test and the stencil test for one SIMD
// group of samples.
//   pState        - API state; depthStencilState and vp[0] are read from it
//   frontFacing   - selects front-face vs. back-face stencil state
//   interpZ       - interpolated depth, clamped to the viewport Z range first
//   pDepthBase    - depth values are loaded from here as floats
//   coverageMask  - ANDed into the returned mask
//   pStencilBase  - stencil values are loaded from here via LoadSOA<R8_UINT>
//   pStencilMask  - [out] receives the stencil test result
// Returns depthResult AND stencilMask AND coverageMask.
// NOTE(review): this text appears to be a lossy extraction -- braces, default
// labels and the declarations of `zbuf` and `sbuf` are not present here.
119 simdscalar
DepthStencilTest(const API_STATE
* pState
,
120 bool frontFacing
, simdscalar interpZ
, uint8_t* pDepthBase
, simdscalar coverageMask
, uint8_t *pStencilBase
,
121 simdscalar
* pStencilMask
)
// The loads and conversions below are written for these two hot tile formats.
123 static_assert(KNOB_DEPTH_HOT_TILE_FORMAT
== R32_FLOAT
, "Unsupported depth hot tile format");
124 static_assert(KNOB_STENCIL_HOT_TILE_FORMAT
== R8_UINT
, "Unsupported stencil hot tile format");
126 const SWR_DEPTH_STENCIL_STATE
* pDSState
= &pState
->depthStencilState
;
// Only viewport 0 is consulted.
127 const SWR_VIEWPORT
* pViewport
= &pState
->vp
[0];
// Starting value of the depth result: every lane is -1.0f. That value has the
// sign bit set but is not an all-ones bit pattern.
// NOTE(review): presumably consumers of the mask only look at the sign bit --
// confirm.
129 simdscalar depthResult
= _simd_set1_ps(-1.0f
);
132 // clamp Z to viewport [minZ..maxZ]
133 simdscalar vMinZ
= _simd_broadcast_ss(&pViewport
->minZ
);
134 simdscalar vMaxZ
= _simd_broadcast_ss(&pViewport
->maxZ
);
135 interpZ
= _simd_min_ps(vMaxZ
, _simd_max_ps(vMinZ
, interpZ
));
137 if (pDSState
->depthTestEnable
)
// First switch: NEVER zeroes the result, ALWAYS leaves the starting value
// untouched. The depth buffer load that follows serves the comparison cases.
139 switch (pDSState
->depthTestFunc
)
141 case ZFUNC_NEVER
: depthResult
= _simd_setzero_ps(); break;
142 case ZFUNC_ALWAYS
: break;
144 zbuf
= _simd_load_ps((const float*)pDepthBase
);
// Second switch: each case compares interpZ (left operand) against the loaded
// depth values (right operand).
147 switch (pDSState
->depthTestFunc
)
149 case ZFUNC_LE
: depthResult
= _simd_cmple_ps(interpZ
, zbuf
); break;
150 case ZFUNC_LT
: depthResult
= _simd_cmplt_ps(interpZ
, zbuf
); break;
151 case ZFUNC_GT
: depthResult
= _simd_cmpgt_ps(interpZ
, zbuf
); break;
152 case ZFUNC_GE
: depthResult
= _simd_cmpge_ps(interpZ
, zbuf
); break;
153 case ZFUNC_EQ
: depthResult
= _simd_cmpeq_ps(interpZ
, zbuf
); break;
154 case ZFUNC_NE
: depthResult
= _simd_cmpneq_ps(interpZ
, zbuf
); break;
// Starting value of the stencil result, same -1.0f convention as depthResult.
158 simdscalar stencilMask
= _simd_set1_ps(-1.0f
);
160 if (pDSState
->stencilTestEnable
)
162 uint8_t stencilRefValue
;
163 uint32_t stencilTestFunc
;
164 uint8_t stencilTestMask
;
// Front-face state is used for front-facing primitives and whenever
// double-sided stencil testing is disabled.
165 if (frontFacing
|| !pDSState
->doubleSidedStencilTestEnable
)
167 stencilRefValue
= pDSState
->stencilRefValue
;
168 stencilTestFunc
= pDSState
->stencilTestFunc
;
169 stencilTestMask
= pDSState
->stencilTestMask
;
// Back-face state.
// NOTE(review): the `else` introducing these assignments is not visible.
173 stencilRefValue
= pDSState
->backfaceStencilRefValue
;
174 stencilTestFunc
= pDSState
->backfaceStencilTestFunc
;
175 stencilTestMask
= pDSState
->backfaceStencilTestMask
;
179 simdscalar stencilWithMask
;
180 simdscalar stencilRef
;
// The stencil function is compared against the same ZFUNC_* values as the
// depth function. NEVER zeroes the result, ALWAYS leaves it untouched.
181 switch(stencilTestFunc
)
183 case ZFUNC_NEVER
: stencilMask
= _simd_setzero_ps(); break;
184 case ZFUNC_ALWAYS
: break;
186 LoadSOA
<R8_UINT
>(pStencilBase
, sbuf
);
188 // apply stencil read mask
189 stencilWithMask
= _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf
.v
[0]), _simd_set1_epi32(stencilTestMask
)));
191 // do stencil compare in float to avoid simd integer emulation in AVX1
192 stencilWithMask
= _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask
));
// The reference value gets the same read mask before conversion to float.
194 stencilRef
= _simd_set1_ps((float)(stencilRefValue
& stencilTestMask
));
// Each case compares the masked reference (left operand) against the masked
// stencil buffer value (right operand).
198 switch(stencilTestFunc
)
200 case ZFUNC_LE
: stencilMask
= _simd_cmple_ps(stencilRef
, stencilWithMask
); break;
201 case ZFUNC_LT
: stencilMask
= _simd_cmplt_ps(stencilRef
, stencilWithMask
); break;
202 case ZFUNC_GT
: stencilMask
= _simd_cmpgt_ps(stencilRef
, stencilWithMask
); break;
203 case ZFUNC_GE
: stencilMask
= _simd_cmpge_ps(stencilRef
, stencilWithMask
); break;
204 case ZFUNC_EQ
: stencilMask
= _simd_cmpeq_ps(stencilRef
, stencilWithMask
); break;
205 case ZFUNC_NE
: stencilMask
= _simd_cmpneq_ps(stencilRef
, stencilWithMask
); break;
// The returned mask is the AND of the depth result, stencil result and
// coverage.
209 simdscalar depthWriteMask
= _simd_and_ps(depthResult
, stencilMask
);
210 depthWriteMask
= _simd_and_ps(depthWriteMask
, coverageMask
);
// The stencil result is handed back to the caller separately.
212 *pStencilMask
= stencilMask
;
213 return depthWriteMask
;
// DepthStencilWrite: writes depth and stencil values for one SIMD group of
// samples.
//   pViewport    - supplies minZ / maxZ for the depth clamp
//   pDSState     - depth/stencil state (write enables, ops, masks, ref values)
//   frontFacing  - selects front-face vs. back-face stencil state
//   interpZ      - interpolated depth to store
//   pDepthBase   - destination of the masked depth store
//   depthMask    - depth mask; ANDed with coverageMask for the depth store and
//                  used to split stencil-pass lanes into depth-pass/depth-fail
//   coverageMask - coverage mask; gates both the depth and the stencil write
//   pStencilBase - stencil values are loaded from and stored back to here
//   stencilMask  - stencil mask; selects stencil-pass vs. stencil-fail lanes
// NOTE(review): this text appears to be a lossy extraction -- braces, the
// `else` of the face selection and the declaration of `sbuf` are not present
// here.
217 void DepthStencilWrite(const SWR_VIEWPORT
* pViewport
, const SWR_DEPTH_STENCIL_STATE
* pDSState
,
218 bool frontFacing
, simdscalar interpZ
, uint8_t* pDepthBase
, const simdscalar
& depthMask
, const simdscalar
& coverageMask
,
219 uint8_t *pStencilBase
, const simdscalar
& stencilMask
)
221 if (pDSState
->depthWriteEnable
)
223 // clamp Z to viewport [minZ..maxZ]
224 simdscalar vMinZ
= _simd_broadcast_ss(&pViewport
->minZ
);
225 simdscalar vMaxZ
= _simd_broadcast_ss(&pViewport
->maxZ
);
226 interpZ
= _simd_min_ps(vMaxZ
, _simd_max_ps(vMinZ
, interpZ
));
// The depth store is masked by depthMask AND coverageMask.
228 simdscalar vMask
= _simd_and_ps(depthMask
, coverageMask
);
229 _simd_maskstore_ps((float*)pDepthBase
, _simd_castps_si(vMask
), interpZ
);
232 if (pDSState
->stencilWriteEnable
)
// Load the current stencil values; component 0 of the loaded vector is used.
235 LoadSOA
<R8_UINT
>(pStencilBase
, sbuf
);
236 simdscalar stencilbuf
= sbuf
.v
[0];
238 uint8_t stencilRefValue
;
239 uint32_t stencilFailOp
;
240 uint32_t stencilPassDepthPassOp
;
241 uint32_t stencilPassDepthFailOp
;
242 uint8_t stencilWriteMask
;
// Front-face state is used for front-facing primitives and whenever
// double-sided stencil testing is disabled.
243 if (frontFacing
|| !pDSState
->doubleSidedStencilTestEnable
)
245 stencilRefValue
= pDSState
->stencilRefValue
;
246 stencilFailOp
= pDSState
->stencilFailOp
;
247 stencilPassDepthPassOp
= pDSState
->stencilPassDepthPassOp
;
248 stencilPassDepthFailOp
= pDSState
->stencilPassDepthFailOp
;
249 stencilWriteMask
= pDSState
->stencilWriteMask
;
// Back-face state.
// NOTE(review): the `else` introducing these assignments is not visible.
253 stencilRefValue
= pDSState
->backfaceStencilRefValue
;
254 stencilFailOp
= pDSState
->backfaceStencilFailOp
;
255 stencilPassDepthPassOp
= pDSState
->backfaceStencilPassDepthPassOp
;
256 stencilPassDepthFailOp
= pDSState
->backfaceStencilPassDepthFailOp
;
257 stencilWriteMask
= pDSState
->backfaceStencilWriteMask
;
260 simdscalar stencilps
= stencilbuf
;
// Reference value broadcast as an integer and reinterpreted as float bits.
261 simdscalar stencilRefps
= _simd_castsi_ps(_simd_set1_epi32(stencilRefValue
));
// One mask per outcome. Presumably _simd_andnot_ps(a, b) computes (~a) & b, as
// the Intel andnot intrinsics do -- confirm; under that reading:
//   stencilFailMask          = ~stencilMask & coverageMask
//   stencilPassDepthPassMask =  stencilMask & depthMask
//   stencilPassDepthFailMask =  stencilMask & ~depthMask
// NOTE(review): the second andnot operand in the last mask is the float -1,
// not an all-ones bit pattern; presumably only the sign bit of the resulting
// mask matters -- confirm.
263 simdscalar stencilFailMask
= _simd_andnot_ps(stencilMask
, coverageMask
);
264 simdscalar stencilPassDepthPassMask
= _simd_and_ps(stencilMask
, depthMask
);
265 simdscalar stencilPassDepthFailMask
= _simd_and_ps(stencilMask
, _simd_andnot_ps(depthMask
, _simd_set1_ps(-1)));
// Keep the unmodified values for the write-mask merge and the final blend.
267 simdscalar origStencil
= stencilps
;
// Apply the three configured operations in turn, each with its own lane mask.
269 StencilOp((SWR_STENCILOP
)stencilFailOp
, stencilFailMask
, stencilRefps
, stencilps
);
270 StencilOp((SWR_STENCILOP
)stencilPassDepthFailOp
, stencilPassDepthFailMask
, stencilRefps
, stencilps
);
271 StencilOp((SWR_STENCILOP
)stencilPassDepthPassOp
, stencilPassDepthPassMask
, stencilRefps
, stencilps
);
273 // apply stencil write mask
274 simdscalari vWriteMask
= _simd_set1_epi32(stencilWriteMask
);
// Keep only the writable bits of the new value, then merge in the original
// bits outside the write mask (presumably (~writeMask & origStencil) | new).
275 stencilps
= _simd_and_ps(stencilps
, _simd_castsi_ps(vWriteMask
));
276 stencilps
= _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask
), origStencil
), stencilps
);
// Blend between the original and the updated value using coverageMask, then
// store the result back to pStencilBase.
278 simdvector stencilResult
;
279 stencilResult
.v
[0] = _simd_blendv_ps(origStencil
, stencilps
, coverageMask
);
280 StoreSOA
<R8_UINT
>(stencilResult
, pStencilBase
);