1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * @brief Implementation for blending operations.
27 ******************************************************************************/
// Computes the four-component (x, y, z, w) blend factor selected by `func`.
// The visible cases fill result.x/.y/.z/.w from constantColor, src, src1 and
// dst.
// NOTE(review): the switch header, braces, break statements, the declaration
// of `result`, and the code that consumes the Color/Alpha template flags and
// writes `out` are not visible in this view -- presumably Color gates x/y/z
// and Alpha gates w when copying `result` to `out`; confirm against the
// complete file.
30 template<bool Color
, bool Alpha
>
32 void GenerateBlendFactor(SWR_BLEND_FACTOR func
, simdvector
&constantColor
, simdvector
&src
, simdvector
&src1
, simdvector
&dst
, simdvector
&out
)
// ZERO: every component of the factor is 0.
38 case BLENDFACTOR_ZERO
:
39 result
.x
= _simd_setzero_ps();
40 result
.y
= _simd_setzero_ps();
41 result
.z
= _simd_setzero_ps();
42 result
.w
= _simd_setzero_ps();
// Every component of the factor is 1.0. The case label for this group is not
// visible in this view (presumably BLENDFACTOR_ONE -- confirm).
46 result
.x
= _simd_set1_ps(1.0);
47 result
.y
= _simd_set1_ps(1.0);
48 result
.z
= _simd_set1_ps(1.0);
49 result
.w
= _simd_set1_ps(1.0);
// The bodies of the SRC_COLOR and DST_COLOR cases are not visible in this view.
52 case BLENDFACTOR_SRC_COLOR
:
56 case BLENDFACTOR_DST_COLOR
:
// INV_SRC_COLOR: per-component (1 - src).
60 case BLENDFACTOR_INV_SRC_COLOR
:
61 result
.x
= _simd_sub_ps(_simd_set1_ps(1.0), src
.x
);
62 result
.y
= _simd_sub_ps(_simd_set1_ps(1.0), src
.y
);
63 result
.z
= _simd_sub_ps(_simd_set1_ps(1.0), src
.z
);
64 result
.w
= _simd_sub_ps(_simd_set1_ps(1.0), src
.w
);
// INV_DST_COLOR: per-component (1 - dst).
67 case BLENDFACTOR_INV_DST_COLOR
:
68 result
.x
= _simd_sub_ps(_simd_set1_ps(1.0), dst
.x
);
69 result
.y
= _simd_sub_ps(_simd_set1_ps(1.0), dst
.y
);
70 result
.z
= _simd_sub_ps(_simd_set1_ps(1.0), dst
.z
);
71 result
.w
= _simd_sub_ps(_simd_set1_ps(1.0), dst
.w
);
// SRC_ALPHA: x takes the source alpha (src.w). Assignments to the remaining
// components are not visible in this view.
74 case BLENDFACTOR_SRC_ALPHA
: result
.x
= src
.w
;
// INV_SRC_ALPHA: (1 - src.w) is computed once and copied to all four components.
80 case BLENDFACTOR_INV_SRC_ALPHA
:
82 simdscalar oneMinusSrcA
= _simd_sub_ps(_simd_set1_ps(1.0), src
.w
);
83 result
.x
= oneMinusSrcA
;
84 result
.y
= oneMinusSrcA
;
85 result
.z
= oneMinusSrcA
;
86 result
.w
= oneMinusSrcA
;
// DST_ALPHA: x takes the destination alpha (dst.w). Assignments to the
// remaining components are not visible in this view.
90 case BLENDFACTOR_DST_ALPHA
: result
.x
= dst
.w
;
// INV_DST_ALPHA: (1 - dst.w) is computed once and copied to all four components.
96 case BLENDFACTOR_INV_DST_ALPHA
:
98 simdscalar oneMinusDstA
= _simd_sub_ps(_simd_set1_ps(1.0), dst
.w
);
99 result
.x
= oneMinusDstA
;
100 result
.y
= oneMinusDstA
;
101 result
.z
= oneMinusDstA
;
102 result
.w
= oneMinusDstA
;
// SRC_ALPHA_SATURATE: sat = min(src.w, 1 - dst.w); w is forced to 1.0.
// The use of `sat` for the other components is not visible in this view.
106 case BLENDFACTOR_SRC_ALPHA_SATURATE
:
108 simdscalar sat
= _simd_min_ps(src
.w
, _simd_sub_ps(_simd_set1_ps(1.0), dst
.w
));
112 result
.w
= _simd_set1_ps(1.0);
// CONST_COLOR: components are taken from constantColor[0..3] in order.
116 case BLENDFACTOR_CONST_COLOR
:
117 result
.x
= constantColor
[0];
118 result
.y
= constantColor
[1];
119 result
.z
= constantColor
[2];
120 result
.w
= constantColor
[3];
// CONST_ALPHA: constantColor[3] is copied to all four components.
123 case BLENDFACTOR_CONST_ALPHA
:
124 result
.x
= result
.y
= result
.z
= result
.w
= constantColor
[3];
// INV_CONST_COLOR: per-component (1 - constantColor[i]).
127 case BLENDFACTOR_INV_CONST_COLOR
:
129 result
.x
= _simd_sub_ps(_simd_set1_ps(1.0f
), constantColor
[0]);
130 result
.y
= _simd_sub_ps(_simd_set1_ps(1.0f
), constantColor
[1]);
131 result
.z
= _simd_sub_ps(_simd_set1_ps(1.0f
), constantColor
[2]);
132 result
.w
= _simd_sub_ps(_simd_set1_ps(1.0f
), constantColor
[3]);
// INV_CONST_ALPHA: (1 - constantColor[3]) is copied to all four components.
136 case BLENDFACTOR_INV_CONST_ALPHA
:
138 result
.x
= result
.y
= result
.z
= result
.w
= _simd_sub_ps(_simd_set1_ps(1.0f
), constantColor
[3]);
// The body of the SRC1_COLOR case is not visible in this view.
142 case BLENDFACTOR_SRC1_COLOR
:
// SRC1_ALPHA: src1.w is copied to all four components.
149 case BLENDFACTOR_SRC1_ALPHA
:
150 result
.x
= result
.y
= result
.z
= result
.w
= src1
.w
;
// INV_SRC1_COLOR: per-component (1 - src1).
153 case BLENDFACTOR_INV_SRC1_COLOR
:
154 result
.x
= _simd_sub_ps(_simd_set1_ps(1.0f
), src1
.x
);
155 result
.y
= _simd_sub_ps(_simd_set1_ps(1.0f
), src1
.y
);
156 result
.z
= _simd_sub_ps(_simd_set1_ps(1.0f
), src1
.z
);
157 result
.w
= _simd_sub_ps(_simd_set1_ps(1.0f
), src1
.w
);
// INV_SRC1_ALPHA: (1 - src1.w) is copied to all four components.
160 case BLENDFACTOR_INV_SRC1_ALPHA
:
161 result
.x
= result
.y
= result
.z
= result
.w
= _simd_sub_ps(_simd_set1_ps(1.0f
), src1
.w
);
// Any other factor value is reported through SWR_INVALID with the numeric value of `func`.
164 default: SWR_INVALID("Unimplemented blend factor: %d", func
);
// Combines the factor-weighted source and destination colors per component
// (x, y, z, w) according to `blendOp`, producing `result`.
// NOTE(review): _simd_fmadd_ps(a, b, c) is assumed to compute a*b + c and
// _simd_fmsub_ps(a, b, c) to compute a*b - c; neither is defined in this view.
// NOTE(review): the switch header, braces, break statements, the declaration
// of `result`, and the code that uses the Color/Alpha template flags and
// writes `out` are not visible in this view -- confirm against the complete
// file.
180 template<bool Color
, bool Alpha
>
181 INLINE
void BlendFunc(SWR_BLEND_OP blendOp
, simdvector
&src
, simdvector
&srcFactor
, simdvector
&dst
, simdvector
&dstFactor
, simdvector
&out
)
// Additive blend: srcFactor * src + dstFactor * dst. The case label for this
// group is not visible in this view (presumably BLENDOP_ADD -- confirm).
188 result
.x
= _simd_fmadd_ps(srcFactor
.x
, src
.x
, _simd_mul_ps(dstFactor
.x
, dst
.x
));
189 result
.y
= _simd_fmadd_ps(srcFactor
.y
, src
.y
, _simd_mul_ps(dstFactor
.y
, dst
.y
));
190 result
.z
= _simd_fmadd_ps(srcFactor
.z
, src
.z
, _simd_mul_ps(dstFactor
.z
, dst
.z
));
191 result
.w
= _simd_fmadd_ps(srcFactor
.w
, src
.w
, _simd_mul_ps(dstFactor
.w
, dst
.w
));
// SUBTRACT: srcFactor * src - dstFactor * dst.
194 case BLENDOP_SUBTRACT
:
195 result
.x
= _simd_fmsub_ps(srcFactor
.x
, src
.x
, _simd_mul_ps(dstFactor
.x
, dst
.x
));
196 result
.y
= _simd_fmsub_ps(srcFactor
.y
, src
.y
, _simd_mul_ps(dstFactor
.y
, dst
.y
));
197 result
.z
= _simd_fmsub_ps(srcFactor
.z
, src
.z
, _simd_mul_ps(dstFactor
.z
, dst
.z
));
198 result
.w
= _simd_fmsub_ps(srcFactor
.w
, src
.w
, _simd_mul_ps(dstFactor
.w
, dst
.w
));
// REVSUBTRACT: operands swapped relative to SUBTRACT, i.e.
// dstFactor * dst - srcFactor * src.
201 case BLENDOP_REVSUBTRACT
:
202 result
.x
= _simd_fmsub_ps(dstFactor
.x
, dst
.x
, _simd_mul_ps(srcFactor
.x
, src
.x
));
203 result
.y
= _simd_fmsub_ps(dstFactor
.y
, dst
.y
, _simd_mul_ps(srcFactor
.y
, src
.y
));
204 result
.z
= _simd_fmsub_ps(dstFactor
.z
, dst
.z
, _simd_mul_ps(srcFactor
.z
, src
.z
));
205 result
.w
= _simd_fmsub_ps(dstFactor
.w
, dst
.w
, _simd_mul_ps(srcFactor
.w
, src
.w
));
// Per-component minimum of (srcFactor * src) and (dstFactor * dst). The case
// label is not visible in this view (presumably BLENDOP_MIN -- confirm).
// NOTE(review): OpenGL's GL_MIN/GL_MAX blend equations do not apply the blend
// factors, whereas here the factors are multiplied in before min/max --
// confirm this is intended or compensated for by the caller.
209 result
.x
= _simd_min_ps(_simd_mul_ps(srcFactor
.x
, src
.x
), _simd_mul_ps(dstFactor
.x
, dst
.x
));
210 result
.y
= _simd_min_ps(_simd_mul_ps(srcFactor
.y
, src
.y
), _simd_mul_ps(dstFactor
.y
, dst
.y
));
211 result
.z
= _simd_min_ps(_simd_mul_ps(srcFactor
.z
, src
.z
), _simd_mul_ps(dstFactor
.z
, dst
.z
));
212 result
.w
= _simd_min_ps(_simd_mul_ps(srcFactor
.w
, src
.w
), _simd_mul_ps(dstFactor
.w
, dst
.w
));
// Per-component maximum of (srcFactor * src) and (dstFactor * dst). The case
// label is not visible in this view (presumably BLENDOP_MAX -- confirm).
216 result
.x
= _simd_max_ps(_simd_mul_ps(srcFactor
.x
, src
.x
), _simd_mul_ps(dstFactor
.x
, dst
.x
));
217 result
.y
= _simd_max_ps(_simd_mul_ps(srcFactor
.y
, src
.y
), _simd_mul_ps(dstFactor
.y
, dst
.y
));
218 result
.z
= _simd_max_ps(_simd_mul_ps(srcFactor
.z
, src
.z
), _simd_mul_ps(dstFactor
.z
, dst
.z
));
219 result
.w
= _simd_max_ps(_simd_mul_ps(srcFactor
.w
, src
.w
), _simd_mul_ps(dstFactor
.w
, dst
.w
));
// Any other operation is reported through SWR_INVALID with the numeric value
// of `blendOp` (presumably the switch's default label -- not visible here).
223 SWR_INVALID("Unimplemented blend function: %d", blendOp
);
// Clamps all four components of `src` in place to a range chosen by the
// `type` template parameter.
// NOTE(review): the conditions that select between the groups below are not
// visible in this view -- presumably the [0, 1] range is used for unsigned
// normalized types and [-1, 1] for signed normalized types; confirm against
// the complete file.
238 template<SWR_TYPE type
>
239 INLINE
void Clamp(simdvector
&src
)
// First visible group: each component is clamped to [0, 1] by a max with 0
// followed by a min with 1.
247 src
.x
= _simd_max_ps(src
.x
, _simd_setzero_ps());
248 src
.x
= _simd_min_ps(src
.x
, _simd_set1_ps(1.0f
));
250 src
.y
= _simd_max_ps(src
.y
, _simd_setzero_ps());
251 src
.y
= _simd_min_ps(src
.y
, _simd_set1_ps(1.0f
));
253 src
.z
= _simd_max_ps(src
.z
, _simd_setzero_ps());
254 src
.z
= _simd_min_ps(src
.z
, _simd_set1_ps(1.0f
));
256 src
.w
= _simd_max_ps(src
.w
, _simd_setzero_ps());
257 src
.w
= _simd_min_ps(src
.w
, _simd_set1_ps(1.0f
));
// Second visible group: each component is clamped to [-1, 1] by a max with -1
// followed by a min with 1.
261 src
.x
= _simd_max_ps(src
.x
, _simd_set1_ps(-1.0f
));
262 src
.x
= _simd_min_ps(src
.x
, _simd_set1_ps(1.0f
));
264 src
.y
= _simd_max_ps(src
.y
, _simd_set1_ps(-1.0f
));
265 src
.y
= _simd_min_ps(src
.y
, _simd_set1_ps(1.0f
));
267 src
.z
= _simd_max_ps(src
.z
, _simd_set1_ps(-1.0f
));
268 src
.z
= _simd_min_ps(src
.z
, _simd_set1_ps(1.0f
));
270 src
.w
= _simd_max_ps(src
.w
, _simd_set1_ps(-1.0f
));
271 src
.w
= _simd_min_ps(src
.w
, _simd_set1_ps(1.0f
));
// Types with no clamp implemented are reported through SWR_INVALID with the
// numeric value of `type`.
275 SWR_INVALID("Unimplemented clamp: %d", type
);
// Top-level blend routine. In the visible code it loads the destination color
// from pDst, broadcasts the blend-state constant color, clamps, generates
// source/destination blend factors from pState, and combines src and dst with
// BlendFunc into `result`.
// NOTE(review): braces, the declaration of `dst`, the `else` keyword and
// possibly other statements are not visible in this view; the comments below
// describe only the visible calls.
280 template<SWR_TYPE type
>
281 void Blend(const SWR_BLEND_STATE
*pBlendState
, const SWR_RENDER_TARGET_BLEND_STATE
*pState
, simdvector
&src
, simdvector
& src1
, uint8_t *pDst
, simdvector
&result
)
283 // load render target
// Reads the current destination color from pDst into `dst`, using the
// KNOB_COLOR_HOT_TILE_FORMAT format.
285 LoadSOA
<KNOB_COLOR_HOT_TILE_FORMAT
>(pDst
, dst
);
// Broadcast each scalar of pBlendState->constantColor[0..3] into the matching
// component of a SIMD vector.
287 simdvector constColor
;
288 constColor
.x
= _simd_broadcast_ss(&pBlendState
->constantColor
[0]);
289 constColor
.y
= _simd_broadcast_ss(&pBlendState
->constantColor
[1]);
290 constColor
.z
= _simd_broadcast_ss(&pBlendState
->constantColor
[2]);
291 constColor
.w
= _simd_broadcast_ss(&pBlendState
->constantColor
[3]);
293 // clamp src/dst/constant
// Only the clamp of the constant color is visible in this view; the src/dst
// clamps named in the comment above are presumably on lines not shown.
297 Clamp
<type
>(constColor
);
299 simdvector srcFactor
, dstFactor
;
// Independent alpha blending: color and alpha use separate factors and
// separate blend operations.
300 if (pBlendState
->independentAlphaBlendEnable
)
// Source factor: the <true, false> call uses sourceBlendFactor and the
// <false, true> call uses sourceAlphaBlendFactor; both write into srcFactor.
// NOTE(review): presumably the <false, true> instantiation only overwrites the
// alpha component, so the color factor from the first call survives -- confirm.
302 GenerateBlendFactor
<true, false>((SWR_BLEND_FACTOR
)pState
->sourceBlendFactor
, constColor
, src
, src1
, dst
, srcFactor
);
303 GenerateBlendFactor
<false, true>((SWR_BLEND_FACTOR
)pState
->sourceAlphaBlendFactor
, constColor
, src
, src1
, dst
, srcFactor
);
// Destination factor: same two-call pattern, using destBlendFactor and
// destAlphaBlendFactor, writing into dstFactor.
305 GenerateBlendFactor
<true, false>((SWR_BLEND_FACTOR
)pState
->destBlendFactor
, constColor
, src
, src1
, dst
, dstFactor
);
306 GenerateBlendFactor
<false, true>((SWR_BLEND_FACTOR
)pState
->destAlphaBlendFactor
, constColor
, src
, src1
, dst
, dstFactor
);
// Apply colorBlendFunc with <true, false> and alphaBlendFunc with
// <false, true>; both calls write into `result`.
308 BlendFunc
<true, false>((SWR_BLEND_OP
)pState
->colorBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
309 BlendFunc
<false, true>((SWR_BLEND_OP
)pState
->alphaBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
// Shared color/alpha path (presumably the else branch -- the `else` keyword is
// not visible in this view): one factor per side and a single blend operation
// (colorBlendFunc), all instantiated with <true, true>.
313 GenerateBlendFactor
<true, true>((SWR_BLEND_FACTOR
)pState
->sourceBlendFactor
, constColor
, src
, src1
, dst
, srcFactor
);
314 GenerateBlendFactor
<true, true>((SWR_BLEND_FACTOR
)pState
->destBlendFactor
, constColor
, src
, src1
, dst
, dstFactor
);
316 BlendFunc
<true, true>((SWR_BLEND_OP
)pState
->colorBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);