1 /**************************************************************************
3 * Copyright 2012 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "pipe/p_state.h"
29 #include "util/u_debug.h"
31 #include "gallivm/lp_bld_type.h"
32 #include "gallivm/lp_bld_arit.h"
33 #include "gallivm/lp_bld_const.h"
34 #include "gallivm/lp_bld_logic.h"
35 #include "gallivm/lp_bld_swizzle.h"
36 #include "gallivm/lp_bld_flow.h"
37 #include "gallivm/lp_bld_debug.h"
38 #include "gallivm/lp_bld_pack.h"
40 #include "lp_bld_blend.h"
43 * Is (a OP b) == (b OP a)?
/*
 * Commutativity query for a blend equation: answers the question
 * "is (a OP b) == (b OP a)?" for the PIPE_BLEND_xxx value passed in func.
 *
 * NOTE(review): this view of the function is incomplete. The return type,
 * the enclosing switch statement, any other case labels and every return
 * statement are missing; only the two subtract-style labels are visible.
 */
46 lp_build_blend_func_commutative(unsigned func
)
/*
 * Presumably the non-commutative group (a - b differs from b - a); the value
 * returned for these labels is not visible here -- confirm in the full file.
 */
53 case PIPE_BLEND_SUBTRACT
:
54 case PIPE_BLEND_REVERSE_SUBTRACT
:
64 * Whether the blending functions are the reverse of each other.
/*
 * Tests whether the RGB and alpha blend equations are the reverse of each
 * other.
 *
 * Three conditions are evaluated in order:
 *   1. both functions are identical;
 *   2. rgb is SUBTRACT and alpha is REVERSE_SUBTRACT;
 *   3. rgb is REVERSE_SUBTRACT and alpha is SUBTRACT.
 *
 * NOTE(review): the return type and the statement executed for each
 * condition (and the final fall-through return) are missing from this view.
 * Presumably (1) yields false and (2)/(3) yield true -- confirm.
 */
67 lp_build_blend_func_reverse(unsigned rgb_func
, unsigned alpha_func
)
/* Identical equations: checked first, before the two mirrored pairs. */
69 if (rgb_func
== alpha_func
)
/* SUBTRACT paired with REVERSE_SUBTRACT ... */
71 if (rgb_func
== PIPE_BLEND_SUBTRACT
&& alpha_func
== PIPE_BLEND_REVERSE_SUBTRACT
)
/* ... and the same pair with the roles of rgb and alpha swapped. */
73 if (rgb_func
== PIPE_BLEND_REVERSE_SUBTRACT
&& alpha_func
== PIPE_BLEND_SUBTRACT
)
80 * Whether the blending factors are complementary of each other.
/*
 * Tests whether dst_factor is the complement of src_factor.
 *
 * The test works purely on the numeric encoding of the blend factors:
 * the two are complementary when they differ in exactly bit 0x10.
 *
 * @param src_factor  source blend factor (PIPE_BLENDFACTOR_xxx encoding)
 * @param dst_factor  destination blend factor (PIPE_BLENDFACTOR_xxx encoding)
 * @return non-zero when dst_factor == (src_factor ^ 0x10)
 *
 * NOTE(review): the line carrying the return type / storage class is not
 * visible in this view.
 */
83 lp_build_blend_factor_complementary(unsigned src_factor
, unsigned dst_factor
)
/*
 * Compile-time guards: the XOR trick below is only valid while the enum
 * keeps ZERO/ONE and CONST_COLOR/INV_CONST_COLOR exactly 0x10 apart.
 */
85 STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO
^ 0x10) == PIPE_BLENDFACTOR_ONE
);
86 STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR
^ 0x10) ==
87 PIPE_BLENDFACTOR_INV_CONST_COLOR
);
/* Toggle bit 0x10 of the source factor and compare with the destination. */
88 return dst_factor
== (src_factor
^ 0x10);
93 * Whether this is an inverse blend factor
/*
 * Classifies a blend factor as "inverse" or not.
 *
 * @param factor  blend factor value to classify
 *
 * NOTE(review): only the compile-time check is visible here; the return
 * type and the return statement are missing from this view. Presumably the
 * result is derived by comparing factor against the 0x11 encoding asserted
 * below -- confirm against the complete file.
 */
96 is_inverse_factor(unsigned factor
)
/* Guard: the classification relies on PIPE_BLENDFACTOR_ZERO being 0x11. */
98 STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO
== 0x11);
104 * Calculates the (expanded to wider type) multiplication
105 * of 2 normalized numbers.
/*
 * Multiplies two normalized integer vectors and hands back the products in
 * the wider type, as two separate values.
 *
 * @param bld    build context; bld->type is the (narrow) type of a and b
 * @param a      first operand; must pass lp_check_value() for bld->type
 * @param b      second operand; same requirement
 * @param resl   receives lp_build_mul_norm() of the first unpacked parts
 *               (al, bl)
 * @param resh   receives lp_build_mul_norm() of the second unpacked parts
 *               (ah, bh)
 * @param signedness_differs
 *               when set, b is unpacked using types whose .sign flag is the
 *               opposite of the one used for a
 *
 * Only non-floating, non-fixed, normalized types are accepted (asserted).
 *
 * NOTE(review): this view is incomplete -- the return type, braces and the
 * statements of the zero short-cut branch that follow the construction of
 * `zero` are missing. Presumably both outputs are set to `zero` and the
 * function returns early -- confirm.
 */
108 lp_build_mul_norm_expand(struct lp_build_context
*bld
,
109 LLVMValueRef a
, LLVMValueRef b
,
110 LLVMValueRef
*resl
, LLVMValueRef
*resh
,
111 boolean signedness_differs
)
/* type/wide_type describe operand a; type2/wide_type2 describe operand b. */
113 const struct lp_type type
= bld
->type
;
114 struct lp_type wide_type
= lp_wider_type(type
);
115 struct lp_type wide_type2
= wide_type
;
116 struct lp_type type2
= type
;
117 LLVMValueRef al
, ah
, bl
, bh
;
119 assert(lp_check_value(type
, a
));
120 assert(lp_check_value(type
, b
));
121 assert(!type
.floating
&& !type
.fixed
&& type
.norm
);
/* Short-cut when either operand is the context's zero value. */
123 if (a
== bld
->zero
|| b
== bld
->zero
) {
124 LLVMValueRef zero
= LLVMConstNull(lp_build_vec_type(bld
->gallivm
, wide_type
));
/* Flip the signedness used for b only; a keeps the context's signedness. */
130 if (signedness_differs
) {
131 type2
.sign
= !type
.sign
;
132 wide_type2
.sign
= !wide_type2
.sign
;
/* Widen each operand into two values of the wider type. */
135 lp_build_unpack2_native(bld
->gallivm
, type
, wide_type
, a
, &al
, &ah
);
136 lp_build_unpack2_native(bld
->gallivm
, type2
, wide_type2
, b
, &bl
, &bh
);
/* Both products are built with wide_type (not wide_type2). */
138 *resl
= lp_build_mul_norm(bld
->gallivm
, wide_type
, al
, bl
);
139 *resh
= lp_build_mul_norm(bld
->gallivm
, wide_type
, ah
, bh
);
144 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
/*
 * Combines two already-weighted terms according to a blend equation.
 *
 * Visible mappings:
 *   - lp_build_add(term1, term2)
 *   - PIPE_BLEND_SUBTRACT          -> lp_build_sub(term1, term2)
 *   - PIPE_BLEND_REVERSE_SUBTRACT  -> lp_build_sub(term2, term1)
 *   - lp_build_min(term1, term2)
 *   - lp_build_max(term1, term2)
 *
 * NOTE(review): the return type, the remaining parameters (func, term1 and
 * term2 are used but not declared in this view), the switch header, the
 * case labels for the add/min/max returns and any default handling are
 * missing. Presumably those labels are PIPE_BLEND_ADD / MIN / MAX --
 * confirm against the complete file.
 */
147 lp_build_blend_func(struct lp_build_context
*bld
,
154 return lp_build_add(bld
, term1
, term2
);
155 case PIPE_BLEND_SUBTRACT
:
156 return lp_build_sub(bld
, term1
, term2
);
/* Reverse subtract: same operation with the operands swapped. */
157 case PIPE_BLEND_REVERSE_SUBTRACT
:
158 return lp_build_sub(bld
, term2
, term1
);
160 return lp_build_min(bld
, term1
, term2
);
162 return lp_build_max(bld
, term1
, term2
);
171 * Performs optimisations and blending independent of SoA/AoS
173 * @param func the blend function
174 * @param factor_src PIPE_BLENDFACTOR_xxx
175 * @param factor_dst PIPE_BLENDFACTOR_xxx
176 * @param src source rgba
177 * @param dst dest rgba
178 * @param src_factor src factor computed value
179 * @param dst_factor dst factor computed value
180 * @param not_alpha_dependent same factors across all channels of src/dst
182 * not_alpha_dependent should be:
183 * SoA: always true as it is only one channel at a time
184 * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
186 * Note that pretty much every possible optimisation can only be done on non-unorm targets
187 * due to unorm values not going above 1.0 meaning factorisation can change results.
188 * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1.
/*
 * Builds the blend of src and dst for blend equation `func`, taking
 * algebraic short-cuts where the conditions below allow it.
 *
 * Strategies visible in this view, in source order:
 *   1. not_alpha_dependent with complementary factors:
 *        - ADD: a single lp_build_lerp();
 *        - SUBTRACT / REVERSE_SUBTRACT (floating types only):
 *          (src + dst) * factor followed by one subtraction.
 *   2. Floating type with factor_src == factor_dst and an
 *      ADD / SUBTRACT / REVERSE_SUBTRACT equation: func is applied to
 *      src and dst first, then the result is multiplied once by src_factor.
 *   3. Signed normalized type with at least one inverse factor: the terms
 *      are computed in the wider type via lp_build_mul_norm_expand(),
 *      combined there, and packed back with lp_build_pack2_native().
 *   4. Otherwise: src * src_factor and dst * dst_factor are combined with
 *      lp_build_blend_func().
 *
 * NOTE(review): this view is incomplete. The return type, the parameter
 * lines for func, factor_src, factor_dst, src and dst, every closing brace
 * and plain `else` line, and some statements are missing. In particular no
 * use of optimise_only is visible, and the nesting of strategy 2 relative
 * to the not_alpha_dependent test cannot be determined from here.
 */
191 lp_build_blend(struct lp_build_context
*bld
,
197 LLVMValueRef src_factor
,
198 LLVMValueRef dst_factor
,
199 boolean not_alpha_dependent
,
200 boolean optimise_only
)
202 LLVMValueRef result
, src_term
, dst_term
;
204 /* If we are not alpha dependent we can mess with the src/dst factors */
205 if (not_alpha_dependent
) {
206 if (lp_build_blend_factor_complementary(factor_src
, factor_dst
)) {
207 if (func
== PIPE_BLEND_ADD
) {
/*
 * Complementary factors differ only in bit 0x10, so the numerically
 * smaller one is the one with that bit clear; its computed value is used
 * as the lerp weight and the other operand order is chosen to match.
 */
208 if (factor_src
< factor_dst
) {
209 return lp_build_lerp(bld
, src_factor
, dst
, src
, 0);
211 return lp_build_lerp(bld
, dst_factor
, src
, dst
, 0);
/* Float-only: factor the sum out, then subtract the unweighted operand. */
213 } else if (bld
->type
.floating
&& func
== PIPE_BLEND_SUBTRACT
) {
214 result
= lp_build_add(bld
, src
, dst
);
216 if (factor_src
< factor_dst
) {
217 result
= lp_build_mul(bld
, result
, src_factor
);
218 return lp_build_sub(bld
, result
, dst
);
220 result
= lp_build_mul(bld
, result
, dst_factor
);
221 return lp_build_sub(bld
, src
, result
);
/* Mirror image of the SUBTRACT case with the subtraction operands swapped. */
223 } else if (bld
->type
.floating
&& func
== PIPE_BLEND_REVERSE_SUBTRACT
) {
224 result
= lp_build_add(bld
, src
, dst
);
226 if (factor_src
< factor_dst
) {
227 result
= lp_build_mul(bld
, result
, src_factor
);
228 return lp_build_sub(bld
, dst
, result
);
230 result
= lp_build_mul(bld
, result
, dst_factor
);
231 return lp_build_sub(bld
, result
, src
);
/*
 * Same factor on both sides (float only): apply the equation to the raw
 * operands and multiply once. src_factor is used as the common factor.
 */
236 if (bld
->type
.floating
&& factor_src
== factor_dst
) {
237 if (func
== PIPE_BLEND_ADD
||
238 func
== PIPE_BLEND_SUBTRACT
||
239 func
== PIPE_BLEND_REVERSE_SUBTRACT
) {
241 result
= lp_build_blend_func(bld
, func
, src
, dst
);
242 return lp_build_mul(bld
, result
, src_factor
);
/* Signed-normalized path, taken only when an inverse factor is involved. */
250 if ((bld
->type
.norm
&& bld
->type
.sign
) &&
251 (is_inverse_factor(factor_src
) || is_inverse_factor(factor_dst
))) {
253 * With snorm blending, the inverse blend factors range from [0,2]
254 * instead of [-1,1], so the ordinary signed normalized arithmetic
255 * doesn't quite work. Unpack must be unsigned, and the add/sub
256 * must be done with wider type.
257 * (Note that it's not quite obvious what the blend equation wrt to
258 * clamping should actually be based on GL spec in this case, but
259 * really the incoming src values are clamped to [-1,1] (the dst is
260 * always clamped already), and then NO further clamping occurs until
/* bldw is a second build context operating on the wider type. */
263 struct lp_build_context bldw
;
264 struct lp_type wide_type
= lp_wider_type(bld
->type
);
265 LLVMValueRef src_terml
, src_termh
, dst_terml
, dst_termh
;
266 LLVMValueRef resl
, resh
;
269 * We don't need saturate math for the sub/add, since we have
270 * x+1 bit numbers in x*2 wide type (result is x+2 bits).
271 * (Doesn't really matter on x86 sse2 though as we use saturated
275 lp_build_context_init(&bldw
, bld
->gallivm
, wide_type
);
278 * XXX This is a bit hackish. Note that -128 really should
279 * be -1.0, the same as -127. However, we did not actually clamp
280 * things anywhere (relying on pack intrinsics instead) therefore
281 * we will get -128, and the inverted factor then 255. But the mul
282 * can overflow in this case (rather the rounding fixups for the mul,
283 * -128*255 will be positive).
284 * So we clamp the src and dst up here but only when necessary (we
285 * should do this before calculating blend factors but it's enough
286 * for avoiding overflow).
/* Clamp from below at -1.0, only for the side that uses an inverse factor. */
288 if (is_inverse_factor(factor_src
)) {
289 src
= lp_build_max(bld
, src
,
290 lp_build_const_vec(bld
->gallivm
, bld
->type
, -1.0));
292 if (is_inverse_factor(factor_dst
)) {
293 dst
= lp_build_max(bld
, dst
,
294 lp_build_const_vec(bld
->gallivm
, bld
->type
, -1.0));
/*
 * Widening multiplies; the last argument (signedness_differs) is TRUE
 * exactly when the corresponding factor is an inverse factor.
 */
297 lp_build_mul_norm_expand(bld
, src
, src_factor
, &src_terml
, &src_termh
,
298 is_inverse_factor(factor_src
) ? TRUE
: FALSE
);
299 lp_build_mul_norm_expand(bld
, dst
, dst_factor
, &dst_terml
, &dst_termh
,
300 is_inverse_factor(factor_dst
) ? TRUE
: FALSE
);
/* Apply the blend equation separately to each widened pair, in bldw. */
301 resl
= lp_build_blend_func(&bldw
, func
, src_terml
, dst_terml
);
302 resh
= lp_build_blend_func(&bldw
, func
, src_termh
, dst_termh
);
305 * XXX pack2_native is not ok because the values have to be in dst
306 * range. We need native pack though for the correct order on avx2.
307 * Will break on everything not implementing clamping pack intrinsics
308 * (i.e. everything but sse2 and altivec).
310 return lp_build_pack2_native(bld
->gallivm
, wide_type
, bld
->type
, resl
, resh
);
/* Generic path: weight each operand, then apply the blend equation. */
312 src_term
= lp_build_mul(bld
, src
, src_factor
);
313 dst_term
= lp_build_mul(bld
, dst
, dst_factor
);
314 return lp_build_blend_func(bld
, func
, src_term
, dst_term
);
/*
 * Folds an alpha-based test into the given mask context: the comparison
 * alpha > 0.5 is built, named "alpha_to_coverage", and passed to
 * lp_build_mask_update(); lp_build_mask_check() is then called on the mask.
 *
 * @param gallivm  state used to initialise the local build context and to
 *                 create the 0.5 constant vector
 * @param mask     mask context that receives the comparison result
 *
 * NOTE(review): this view is incomplete. The return type, the parameters
 * that introduce `type` and `alpha`, the declaration of `test`, and any
 * condition guarding the final lp_build_mask_check() call are missing --
 * confirm against the complete file.
 */
319 lp_build_alpha_to_coverage(struct gallivm_state
*gallivm
,
321 struct lp_build_mask_context
*mask
,
325 struct lp_build_context bld
;
327 LLVMValueRef alpha_ref_value
;
329 lp_build_context_init(&bld
, gallivm
, type
);
/* Fixed reference value of 0.5 that alpha is compared against. */
331 alpha_ref_value
= lp_build_const_vec(gallivm
, type
, 0.5);
/* test = (alpha > 0.5) */
333 test
= lp_build_cmp(&bld
, PIPE_FUNC_GREATER
, alpha
, alpha_ref_value
);
335 lp_build_name(test
, "alpha_to_coverage");
337 lp_build_mask_update(mask
, test
);
340 lp_build_mask_check(mask
);