X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_bld_blend.c;h=02ec55eddbd39854b2e29c03bd1d4fa6cbb0a7a1;hb=01ab218bbc5c8058a99077a6bc3dc9884e9d218a;hp=e070aac378df334049fe4ec2565746ff95e32235;hpb=1dd7bb17c7331f9ecd0bc830b61ada235a56fe6d;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c index e070aac378d..02ec55eddbd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2012 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,293 +25,317 @@ * **************************************************************************/ - -/** - * @file - * Blend LLVM IR generation. - * - * This code is generic -- it should be able to cope both with floating point - * and integer inputs in AOS form. - * - * @author Jose Fonseca - */ - - #include "pipe/p_state.h" +#include "util/u_debug.h" -#include "lp_bld.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_swizzle.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_pack.h" +#include "lp_bld_blend.h" /** - * We may the same values several times, so we keep them here to avoid - * recomputing them. Also reusing the values allows us to do simplifications - * that LLVM optimization passes wouldn't normally be able to do. + * Is (a OP b) == (b OP a)? */ -struct lp_build_blend_context +boolean +lp_build_blend_func_commutative(unsigned func) { - struct lp_build_context base; - - LLVMValueRef src; - LLVMValueRef dst; - LLVMValueRef const_; - - LLVMValueRef inv_src; - LLVMValueRef inv_dst; - LLVMValueRef inv_const; - LLVMValueRef saturate; - - LLVMValueRef rgb_src_factor; - LLVMValueRef alpha_src_factor; - LLVMValueRef rgb_dst_factor; - LLVMValueRef alpha_dst_factor; -}; - - -static LLVMValueRef -lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld, - unsigned factor, - boolean alpha) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ZERO: - return bld->base.zero; - case PIPE_BLENDFACTOR_ONE: - return bld->base.one; - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_SRC_ALPHA: - return bld->src; - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_DST_ALPHA: - return bld->dst; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - if(alpha) - return bld->base.one; - else { - if(!bld->inv_dst) - bld->inv_dst = lp_build_comp(&bld->base, bld->dst); - if(!bld->saturate) - bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); - return bld->saturate; - } - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_CONST_ALPHA: - return bld->const_; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - if(!bld->inv_src) - bld->inv_src = lp_build_comp(&bld->base, bld->src); - return bld->inv_src; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - if(!bld->inv_dst) - bld->inv_dst = lp_build_comp(&bld->base, bld->dst); - return bld->inv_dst; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - if(!bld->inv_const) - bld->inv_const = lp_build_comp(&bld->base, bld->const_); - return bld->inv_const; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; + switch (func) { + case PIPE_BLEND_ADD: + case PIPE_BLEND_MIN: + case PIPE_BLEND_MAX: + return TRUE; + case PIPE_BLEND_SUBTRACT: + case PIPE_BLEND_REVERSE_SUBTRACT: + return FALSE; default: assert(0); - return bld->base.zero; + return TRUE; } } -enum lp_build_blend_swizzle { - LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, -}; +/** + * Whether the blending functions are the reverse of each other. + */ +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) +{ + if (rgb_func == alpha_func) + return FALSE; + if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + return TRUE; + if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + return TRUE; + return FALSE; +} /** - * How should we shuffle the base factor. + * Whether the blending factors are complementary of each other. */ -static enum lp_build_blend_swizzle -lp_build_blend_factor_swizzle(unsigned factor) +static inline boolean +lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) { - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - case PIPE_BLENDFACTOR_ZERO: - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return LP_BUILD_BLEND_SWIZZLE_RGBA; - case PIPE_BLENDFACTOR_SRC_ALPHA: - case PIPE_BLENDFACTOR_DST_ALPHA: - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return LP_BUILD_BLEND_SWIZZLE_AAAA; - default: - assert(0); - return LP_BUILD_BLEND_SWIZZLE_RGBA; - } + STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE); + STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) == + PIPE_BLENDFACTOR_INV_CONST_COLOR); + return dst_factor == (src_factor ^ 0x10); } -static LLVMValueRef -lp_build_blend_swizzle(struct lp_build_blend_context *bld, - LLVMValueRef rgb, - LLVMValueRef alpha, - enum lp_build_blend_swizzle rgb_swizzle, - unsigned alpha_swizzle) +/** + * Whether this is a inverse blend factor + */ +static inline boolean +is_inverse_factor(unsigned factor) { - if(rgb == alpha) { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) - return rgb; - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) - return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); - } - else { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { - boolean cond[4] = {0, 0, 0, 0}; - cond[alpha_swizzle] = 1; - return lp_build_select_aos(&bld->base, alpha, rgb, cond); - } - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { - unsigned char swizzle[4]; - swizzle[0] = alpha_swizzle; - swizzle[1] = alpha_swizzle; - swizzle[2] = alpha_swizzle; - swizzle[3] = alpha_swizzle; - swizzle[alpha_swizzle] += 4; - return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); - } - } - assert(0); - return bld->base.undef; + STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11); + return factor > 0x11; } /** - * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml + * Calculates the (expanded to wider type) multiplication + * of 2 normalized numbers. */ -static LLVMValueRef -lp_build_blend_factor(struct lp_build_blend_context *bld, - LLVMValueRef factor1, - unsigned rgb_factor, - unsigned alpha_factor, - unsigned alpha_swizzle) +static void +lp_build_mul_norm_expand(struct lp_build_context *bld, + LLVMValueRef a, LLVMValueRef b, + LLVMValueRef *resl, LLVMValueRef *resh, + boolean signedness_differs) { - LLVMValueRef rgb_factor_; - LLVMValueRef alpha_factor_; - LLVMValueRef factor2; - enum lp_build_blend_swizzle rgb_swizzle; - - rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); - alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); + const struct lp_type type = bld->type; + struct lp_type wide_type = lp_wider_type(type); + struct lp_type wide_type2 = wide_type; + struct lp_type type2 = type; + LLVMValueRef al, ah, bl, bh; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + assert(!type.floating && !type.fixed && type.norm); + + if (a == bld->zero || b == bld->zero) { + LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type)); + *resl = zero; + *resh = zero; + return; + } - rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); + if (signedness_differs) { + type2.sign = !type.sign; + wide_type2.sign = !wide_type2.sign; + } - factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); + lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah); + lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh); - return lp_build_mul(&bld->base, factor1, factor2); + *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl); + *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh); } /** * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml */ -static LLVMValueRef -lp_build_blend_func(struct lp_build_blend_context *bld, +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, unsigned func, - LLVMValueRef term1, + LLVMValueRef term1, LLVMValueRef term2) { switch (func) { case PIPE_BLEND_ADD: - return lp_build_add(&bld->base, term1, term2); - break; + return lp_build_add(bld, term1, term2); case PIPE_BLEND_SUBTRACT: - return lp_build_sub(&bld->base, term1, term2); + return lp_build_sub(bld, term1, term2); case PIPE_BLEND_REVERSE_SUBTRACT: - return lp_build_sub(&bld->base, term2, term1); + return lp_build_sub(bld, term2, term1); case PIPE_BLEND_MIN: - return lp_build_min(&bld->base, term1, term2); + return lp_build_min(bld, term1, term2); case PIPE_BLEND_MAX: - return lp_build_max(&bld->base, term1, term2); + return lp_build_max(bld, term1, term2); default: assert(0); - return bld->base.zero; + return bld->zero; } } +/** + * Performs optimisations and blending independent of SoA/AoS + * + * @param func the blend function + * @param factor_src PIPE_BLENDFACTOR_xxx + * @param factor_dst PIPE_BLENDFACTOR_xxx + * @param src source rgba + * @param dst dest rgba + * @param src_factor src factor computed value + * @param dst_factor dst factor computed value + * @param not_alpha_dependent same factors accross all channels of src/dst + * + * not_alpha_dependent should be: + * SoA: always true as it is only one channel at a time + * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor + * + * Note that pretty much every possible optimisation can only be done on non-unorm targets + * due to unorm values not going above 1.0 meaning factorisation can change results. + * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1. + */ LLVMValueRef -lp_build_blend(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - union lp_type type, +lp_build_blend(struct lp_build_context *bld, + unsigned func, + unsigned factor_src, + unsigned factor_dst, LLVMValueRef src, LLVMValueRef dst, - LLVMValueRef const_, - unsigned alpha_swizzle) + LLVMValueRef src_factor, + LLVMValueRef dst_factor, + boolean not_alpha_dependent, + boolean optimise_only) { - struct lp_build_blend_context bld; - LLVMValueRef src_term; - LLVMValueRef dst_term; - - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); - - /* Setup build context */ - memset(&bld, 0, sizeof bld); - bld.base.builder = builder; - bld.base.type = type; - bld.base.undef = lp_build_undef(type); - bld.base.zero = lp_build_zero(type); - bld.base.one = lp_build_one(type); - bld.src = src; - bld.dst = dst; - bld.const_ = const_; - - /* TODO: There are still a few optimization oportunities here. For certain - * combinations it is possible to reorder the operations and therefor saving - * some instructions. */ - - src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle); - -#ifdef DEBUG - LLVMSetValueName(src_term, "src_term"); - LLVMSetValueName(dst_term, "dst_term"); -#endif - - if(blend->rgb_func == blend->alpha_func) { - return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); - } - else { - /* Seperate RGB / A functions */ + LLVMValueRef result, src_term, dst_term; + + /* If we are not alpha dependent we can mess with the src/dst factors */ + if (not_alpha_dependent) { + if (lp_build_blend_factor_complementary(factor_src, factor_dst)) { + if (func == PIPE_BLEND_ADD) { + if (factor_src < factor_dst) { + return lp_build_lerp(bld, src_factor, dst, src, 0); + } else { + return lp_build_lerp(bld, dst_factor, src, dst, 0); + } + } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) { + result = lp_build_add(bld, src, dst); + + if (factor_src < factor_dst) { + result = lp_build_mul(bld, result, src_factor); + return lp_build_sub(bld, result, dst); + } else { + result = lp_build_mul(bld, result, dst_factor); + return lp_build_sub(bld, src, result); + } + } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) { + result = lp_build_add(bld, src, dst); + + if (factor_src < factor_dst) { + result = lp_build_mul(bld, result, src_factor); + return lp_build_sub(bld, dst, result); + } else { + result = lp_build_mul(bld, result, dst_factor); + return lp_build_sub(bld, result, src); + } + } + } - LLVMValueRef rgb; - LLVMValueRef alpha; + if (bld->type.floating && factor_src == factor_dst) { + if (func == PIPE_BLEND_ADD || + func == PIPE_BLEND_SUBTRACT || + func == PIPE_BLEND_REVERSE_SUBTRACT) { + LLVMValueRef result; + result = lp_build_blend_func(bld, func, src, dst); + return lp_build_mul(bld, result, src_factor); + } + } + } - rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term); + if (optimise_only) + return NULL; + + if ((bld->type.norm && bld->type.sign) && + (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) { + /* + * With snorm blending, the inverse blend factors range from [0,2] + * instead of [-1,1], so the ordinary signed normalized arithmetic + * doesn't quite work. Unpack must be unsigned, and the add/sub + * must be done with wider type. + * (Note that it's not quite obvious what the blend equation wrt to + * clamping should actually be based on GL spec in this case, but + * really the incoming src values are clamped to [-1,1] (the dst is + * always clamped already), and then NO further clamping occurs until + * the end.) + */ + struct lp_build_context bldw; + struct lp_type wide_type = lp_wider_type(bld->type); + LLVMValueRef src_terml, src_termh, dst_terml, dst_termh; + LLVMValueRef resl, resh; + + /* + * We don't need saturate math for the sub/add, since we have + * x+1 bit numbers in x*2 wide type (result is x+2 bits). + * (Doesn't really matter on x86 sse2 though as we use saturated + * intrinsics.) + */ + wide_type.norm = 0; + lp_build_context_init(&bldw, bld->gallivm, wide_type); + + /* + * XXX This is a bit hackish. Note that -128 really should + * be -1.0, the same as -127. However, we did not actually clamp + * things anywhere (relying on pack intrinsics instead) therefore + * we will get -128, and the inverted factor then 255. But the mul + * can overflow in this case (rather the rounding fixups for the mul, + * -128*255 will be positive). + * So we clamp the src and dst up here but only when necessary (we + * should do this before calculating blend factors but it's enough + * for avoiding overflow). + */ + if (is_inverse_factor(factor_src)) { + src = lp_build_max(bld, src, + lp_build_const_vec(bld->gallivm, bld->type, -1.0)); + } + if (is_inverse_factor(factor_dst)) { + dst = lp_build_max(bld, dst, + lp_build_const_vec(bld->gallivm, bld->type, -1.0)); + } - return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh, + is_inverse_factor(factor_src) ? TRUE : FALSE); + lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh, + is_inverse_factor(factor_dst) ? TRUE : FALSE); + resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml); + resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh); + + /* + * XXX pack2_native is not ok because the values have to be in dst + * range. We need native pack though for the correct order on avx2. + * Will break on everything not implementing clamping pack intrinsics + * (i.e. everything but sse2 and altivec). + */ + return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh); + } else { + src_term = lp_build_mul(bld, src, src_factor); + dst_term = lp_build_mul(bld, dst, dst_factor); + return lp_build_blend_func(bld, func, src_term, dst_term); } } + +void +lp_build_alpha_to_coverage(struct gallivm_state *gallivm, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + boolean do_branch) +{ + struct lp_build_context bld; + LLVMValueRef test; + LLVMValueRef alpha_ref_value; + + lp_build_context_init(&bld, gallivm, type); + + alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5); + + test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value); + + lp_build_name(test, "alpha_to_coverage"); + + lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); +}