/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2012 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
*
**************************************************************************/
-
-/**
- * @file
- * Blend LLVM IR generation.
- *
- * This code is generic -- it should be able to cope both with floating point
- * and integer inputs in AOS form.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- */
-
-
#include "pipe/p_state.h"
+#include "util/u_debug.h"
-#include "lp_bld.h"
-#include "lp_bld_arit.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_bld_blend.h"
/**
- * We may the same bld several times, so we keep them here to avoid
- * recomputing them. Also reusing the bld allows us to do simplifications
- * that LLVM optimization passes wouldn't normally be able to do.
+ * Is (a OP b) == (b OP a)?
+ *
+ * @param func PIPE_BLEND_xxx blend function
+ * @return TRUE for PIPE_BLEND_ADD, PIPE_BLEND_MIN and PIPE_BLEND_MAX;
+ *         FALSE for PIPE_BLEND_SUBTRACT and PIPE_BLEND_REVERSE_SUBTRACT.
+ *         Any other value hits assert(0) and then falls through to TRUE.
*/
-struct lp_build_blend_context
+boolean
+lp_build_blend_func_commutative(unsigned func)
{
- struct lp_build_context base;
-
- LLVMValueRef src;
- LLVMValueRef dst;
- LLVMValueRef const_;
-
- LLVMValueRef inv_src;
- LLVMValueRef inv_dst;
- LLVMValueRef inv_const;
- LLVMValueRef saturate;
-
- LLVMValueRef rgb_src_factor;
- LLVMValueRef alpha_src_factor;
- LLVMValueRef rgb_dst_factor;
- LLVMValueRef alpha_dst_factor;
-};
-
-
-static LLVMValueRef
-lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld,
- unsigned factor,
- boolean alpha)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_ZERO:
- return bld->base.zero;
- case PIPE_BLENDFACTOR_ONE:
- return bld->base.one;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return bld->src;
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return bld->dst;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if(alpha)
- return bld->base.one;
- else {
- if(!bld->inv_dst)
- bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
- if(!bld->saturate)
- bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
- return bld->saturate;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return bld->const_;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- if(!bld->inv_src)
- bld->inv_src = lp_build_comp(&bld->base, bld->src);
- return bld->inv_src;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- if(!bld->inv_dst)
- bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
- return bld->inv_dst;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- if(!bld->inv_const)
- bld->inv_const = lp_build_comp(&bld->base, bld->const_);
- return bld->inv_const;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ case PIPE_BLEND_MIN:
+ case PIPE_BLEND_MAX:
+ return TRUE;
+ case PIPE_BLEND_SUBTRACT:
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return FALSE;
default:
assert(0);
- return bld->base.zero;
+ return TRUE;
}
}
-enum lp_build_blend_swizzle {
- LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
- LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
-};
-
-
/**
- * How should we shuffle the base factor.
+ * Whether the blending functions are the reverse of each other.
+ *
+ * @param rgb_func   PIPE_BLEND_xxx function used for RGB
+ * @param alpha_func PIPE_BLEND_xxx function used for alpha
+ * @return TRUE only when one argument is PIPE_BLEND_SUBTRACT and the other
+ *         is PIPE_BLEND_REVERSE_SUBTRACT (in either order); FALSE for every
+ *         other combination, including two identical functions.
*/
-static enum lp_build_blend_swizzle
-lp_build_blend_factor_swizzle(unsigned factor)
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
{
- switch (factor) {
- case PIPE_BLENDFACTOR_ONE:
- case PIPE_BLENDFACTOR_ZERO:
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return LP_BUILD_BLEND_SWIZZLE_AAAA;
- default:
- assert(0);
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- }
+ if(rgb_func == alpha_func)
+ return FALSE;
+ if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
+ return TRUE;
+ if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
+ return TRUE;
+ return FALSE;
}
-static LLVMValueRef
-lp_build_blend_swizzle(struct lp_build_blend_context *bld,
- LLVMValueRef rgb,
- LLVMValueRef alpha,
- enum lp_build_blend_swizzle rgb_swizzle,
- unsigned alpha_swizzle)
-{
- const unsigned n = bld->base.type.length;
- LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
-
- if(rgb == alpha) {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
- return rgb;
-
- alpha = bld->base.undef;
- }
-
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA &&
- !bld->base.type.floating) {
-#if 0
- /* Use a select */
- /* FIXME: Unfortunetaly select of vectors do not work */
-
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- swizzles[j + i] = LLVMConstInt(LLVMInt1Type(), i == alpha_swizzle ? 0 : 1, 0);
-
- return LLVMBuildSelect(bld->base.builder, LLVMConstVector(swizzles, n), rgb, alpha, "");
-#else
- /* XXX: Use a bitmask, as byte shuffles often end up being translated
- * into many PEXTRB. Ideally LLVM X86 code generation should pick this
- * automatically for us. */
-
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- swizzles[j + i] = LLVMConstInt(LLVMIntType(bld->base.type.width), i == alpha_swizzle ? 0 : ~0, 0);
-
- /* TODO: Unfortunately constant propagation prevents from using PANDN. And
- * on SSE4 we have even better -- PBLENDVB */
- return LLVMBuildOr(bld->base.builder,
- LLVMBuildAnd(bld->base.builder, rgb, LLVMConstVector(swizzles, n), ""),
- LLVMBuildAnd(bld->base.builder, alpha, LLVMBuildNot(bld->base.builder, LLVMConstVector(swizzles, n), ""), ""),
- "");
-#endif
- }
-
- for(j = 0; j < n; j += 4) {
- for(i = 0; i < 4; ++i) {
- unsigned swizzle;
-
- if(i == alpha_swizzle && alpha != bld->base.undef) {
- /* Take the alpha from the second shuffle argument */
- swizzle = n + j + alpha_swizzle;
- }
- else if (rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
- /* Take the alpha from the first shuffle argument */
- swizzle = j + alpha_swizzle;
- }
- else {
- swizzle = j + i;
- }
-
- swizzles[j + i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
- }
- }
-
- return LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
-}
-
-
-static LLVMValueRef
-lp_build_blend_factor(struct lp_build_blend_context *bld,
- LLVMValueRef factor1,
- unsigned rgb_factor,
- unsigned alpha_factor,
- unsigned alpha_swizzle)
+/**
+ * Whether the blending factors are complementary of each other.
+ *
+ * The check is purely numeric: the factors are reported as complementary
+ * when dst_factor equals src_factor with bit 0x10 flipped.
+ *
+ * NOTE(review): this presumably relies on the PIPE_BLENDFACTOR_xxx values
+ * being laid out so that each factor and its inverse differ only in bit
+ * 0x10 -- confirm against the enum definition.
+ *
+ * @param src_factor PIPE_BLENDFACTOR_xxx
+ * @param dst_factor PIPE_BLENDFACTOR_xxx
+ * @return TRUE when dst_factor == (src_factor ^ 0x10), FALSE otherwise
+ */
+static INLINE boolean
+lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
{
- LLVMValueRef rgb_factor_;
- LLVMValueRef alpha_factor_;
- LLVMValueRef factor2;
- enum lp_build_blend_swizzle rgb_swizzle;
-
- rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
- alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-
- rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
-
- factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
-
- return lp_build_mul(&bld->base, factor1, factor2);
+ return dst_factor == (src_factor ^ 0x10);
}
-static LLVMValueRef
-lp_build_blend_func(struct lp_build_blend_context *bld,
+/**
+ * Combine two already-weighted terms with the given blend function.
+ *
+ * @param bld   build context passed through to the arithmetic helpers
+ * @param func  PIPE_BLEND_xxx
+ * @param term1 first operand
+ * @param term2 second operand
+ * @return the value built by the matching helper:
+ *         ADD              -> lp_build_add(term1, term2)
+ *         SUBTRACT         -> lp_build_sub(term1, term2)
+ *         REVERSE_SUBTRACT -> lp_build_sub(term2, term1), operands swapped
+ *         MIN / MAX        -> lp_build_min / lp_build_max (term1, term2)
+ *         An unknown func hits assert(0) and then yields bld->zero.
+ *
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
unsigned func,
- LLVMValueRef term1,
+ LLVMValueRef term1,
LLVMValueRef term2)
{
switch (func) {
case PIPE_BLEND_ADD:
- return lp_build_add(&bld->base, term1, term2);
- break;
+ return lp_build_add(bld, term1, term2);
case PIPE_BLEND_SUBTRACT:
- return lp_build_sub(&bld->base, term1, term2);
+ return lp_build_sub(bld, term1, term2);
case PIPE_BLEND_REVERSE_SUBTRACT:
- return lp_build_sub(&bld->base, term2, term1);
+ return lp_build_sub(bld, term2, term1);
case PIPE_BLEND_MIN:
- return lp_build_min(&bld->base, term1, term2);
+ return lp_build_min(bld, term1, term2);
case PIPE_BLEND_MAX:
- return lp_build_max(&bld->base, term1, term2);
+ return lp_build_max(bld, term1, term2);
default:
assert(0);
- return bld->base.zero;
+ return bld->zero;
}
}
+/**
+ * Performs optimisations and blending independent of SoA/AoS
+ *
+ * @param bld build context used to emit the arithmetic
+ * @param func the blend function
+ * @param factor_src PIPE_BLENDFACTOR_xxx
+ * @param factor_dst PIPE_BLENDFACTOR_xxx
+ * @param src source rgba
+ * @param dst dest rgba
+ * @param src_factor src factor computed value
+ * @param dst_factor dst factor computed value
+ * @param not_alpha_dependent same factors across all channels of src/dst
+ * @param optimise_only if TRUE and none of the special cases below applies,
+ *                      return NULL instead of emitting the generic blend
+ * @return the blended value, or NULL as described for optimise_only
+ *
+ * not_alpha_dependent should be:
+ * SoA: always true as it is only one channel at a time
+ * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
+ *
+ * Special cases, all gated on not_alpha_dependent:
+ *  - complementary factors with PIPE_BLEND_ADD become a single lp_build_lerp;
+ *  - complementary factors with SUBTRACT / REVERSE_SUBTRACT on floating
+ *    types multiply (src + dst) by one factor before the subtraction;
+ *  - identical factors with ADD / SUBTRACT / REVERSE_SUBTRACT on floating
+ *    types apply the blend function to src and dst first, then multiply once.
+ * Otherwise the result is func(src * src_factor, dst * dst_factor).
+ *
+ * Note that pretty much every possible optimisation can only be done on non-unorm targets
+ * due to unorm values not going above 1.0 meaning factorisation can change results.
+ * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1.
+ */
LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- union lp_type type,
+lp_build_blend(struct lp_build_context *bld,
+ unsigned func,
+ unsigned factor_src,
+ unsigned factor_dst,
LLVMValueRef src,
LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle)
+ LLVMValueRef src_factor,
+ LLVMValueRef dst_factor,
+ boolean not_alpha_dependent,
+ boolean optimise_only)
{
- struct lp_build_blend_context bld;
- LLVMValueRef src_term;
- LLVMValueRef dst_term;
+ LLVMValueRef result, src_term, dst_term;
+
+ /* If we are not alpha dependent we can mess with the src/dst factors */
+ if (not_alpha_dependent) {
+ if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {
+ if (func == PIPE_BLEND_ADD) {
+ if (factor_src < factor_dst) {
+ return lp_build_lerp(bld, src_factor, dst, src, 0);
+ } else {
+ return lp_build_lerp(bld, dst_factor, src, dst, 0);
+ }
+ } else if(bld->type.floating && func == PIPE_BLEND_SUBTRACT) {
+ result = lp_build_add(bld, src, dst);
+
+ if (factor_src < factor_dst) {
+ result = lp_build_mul(bld, result, src_factor);
+ return lp_build_sub(bld, result, dst);
+ } else {
+ result = lp_build_mul(bld, result, dst_factor);
+ return lp_build_sub(bld, src, result);
+ }
+ } else if(bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {
+ result = lp_build_add(bld, src, dst);
+
+ if (factor_src < factor_dst) {
+ result = lp_build_mul(bld, result, src_factor);
+ return lp_build_sub(bld, dst, result);
+ } else {
+ result = lp_build_mul(bld, result, dst_factor);
+ return lp_build_sub(bld, result, src);
+ }
+ }
+ }
- /* It makes no sense to blend unless values are normalized */
- assert(type.norm);
+ if (bld->type.floating && factor_src == factor_dst) {
+ if (func == PIPE_BLEND_ADD ||
+ func == PIPE_BLEND_SUBTRACT ||
+ func == PIPE_BLEND_REVERSE_SUBTRACT) {
+ LLVMValueRef result;
+ result = lp_build_blend_func(bld, func, src, dst);
+ return lp_build_mul(bld, result, src_factor);
+ }
+ }
+ }
- /* Setup build context */
- memset(&bld, 0, sizeof bld);
- bld.base.builder = builder;
- bld.base.type = type;
- bld.base.undef = lp_build_undef(type);
- bld.base.zero = lp_build_zero(type);
- bld.base.one = lp_build_one(type);
- bld.src = src;
- bld.dst = dst;
- bld.const_ = const_;
+ if (optimise_only)
+ return NULL;
- /* TODO: There are still a few optimization oportunities here. For certain
- * combinations it is possible to reorder the operations and therefor saving
- * some instructions. */
+ src_term = lp_build_mul(bld, src, src_factor);
+ dst_term = lp_build_mul(bld, dst, dst_factor);
+ return lp_build_blend_func(bld, func, src_term, dst_term);
+}
- src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
- dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
+/**
+ * Fold an alpha-to-coverage test into a mask context.
+ *
+ * Builds the comparison alpha > 0.5 (PIPE_FUNC_GREATER against a constant
+ * 0.5 vector of the given type), names it "alpha_to_coverage" and passes it
+ * to lp_build_mask_update(). When do_branch is set, lp_build_mask_check()
+ * is called afterwards.
+ *
+ * NOTE(review): presumably this disables the lanes whose alpha is <= 0.5 --
+ * confirm against lp_build_mask_update().
+ *
+ * @param gallivm   gallivm state used to initialise the local build context
+ * @param type      type of the alpha value and of the 0.5 constant
+ * @param mask      mask context updated with the test result
+ * @param alpha     alpha value to compare
+ * @param do_branch if TRUE, also call lp_build_mask_check() on the mask
+ */
+void
+lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
+ struct lp_type type,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef alpha,
+ boolean do_branch)
+{
+ struct lp_build_context bld;
+ LLVMValueRef test;
+ LLVMValueRef alpha_ref_value;
- if(blend->rgb_func == blend->alpha_func) {
- return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
- }
- else {
- /* Seperate RGB / A functions */
+ lp_build_context_init(&bld, gallivm, type);
- LLVMValueRef rgb;
- LLVMValueRef alpha;
+ alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5);
- rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
- alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term);
+ test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);
- return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
- }
+ lp_build_name(test, "alpha_to_coverage");
+
+ lp_build_mask_update(mask, test);
+
+ if (do_branch)
+ lp_build_mask_check(mask);
}