/**************************************************************************
*
- * Copyright 2009 VMware, Inc.
+ * Copyright 2012 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
*
**************************************************************************/
-
-/**
- * @file
- * Blend LLVM IR generation.
- *
- * This code is generic -- it should be able to cope both with floating point
- * and integer inputs in AOS form.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- */
-
-
#include "pipe/p_state.h"
+#include "util/u_debug.h"
-#include "lp_bld.h"
-#include "lp_bld_arit.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
+#include "lp_bld_blend.h"
/**
- * We may the same values several times, so we keep them here to avoid
- * recomputing them. Also reusing the values allows us to do simplifications
- * that LLVM optimization passes wouldn't normally be able to do.
+ * Is (a OP b) == (b OP a)?
+ *
+ * @param func blend function, compared against the PIPE_BLEND_xxx values
+ * @return TRUE for PIPE_BLEND_ADD, PIPE_BLEND_MIN and PIPE_BLEND_MAX;
+ * FALSE for PIPE_BLEND_SUBTRACT and PIPE_BLEND_REVERSE_SUBTRACT.
+ * Any other value hits assert(0); if that does not abort, TRUE is returned.
*/
-struct lp_build_blend_values
+boolean
+lp_build_blend_func_commutative(unsigned func)
{
- LLVMBuilderRef builder;
-
- LLVMValueRef undef;
- LLVMValueRef zero;
- LLVMValueRef one;
-
- LLVMValueRef src;
- LLVMValueRef dst;
- LLVMValueRef const_;
-
- LLVMValueRef inv_src;
- LLVMValueRef inv_dst;
- LLVMValueRef inv_const;
- LLVMValueRef saturate;
-
- LLVMValueRef rgb_src_factor;
- LLVMValueRef alpha_src_factor;
- LLVMValueRef rgb_dst_factor;
- LLVMValueRef alpha_dst_factor;
-};
-
-
-static LLVMValueRef
-lp_build_blend_factor_unswizzled(struct lp_build_blend_values *values,
- unsigned factor,
- boolean alpha)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_ZERO:
- return values->zero;
- case PIPE_BLENDFACTOR_ONE:
- return values->one;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return values->src;
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return values->dst;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if(alpha)
- return values->one;
- else {
- if(!values->inv_dst)
- values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero);
- if(!values->saturate)
- values->saturate = lp_build_min_sat(values->builder, values->src, values->inv_dst, values->zero, values->one);
- return values->saturate;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return values->const_;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return values->zero;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- if(!values->inv_src)
- values->inv_src = lp_build_sub(values->builder, values->one, values->src, values->zero);
- return values->inv_src;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- if(!values->inv_dst)
- values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero);
- return values->inv_dst;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- if(!values->inv_const)
- values->inv_const = lp_build_sub(values->builder, values->one, values->const_, values->zero);
- return values->inv_const;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return values->zero;
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ case PIPE_BLEND_MIN:
+ case PIPE_BLEND_MAX:
+ return TRUE; /* operand order does not matter for these */
+ case PIPE_BLEND_SUBTRACT:
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return FALSE; /* swapping the operands changes the result */
default:
assert(0);
- return values->zero;
+ return TRUE; /* only reached if assert(0) does not abort */
}
}
-enum lp_build_blend_swizzle {
- LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
- LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
-};
-
-
/**
- * How should we shuffle the base factor.
+ * Whether the blending functions are the reverse of each other.
+ *
+ * @param rgb_func first blend function (the RGB one, going by its name)
+ * @param alpha_func second blend function (the alpha one, going by its name)
+ * @return TRUE only when one argument is PIPE_BLEND_SUBTRACT and the other is
+ * PIPE_BLEND_REVERSE_SUBTRACT, in either order; FALSE in every other
+ * case, including when both arguments are equal.
*/
-static enum lp_build_blend_swizzle
-lp_build_blend_factor_swizzle(unsigned factor)
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
{
- switch (factor) {
- case PIPE_BLENDFACTOR_ONE:
- case PIPE_BLENDFACTOR_ZERO:
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return LP_BUILD_BLEND_SWIZZLE_AAAA;
- default:
- assert(0);
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- }
-}
-
-
-static LLVMValueRef
-lp_build_blend_swizzle(struct lp_build_blend_values *values,
- LLVMValueRef rgb,
- LLVMValueRef alpha,
- enum lp_build_blend_swizzle rgb_swizzle,
- unsigned alpha_swizzle,
- unsigned n)
-{
- LLVMValueRef swizzles[LP_MAX_VECTOR_SIZE];
- unsigned i, j;
-
- if(rgb == alpha) {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
- return rgb;
-
- alpha = values->undef;
- }
-
- for(j = 0; j < n; j += 4) {
- for(i = 0; i < 4; ++i) {
- unsigned swizzle;
-
- if(i == alpha_swizzle && alpha != values->undef) {
- /* Take the alpha from the second shuffle argument */
- swizzle = n + j + alpha_swizzle;
- }
- else if (rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
- /* Take the alpha from the first shuffle argument */
- swizzle = j + alpha_swizzle;
- }
- else {
- swizzle = j + i;
- }
-
- swizzles[j + i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
- }
- }
-
- return LLVMBuildShuffleVector(values->builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
+ if(rgb_func == alpha_func)
+ return FALSE; /* identical functions are never reported as reversed */
+ if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
+ return TRUE;
+ if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
+ return TRUE;
+ return FALSE; /* any other pairing */
}
-static LLVMValueRef
-lp_build_blend_factor(struct lp_build_blend_values *values,
- LLVMValueRef factor1,
- unsigned rgb_factor,
- unsigned alpha_factor,
- unsigned alpha_swizzle,
- unsigned n)
+/**
+ * Whether the blending factors are complementary of each other.
+ *
+ * The test flips bit 0x10 of src_factor and compares the outcome with
+ * dst_factor, so it is true exactly when the two values differ in that bit
+ * and nowhere else.
+ *
+ * NOTE(review): this presumably relies on every PIPE_BLENDFACTOR_INV_xxx value
+ * differing from its non-inverted counterpart only in bit 0x10 -- confirm
+ * against the PIPE_BLENDFACTOR_xxx definitions.
+ *
+ * @param src_factor PIPE_BLENDFACTOR_xxx value for the source
+ * @param dst_factor PIPE_BLENDFACTOR_xxx value for the destination
+ * @return non-zero when dst_factor == (src_factor ^ 0x10), zero otherwise
+ */
+static INLINE boolean
+lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
{
- LLVMValueRef rgb_factor_;
- LLVMValueRef alpha_factor_;
- LLVMValueRef factor2;
- enum lp_build_blend_swizzle rgb_swizzle;
-
- rgb_factor_ = lp_build_blend_factor_unswizzled(values, rgb_factor, FALSE);
- alpha_factor_ = lp_build_blend_factor_unswizzled(values, alpha_factor, TRUE);
-
- rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
-
- factor2 = lp_build_blend_swizzle(values, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, n);
-
- return lp_build_mul(values->builder, factor1, factor2, values->zero, values->one);
+ return dst_factor == (src_factor ^ 0x10);
}
-static LLVMValueRef
-lp_build_blend_func(struct lp_build_blend_values *values,
+/**
+ * Combine two already-weighted terms according to a blend function.
+ *
+ * @param bld build context forwarded to the lp_build_* helpers
+ * @param func PIPE_BLEND_xxx selector
+ * @param term1 first operand
+ * @param term2 second operand
+ * @return the value built by lp_build_add(term1, term2) for ADD,
+ * lp_build_sub(term1, term2) for SUBTRACT,
+ * lp_build_sub(term2, term1) for REVERSE_SUBTRACT,
+ * lp_build_min / lp_build_max of both terms for MIN / MAX.
+ * Any other func hits assert(0) and yields bld->zero.
+ *
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
unsigned func,
- LLVMValueRef term1,
+ LLVMValueRef term1,
LLVMValueRef term2)
{
switch (func) {
case PIPE_BLEND_ADD:
- return lp_build_add_sat(values->builder, term1, term2, values->zero, values->one);
- break;
+ return lp_build_add(bld, term1, term2);
case PIPE_BLEND_SUBTRACT:
- return lp_build_sub_sat(values->builder, term1, term2, values->zero, values->one);
+ return lp_build_sub(bld, term1, term2);
case PIPE_BLEND_REVERSE_SUBTRACT:
- return lp_build_sub_sat(values->builder, term2, term1, values->zero, values->one);
+ return lp_build_sub(bld, term2, term1); /* same helper, operands swapped */
case PIPE_BLEND_MIN:
- return lp_build_min_sat(values->builder, term1, term2, values->zero, values->one);
+ return lp_build_min(bld, term1, term2);
case PIPE_BLEND_MAX:
- return lp_build_max_sat(values->builder, term1, term2, values->zero, values->one);
+ return lp_build_max(bld, term1, term2);
default:
assert(0);
- return values->zero;
+ return bld->zero; /* only reached if assert(0) does not abort */
}
}
+/**
+ * Performs optimisations and blending independent of SoA/AoS
+ *
+ * @param bld the build context handed to every lp_build_* helper
+ * @param func the blend function
+ * @param factor_src PIPE_BLENDFACTOR_xxx
+ * @param factor_dst PIPE_BLENDFACTOR_xxx
+ * @param src source rgba
+ * @param dst dest rgba
+ * @param src_factor src factor computed value
+ * @param dst_factor dst factor computed value
+ * @param not_alpha_dependent same factors across all channels of src/dst
+ * @param optimise_only if set, only the shortcut paths are attempted and NULL
+ * is returned when none of them applies
+ *
+ * @return the blended value, or NULL when optimise_only is set and no
+ * shortcut path was taken
+ *
+ * not_alpha_dependent should be:
+ * SoA: always true as it is only one channel at a time
+ * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
+ *
+ * Note that pretty much every possible optimisation can only be done on non-unorm targets
+ * due to unorm values not going above 1.0 meaning factorisation can change results.
+ * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1.
+ */
LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
+lp_build_blend(struct lp_build_context *bld,
+ unsigned func,
+ unsigned factor_src,
+ unsigned factor_dst,
LLVMValueRef src,
LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle)
+ LLVMValueRef src_factor,
+ LLVMValueRef dst_factor,
+ boolean not_alpha_dependent,
+ boolean optimise_only)
{
- struct lp_build_blend_values values;
- LLVMValueRef src_term;
- LLVMValueRef dst_term;
- LLVMTypeRef type;
- unsigned n;
-
- type = LLVMTypeOf(src);
- n = LLVMGetVectorSize(type);
-
- /*
- * Compute constants
- */
- memset(&values, 0, sizeof values);
- values.builder = builder;
- values.undef = LLVMGetUndef(type);
- values.zero = LLVMConstNull(type);
- values.one = lp_build_const_aos(type, 1.0, 1.0, 1.0, 1.0, NULL);
-
- values.src = src;
- values.dst = dst;
- values.const_ = const_;
-
- /* TODO: There are still a few optimization oportunities here. For certain
- * combinations it is possible to reorder the operations and therefor saving
- * some instructions. */
-
- src_term = lp_build_blend_factor(&values, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle, n);
- dst_term = lp_build_blend_factor(&values, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle, n);
+ LLVMValueRef result, src_term, dst_term;
+
+ /* If we are not alpha dependent we can mess with the src/dst factors */
+ if (not_alpha_dependent) {
+ if (lp_build_blend_factor_complementary(factor_src, factor_dst)) { /* enums differ only in bit 0x10 */
+ if (func == PIPE_BLEND_ADD) { /* note: this branch is not gated on bld->type.floating */
+ if (factor_src < factor_dst) { /* presumably: factor_src is the non-inverted one -- confirm enum ordering */
+ return lp_build_lerp(bld, src_factor, dst, src, 0); /* assuming lerp(x, v0, v1) = v0 + x*(v1 - v0): dst + f*(src - dst) */
+ } else {
+ return lp_build_lerp(bld, dst_factor, src, dst, 0); /* mirrored: src + g*(dst - src) */
+ }
+ } else if(bld->type.floating && func == PIPE_BLEND_SUBTRACT) {
+ result = lp_build_add(bld, src, dst); /* shared (src + dst) term for both sub-cases */
+
+ if (factor_src < factor_dst) {
+ result = lp_build_mul(bld, result, src_factor);
+ return lp_build_sub(bld, result, dst); /* assuming dst_factor == 1 - f: src*f - dst*(1-f) == (src+dst)*f - dst */
+ } else {
+ result = lp_build_mul(bld, result, dst_factor);
+ return lp_build_sub(bld, src, result); /* assuming src_factor == 1 - g: src*(1-g) - dst*g == src - (src+dst)*g */
+ }
+ } else if(bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {
+ result = lp_build_add(bld, src, dst); /* shared (src + dst) term for both sub-cases */
+
+ if (factor_src < factor_dst) {
+ result = lp_build_mul(bld, result, src_factor);
+ return lp_build_sub(bld, dst, result); /* assuming dst_factor == 1 - f: dst*(1-f) - src*f == dst - (src+dst)*f */
+ } else {
+ result = lp_build_mul(bld, result, dst_factor);
+ return lp_build_sub(bld, result, src); /* assuming src_factor == 1 - g: dst*g - src*(1-g) == (src+dst)*g - src */
+ }
+ }
+ }
- if(blend->rgb_func == blend->alpha_func) {
- return lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term);
+ if (bld->type.floating && factor_src == factor_dst) { /* same factor enum on both sides: factor it out */
+ if (func == PIPE_BLEND_ADD ||
+ func == PIPE_BLEND_SUBTRACT ||
+ func == PIPE_BLEND_REVERSE_SUBTRACT) {
+ LLVMValueRef result; /* NOTE(review): shadows the function-scope 'result' declared above */
+ result = lp_build_blend_func(bld, func, src, dst);
+ return lp_build_mul(bld, result, src_factor); /* (src OP dst) * factor; dst_factor is not read on this path */
+ }
+ }
}
- else {
- /* Seperate RGB / A functions */
- LLVMValueRef rgb;
- LLVMValueRef alpha;
+ if (optimise_only) /* no shortcut matched and the caller asked for shortcuts only */
+ return NULL;
- rgb = lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term);
- alpha = lp_build_blend_func(&values, blend->alpha_func, src_term, dst_term);
-
- return lp_build_blend_swizzle(&values, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle, n);
- }
+ src_term = lp_build_mul(bld, src, src_factor); /* generic path: weight each side ... */
+ dst_term = lp_build_mul(bld, dst, dst_factor);
+ return lp_build_blend_func(bld, func, src_term, dst_term); /* ... then combine with the blend function */
}