* Blending in SoA is much faster than AoS, especially when separate rgb/alpha
* factors/functions are used, since no channel masking/shuffling is necessary
* and we can achieve the full throughput of the SIMD operations. Furthermore
- * the fragment shader output is also in SoA, so it fits nicely with the rest of
- * the fragment pipeline.
+ * the fragment shader output is also in SoA, so it fits nicely with the rest
+ * of the fragment pipeline.
*
* The drawback is that to be displayed the color buffer needs to be in AoS
* layout, so we need to tile/untile the color buffer before/after rendering.
#include "pipe/p_state.h"
+#include "util/u_debug.h"
-#include "lp_bld_type.h"
-#include "lp_bld_arit.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
#include "lp_bld_blend.h"
/**
- * We may the same values several times, so we keep them here to avoid
+ * We may use the same values several times, so we keep them here to avoid
* recomputing them. Also reusing the values allows us to do simplifications
* that LLVM optimization passes wouldn't normally be able to do.
*/
/**
* We store all factors in a table in order to eliminate redundant
* multiplications later.
+ * Indexes are: factor[src,dst][color,term][r,g,b,a]
*/
LLVMValueRef factor[2][2][4];
/**
* Table with all terms.
+ * Indexes are: term[src,dst][r,g,b,a]
*/
LLVMValueRef term[2][4];
};
+/**
+ * Build a single SOA blend factor for a color channel.
+ * \param i the color channel in [0,3]
+ */
static LLVMValueRef
lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
unsigned factor, unsigned i)
/**
* Generate blend code in SOA mode.
+ * \param rt render target index (to index the blend / colormask state)
* \param src src/fragment color
* \param dst dst/framebuffer color
* \param con constant blend color
lp_build_blend_soa(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
struct lp_type type,
+ unsigned rt,
LLVMValueRef src[4],
LLVMValueRef dst[4],
LLVMValueRef con[4],
struct lp_build_blend_soa_context bld;
unsigned i, j, k;
+ assert(rt < PIPE_MAX_COLOR_BUFS);
+
/* Setup build context */
memset(&bld, 0, sizeof bld);
lp_build_context_init(&bld.base, builder, type);
}
for (i = 0; i < 4; ++i) {
- if (blend->rt[0].colormask & (1 << i)) {
+ /* only compute blending for the color channels enabled for writing */
+ if (blend->rt[rt].colormask & (1 << i)) {
if (blend->logicop_enable) {
if(!type.floating) {
res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
else
res[i] = dst[i];
}
- else if (blend->rt[0].blend_enable) {
- unsigned src_factor = i < 3 ? blend->rt[0].rgb_src_factor : blend->rt[0].alpha_src_factor;
- unsigned dst_factor = i < 3 ? blend->rt[0].rgb_dst_factor : blend->rt[0].alpha_dst_factor;
- unsigned func = i < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func;
+ else if (blend->rt[rt].blend_enable) {
+ unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor;
+ unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor;
+ unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
boolean func_commutative = lp_build_blend_func_commutative(func);
/* It makes no sense to blend unless values are normalized */
bld.term[k][i] = bld.term[k][j];
else
bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);
+
+ if (src_factor == PIPE_BLENDFACTOR_ZERO &&
+ (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA ||
+ dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
+ /* XXX special case these combos to work around an apparent
+ * bug in LLVM.
+ * This hack disables the check for multiplication by zero
+ * in lp_bld_mul(). When we optimize away the
+ * multiplication, something goes wrong during code
+ * generation and we segfault at runtime.
+ */
+ LLVMValueRef zeroSave = bld.base.zero;
+ bld.base.zero = NULL;
+ bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i],
+ bld.factor[k][1][i]);
+ bld.base.zero = zeroSave;
+ }
}
/*
/* See if this function has been previously applied */
for(j = 0; j < i; ++j) {
- unsigned prev_func = j < 3 ? blend->rt[0].rgb_func : blend->rt[0].alpha_func;
+ unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);
if((!func_reverse &&