X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_bld_blend_soa.c;h=4d5bc9642d928cc328e35d883bbc8dd1208e3f48;hb=9344ab382a1765c1a7c2560e771485edf4954fe2;hp=9511299d558daea791b6c04e130fe5bd8bd8505e;hpb=2a661c383fee65bc4413541e706925fa3e9b9cf5;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index 9511299d558..4d5bc9642d9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -33,8 +33,8 @@ * Blending in SoA is much faster than AoS, especially when separate rgb/alpha * factors/functions are used, since no channel masking/shuffling is necessary * and we can achieve the full throughput of the SIMD operations. Furthermore - * the fragment shader output is also in SoA, so it fits nicely with the rest of - * the fragment pipeline. + * the fragment shader output is also in SoA, so it fits nicely with the rest + * of the fragment pipeline. * * The drawback is that to be displayed the color buffer needs to be in AoS * layout, so we need to tile/untile the color buffer before/after rendering. @@ -69,15 +69,16 @@ #include "pipe/p_state.h" +#include "util/u_debug.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_init.h" #include "lp_bld_blend.h" /** - * We may the same values several times, so we keep them here to avoid + * We may use the same values several times, so we keep them here to avoid * recomputing them. Also reusing the values allows us to do simplifications * that LLVM optimization passes wouldn't normally be able to do. */ @@ -98,16 +99,22 @@ struct lp_build_blend_soa_context /** * We store all factors in a table in order to eliminate redundant * multiplications later. + * Indexes are: factor[src,dst][color,term][r,g,b,a] */ LLVMValueRef factor[2][2][4]; /** * Table with all terms. + * Indexes are: term[src,dst][r,g,b,a] */ LLVMValueRef term[2][4]; }; +/** + * Build a single SOA blend factor for a color channel. + * \param i the color channel in [0,3] + */ static LLVMValueRef lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, unsigned factor, unsigned i) @@ -189,28 +196,40 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, } +static boolean +lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) +{ + return dst_factor == (src_factor ^ 0x10); +} + + /** * Generate blend code in SOA mode. + * \param rt render target index (to index the blend / colormask state) * \param src src/fragment color * \param dst dst/framebuffer color * \param con constant blend color * \param res the result/output */ void -lp_build_blend_soa(LLVMBuilderRef builder, +lp_build_blend_soa(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, struct lp_type type, + unsigned rt, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef con[4], LLVMValueRef res[4]) { + LLVMBuilderRef builder = gallivm->builder; struct lp_build_blend_soa_context bld; unsigned i, j, k; + assert(rt < PIPE_MAX_COLOR_BUFS); + /* Setup build context */ memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, builder, type); + lp_build_context_init(&bld.base, gallivm, type); for (i = 0; i < 4; ++i) { bld.src[i] = src[i]; bld.dst[i] = dst[i]; @@ -218,7 +237,8 @@ lp_build_blend_soa(LLVMBuilderRef builder, } for (i = 0; i < 4; ++i) { - if (blend->colormask & (1 << i)) { + /* only compute blending for the color channels enabled for writing */ + if (blend->rt[rt].colormask & (1 << i)) { if (blend->logicop_enable) { if(!type.floating) { res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]); @@ -226,14 +246,47 @@ lp_build_blend_soa(LLVMBuilderRef builder, else res[i] = dst[i]; } - else if (blend->blend_enable) { - unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor; - unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor; - unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func; + else if (blend->rt[rt].blend_enable) { + unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor; + unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor; + unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; boolean func_commutative = lp_build_blend_func_commutative(func); - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); + if (func == PIPE_BLEND_ADD && + lp_build_blend_factor_complementary(src_factor, dst_factor) && 0) { + /* + * Special case linear interpolation, (i.e., complementary factors). + */ + + LLVMValueRef weight; + if (src_factor < dst_factor) { + weight = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, dst[i], src[i]); + } else { + weight = lp_build_blend_soa_factor(&bld, dst_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, src[i], dst[i]); + } + continue; + } + + if ((func == PIPE_BLEND_ADD || + func == PIPE_BLEND_SUBTRACT || + func == PIPE_BLEND_REVERSE_SUBTRACT) && + src_factor == dst_factor && + type.floating) { + /* + * Special common factor. + * + * XXX: Only for floating points for now, since saturation will + * cause different results. + */ + + LLVMValueRef factor; + factor = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_blend_func(&bld.base, func, src[i], dst[i]); + res[i] = lp_build_mul(&bld.base, res[i], factor); + continue; + } /* * Compute src/dst factors. @@ -262,6 +315,23 @@ lp_build_blend_soa(LLVMBuilderRef builder, bld.term[k][i] = bld.term[k][j]; else bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]); + + if (src_factor == PIPE_BLENDFACTOR_ZERO && + (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA || + dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) { + /* XXX special case these combos to work around an apparent + * bug in LLVM. + * This hack disables the check for multiplication by zero + * in lp_bld_mul(). When we optimize away the + * multiplication, something goes wrong during code + * generation and we segfault at runtime. + */ + LLVMValueRef zeroSave = bld.base.zero; + bld.base.zero = NULL; + bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], + bld.factor[k][1][i]); + bld.base.zero = zeroSave; + } } /* @@ -270,7 +340,7 @@ lp_build_blend_soa(LLVMBuilderRef builder, /* See if this function has been previously applied */ for(j = 0; j < i; ++j) { - unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func; + unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); if((!func_reverse &&