From: José Fonseca Date: Sun, 9 Aug 2009 11:39:38 +0000 (+0100) Subject: llvmpipe: SoA blending. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2529ed5616b1b152766a3355444260b88184cd6e;p=mesa.git llvmpipe: SoA blending. Throughput seems to be 4x higher. --- diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index aca4f21b9ed..0a8e6e8fad6 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -11,6 +11,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fs_sse.c', 'lp_fs_llvm.c', 'lp_bld_arit.c', + 'lp_bld_blend_aos.c', + 'lp_bld_blend_soa.c', 'lp_bld_const.c', 'lp_bld_conv.c', 'lp_bld_intr.c', @@ -20,7 +22,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_store.c', 'lp_bld_loop.c', 'lp_bld_logicop.c', - 'lp_bld_blend.c', 'lp_bld_swizzle.c', 'lp_bld_type.c', 'lp_clear.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h index e9d9c25a800..a725cbb4740 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld.h +++ b/src/gallium/drivers/llvmpipe/lp_bld.h @@ -45,7 +45,6 @@ #include "pipe/p_format.h" -struct pipe_blend_state; union lp_type; @@ -132,14 +131,4 @@ lp_build_logicop(LLVMBuilderRef builder, LLVMValueRef dst); -LLVMValueRef -lp_build_blend(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - union lp_type type, - LLVMValueRef src, - LLVMValueRef dst, - LLVMValueRef const_, - unsigned alpha_swizzle); - - #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c deleted file mode 100644 index a144469b354..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c +++ /dev/null @@ -1,313 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * @file - * Blend LLVM IR generation. - * - * This code is generic -- it should be able to cope both with floating point - * and integer inputs in AOS form. - * - * @author Jose Fonseca - */ - - -#include "pipe/p_state.h" - -#include "lp_bld.h" -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_arit.h" -#include "lp_bld_swizzle.h" - - -/** - * We may the same values several times, so we keep them here to avoid - * recomputing them. Also reusing the values allows us to do simplifications - * that LLVM optimization passes wouldn't normally be able to do. 
- */ -struct lp_build_blend_context -{ - struct lp_build_context base; - - LLVMValueRef src; - LLVMValueRef dst; - LLVMValueRef const_; - - LLVMValueRef inv_src; - LLVMValueRef inv_dst; - LLVMValueRef inv_const; - LLVMValueRef saturate; - - LLVMValueRef rgb_src_factor; - LLVMValueRef alpha_src_factor; - LLVMValueRef rgb_dst_factor; - LLVMValueRef alpha_dst_factor; -}; - - -static LLVMValueRef -lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld, - unsigned factor, - boolean alpha) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ZERO: - return bld->base.zero; - case PIPE_BLENDFACTOR_ONE: - return bld->base.one; - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_SRC_ALPHA: - return bld->src; - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_DST_ALPHA: - return bld->dst; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - if(alpha) - return bld->base.one; - else { - if(!bld->inv_dst) - bld->inv_dst = lp_build_comp(&bld->base, bld->dst); - if(!bld->saturate) - bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); - return bld->saturate; - } - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_CONST_ALPHA: - return bld->const_; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - if(!bld->inv_src) - bld->inv_src = lp_build_comp(&bld->base, bld->src); - return bld->inv_src; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - if(!bld->inv_dst) - bld->inv_dst = lp_build_comp(&bld->base, bld->dst); - return bld->inv_dst; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - if(!bld->inv_const) - bld->inv_const = lp_build_comp(&bld->base, bld->const_); - return bld->inv_const; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* TODO */ - assert(0); - return bld->base.zero; - default: - 
assert(0); - return bld->base.zero; - } -} - - -enum lp_build_blend_swizzle { - LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, -}; - - -/** - * How should we shuffle the base factor. - */ -static enum lp_build_blend_swizzle -lp_build_blend_factor_swizzle(unsigned factor) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - case PIPE_BLENDFACTOR_ZERO: - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - return LP_BUILD_BLEND_SWIZZLE_RGBA; - case PIPE_BLENDFACTOR_SRC_ALPHA: - case PIPE_BLENDFACTOR_DST_ALPHA: - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return LP_BUILD_BLEND_SWIZZLE_AAAA; - default: - assert(0); - return LP_BUILD_BLEND_SWIZZLE_RGBA; - } -} - - -static LLVMValueRef -lp_build_blend_swizzle(struct lp_build_blend_context *bld, - LLVMValueRef rgb, - LLVMValueRef alpha, - enum lp_build_blend_swizzle rgb_swizzle, - unsigned alpha_swizzle) -{ - if(rgb == alpha) { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) - return rgb; - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) - return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); - } - else { - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { - boolean cond[4] = {0, 0, 0, 0}; - cond[alpha_swizzle] = 1; - return lp_build_select_aos(&bld->base, alpha, rgb, cond); - } - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { - unsigned char swizzle[4]; - swizzle[0] = alpha_swizzle; - swizzle[1] = alpha_swizzle; - swizzle[2] = alpha_swizzle; - swizzle[3] = alpha_swizzle; - swizzle[alpha_swizzle] += 4; - return 
lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); - } - } - assert(0); - return bld->base.undef; -} - - -/** - * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml - */ -static LLVMValueRef -lp_build_blend_factor(struct lp_build_blend_context *bld, - LLVMValueRef factor1, - unsigned rgb_factor, - unsigned alpha_factor, - unsigned alpha_swizzle) -{ - LLVMValueRef rgb_factor_; - LLVMValueRef alpha_factor_; - LLVMValueRef factor2; - enum lp_build_blend_swizzle rgb_swizzle; - - rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); - alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); - - rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); - - factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); - - return lp_build_mul(&bld->base, factor1, factor2); -} - - -/** - * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml - */ -static LLVMValueRef -lp_build_blend_func(struct lp_build_blend_context *bld, - unsigned func, - LLVMValueRef term1, - LLVMValueRef term2) -{ - switch (func) { - case PIPE_BLEND_ADD: - return lp_build_add(&bld->base, term1, term2); - break; - case PIPE_BLEND_SUBTRACT: - return lp_build_sub(&bld->base, term1, term2); - case PIPE_BLEND_REVERSE_SUBTRACT: - return lp_build_sub(&bld->base, term2, term1); - case PIPE_BLEND_MIN: - return lp_build_min(&bld->base, term1, term2); - case PIPE_BLEND_MAX: - return lp_build_max(&bld->base, term1, term2); - default: - assert(0); - return bld->base.zero; - } -} - - -LLVMValueRef -lp_build_blend(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - union lp_type type, - LLVMValueRef src, - LLVMValueRef dst, - LLVMValueRef const_, - unsigned alpha_swizzle) -{ - struct lp_build_blend_context bld; - LLVMValueRef src_term; - LLVMValueRef dst_term; - - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); - - /* Setup build context */ - memset(&bld, 0, 
sizeof bld); - lp_build_context_init(&bld.base, builder, type); - bld.src = src; - bld.dst = dst; - bld.const_ = const_; - - /* TODO: There are still a few optimization oportunities here. For certain - * combinations it is possible to reorder the operations and therefor saving - * some instructions. */ - - src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle); - -#ifdef DEBUG - LLVMSetValueName(src_term, "src_term"); - LLVMSetValueName(dst_term, "dst_term"); -#endif - - if(blend->rgb_func == blend->alpha_func) { - return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); - } - else { - /* Seperate RGB / A functions */ - - LLVMValueRef rgb; - LLVMValueRef alpha; - - rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term); - - return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h new file mode 100644 index 00000000000..36f53dae935 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_BLEND_H +#define LP_BLD_BLEND_H + + +/** + * @file + * LLVM IR building helpers interfaces. + * + * We use LLVM-C bindings for now. They are not documented, but follow the C++ + * interfaces very closely, and appear to be complete enough for code + * generation. See + * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html + * for a standalone example. + */ + +#include <llvm-c/Core.h> + +#include "pipe/p_format.h" + + +struct pipe_blend_state; +union lp_type; +struct lp_build_context; + + +/** + * Whether the blending function is commutative or not. + */ +boolean +lp_build_blend_func_commutative(unsigned func); + + +/** + * Whether the blending functions are the reverse of each other. 
+ */ +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func); + + +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2); + + +LLVMValueRef +lp_build_blend_aos(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle); + + +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef const_[4], + LLVMValueRef res[4]); + + +#endif /* !LP_BLD_BLEND_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c new file mode 100644 index 00000000000..e4a57af94c7 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -0,0 +1,341 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- AOS form. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_state.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_blend.h" + + +/** + * We may use the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. + */ +struct lp_build_blend_aos_context +{ + struct lp_build_context base; + + LLVMValueRef src; + LLVMValueRef dst; + LLVMValueRef const_; + + LLVMValueRef inv_src; + LLVMValueRef inv_dst; + LLVMValueRef inv_const; + LLVMValueRef saturate; + + LLVMValueRef rgb_src_factor; + LLVMValueRef alpha_src_factor; + LLVMValueRef rgb_dst_factor; + LLVMValueRef alpha_dst_factor; +}; + + +static LLVMValueRef +lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, + unsigned factor, + boolean alpha) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_SRC_ALPHA: + return bld->src; + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(alpha) + return bld->base.one; + else { + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + if(!bld->saturate) + bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); + return bld->saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR: + case 
PIPE_BLENDFACTOR_CONST_ALPHA: + return bld->const_; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src) + bld->inv_src = lp_build_comp(&bld->base, bld->src); + return bld->inv_src; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + return bld->inv_dst; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if(!bld->inv_const) + bld->inv_const = lp_build_comp(&bld->base, bld->const_); + return bld->inv_const; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + default: + assert(0); + return bld->base.zero; + } +} + + +enum lp_build_blend_swizzle { + LP_BUILD_BLEND_SWIZZLE_RGBA = 0, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1, +}; + + +/** + * How should we shuffle the base factor. 
+ */ +static enum lp_build_blend_swizzle +lp_build_blend_factor_swizzle(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + case PIPE_BLENDFACTOR_ZERO: + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return LP_BUILD_BLEND_SWIZZLE_RGBA; + case PIPE_BLENDFACTOR_SRC_ALPHA: + case PIPE_BLENDFACTOR_DST_ALPHA: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return LP_BUILD_BLEND_SWIZZLE_AAAA; + default: + assert(0); + return LP_BUILD_BLEND_SWIZZLE_RGBA; + } +} + + +static LLVMValueRef +lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, + LLVMValueRef rgb, + LLVMValueRef alpha, + enum lp_build_blend_swizzle rgb_swizzle, + unsigned alpha_swizzle) +{ + if(rgb == alpha) { + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) + return rgb; + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) + return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); + } + else { + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { + boolean cond[4] = {0, 0, 0, 0}; + cond[alpha_swizzle] = 1; + return lp_build_select_aos(&bld->base, alpha, rgb, cond); + } + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { + unsigned char swizzle[4]; + swizzle[0] = alpha_swizzle; + swizzle[1] = alpha_swizzle; + swizzle[2] = alpha_swizzle; + swizzle[3] = alpha_swizzle; + swizzle[alpha_swizzle] += 4; + return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); + } + } + assert(0); + return bld->base.undef; +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml + */ +static 
LLVMValueRef +lp_build_blend_factor(struct lp_build_blend_aos_context *bld, + LLVMValueRef factor1, + unsigned rgb_factor, + unsigned alpha_factor, + unsigned alpha_swizzle) +{ + LLVMValueRef rgb_factor_; + LLVMValueRef alpha_factor_; + LLVMValueRef factor2; + enum lp_build_blend_swizzle rgb_swizzle; + + rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); + alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); + + rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); + + factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); + + return lp_build_mul(&bld->base, factor1, factor2); +} + + +boolean +lp_build_blend_func_commutative(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + case PIPE_BLEND_MIN: + case PIPE_BLEND_MAX: + return TRUE; + case PIPE_BLEND_SUBTRACT: + case PIPE_BLEND_REVERSE_SUBTRACT: + return FALSE; + default: + assert(0); + return TRUE; + } +} + + +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) +{ + if(rgb_func == alpha_func) + return FALSE; + if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + return TRUE; + if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + return TRUE; + return FALSE; +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml + */ +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2) +{ + switch (func) { + case PIPE_BLEND_ADD: + return lp_build_add(bld, term1, term2); + break; + case PIPE_BLEND_SUBTRACT: + return lp_build_sub(bld, term1, term2); + case PIPE_BLEND_REVERSE_SUBTRACT: + return lp_build_sub(bld, term2, term1); + case PIPE_BLEND_MIN: + return lp_build_min(bld, term1, term2); + case PIPE_BLEND_MAX: + return lp_build_max(bld, term1, term2); + default: + assert(0); + return bld->zero; + } +} + + +LLVMValueRef 
+lp_build_blend_aos(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle) +{ + struct lp_build_blend_aos_context bld; + LLVMValueRef src_term; + LLVMValueRef dst_term; + + /* It makes no sense to blend unless values are normalized */ + assert(type.norm); + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + bld.src = src; + bld.dst = dst; + bld.const_ = const_; + + /* TODO: There are still a few optimization opportunities here. For certain + * combinations it is possible to reorder the operations and therefore saving + * some instructions. */ + + src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle); + +#ifdef DEBUG + LLVMSetValueName(src_term, "src_term"); + LLVMSetValueName(dst_term, "dst_term"); +#endif + + if(blend->rgb_func == blend->alpha_func) { + return lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term); + } + else { + /* Separate RGB / A functions */ + + LLVMValueRef rgb; + LLVMValueRef alpha; + + rgb = lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, blend->alpha_func, src_term, dst_term); + + return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c new file mode 100644 index 00000000000..1ef1718cfda --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -0,0 +1,237 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- SoA. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_state.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_blend.h" + + +/** + * We may use the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. 
+ */ +struct lp_build_blend_soa_context +{ + struct lp_build_context base; + + LLVMValueRef src[4]; + LLVMValueRef dst[4]; + LLVMValueRef con[4]; + + LLVMValueRef inv_src[4]; + LLVMValueRef inv_dst[4]; + LLVMValueRef inv_con[4]; + + LLVMValueRef src_alpha_saturate; + + /** + * We store all factors in a table in order to eliminate redundant + * multiplications later. + */ + LLVMValueRef factor[2][8]; + + /** + * Table with all terms. + */ + LLVMValueRef term[8]; +}; + + +static LLVMValueRef +lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, + unsigned factor, unsigned i) +{ + /* + * Compute src/first term RGB + */ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + return bld->src[i]; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return bld->src[3]; + case PIPE_BLENDFACTOR_DST_COLOR: + return bld->dst[i]; + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst[3]; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(i == 3) + return bld->base.one; + else { + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + if(!bld->src_alpha_saturate) + bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]); + return bld->src_alpha_saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR: + return bld->con[i]; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return bld->con[3]; + case PIPE_BLENDFACTOR_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + if(!bld->inv_src[i]) + bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]); + return bld->inv_src[i]; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src[3]) + bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]); + return bld->inv_src[3]; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + if(!bld->inv_dst[i]) + bld->inv_dst[i] = 
lp_build_comp(&bld->base, bld->dst[i]); + return bld->inv_dst[i]; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + return bld->inv_dst[3]; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + if(!bld->inv_con[i]) + bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]); + return bld->inv_con[i]; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if(!bld->inv_con[3]) + bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]); + return bld->inv_con[3]; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + default: + assert(0); + return bld->base.zero; + } +} + +/** Build SoA blend IR: channels 0-2 use the rgb factors/func, channel 3 the alpha ones; results are written to res[0..3]. */ +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef con[4], + LLVMValueRef res[4]) +{ + struct lp_build_blend_soa_context bld; + unsigned i, j; + + /* Setup build context; it is zeroed so the lazily built inv_* / saturate values start out NULL */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + for (i = 0; i < 4; ++i) { + bld.src[i] = src[i]; + bld.dst[i] = dst[i]; + bld.con[i] = con[i]; + } + + /* + * Compute src/dst factors: row 0 holds the color, row 1 its blend factor; columns 0-3 are src, 4-7 dst. + */ + for (i = 0; i < 4; ++i) { + unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor; + unsigned dst_factor = i < 3 ? 
blend->rgb_dst_factor : blend->alpha_dst_factor; + bld.factor[0][0 + i] = src[i]; + bld.factor[1][0 + i] = lp_build_blend_soa_factor(&bld, src_factor, i); + bld.factor[0][4 + i] = dst[i]; + bld.factor[1][4 + i] = lp_build_blend_soa_factor(&bld, dst_factor, i); + } + + /* + * Compute src/dst terms (color * factor), sharing products whose operands are the same values + */ + for (i = 0; i < 8; ++i) { + + /* See if this multiplication has been previously computed (in either operand order) */ + for(j = 0; j < i; ++j) { + if((bld.factor[0][j] == bld.factor[0][i] && + bld.factor[1][j] == bld.factor[1][i]) || + (bld.factor[0][j] == bld.factor[1][i] && + bld.factor[1][j] == bld.factor[0][i])) + break; + } + + if(j < i) + bld.term[i] = bld.term[j]; + else + bld.term[i] = lp_build_mul(&bld.base, bld.factor[0][i], bld.factor[1][i]); + } + + /* + * Combine terms: res[i] = func(term[i], term[4 + i]) + */ + for (i = 0; i < 4; ++i) { + unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func; + boolean func_commutative = lp_build_blend_func_commutative(func); + + /* See if an earlier channel already yields this result: same function on the same terms (or swapped terms if commutative), or the reverse function on swapped terms. Compare the terms themselves, not rows of the factor table. */ + for(j = 0; j < i; ++j) { + unsigned prev_func = j < 3 ? 
blend->rgb_func : blend->alpha_func; + unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); + + if((func == prev_func && + bld.term[0 + j] == bld.term[0 + i] && + bld.term[4 + j] == bld.term[4 + i]) || + (((func == prev_func && func_commutative) || func_reverse) && + bld.term[0 + j] == bld.term[4 + i] && + bld.term[4 + j] == bld.term[0 + i])) + break; + } + + if(j < i) + res[i] = res[j]; + else + res[i] = lp_build_blend_func(&bld.base, func, bld.term[i + 0], bld.term[i + 4]); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index c8901fea984..8bf5508bd4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -37,12 +37,19 @@ */ -#include "lp_bld.h" #include "lp_bld_type.h" #include "lp_bld_arit.h" +#include "lp_bld_blend.h" #include "lp_test.h" +enum vector_mode +{ + AoS = 0, + SoA = 1 +}; + + typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); @@ -52,6 +59,7 @@ write_tsv_header(FILE *fp) fprintf(fp, "result\t" "cycles_per_channel\t" + "mode\t" "type\t" "sep_func\t" "sep_src_factor\t" @@ -70,13 +78,22 @@ write_tsv_header(FILE *fp) static void write_tsv_row(FILE *fp, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type, double cycles, boolean success) { fprintf(fp, "%s\t", success ? "pass" : "fail"); - fprintf(fp, "%.1f\t", cycles / type.length); + if (mode == AoS) { + fprintf(fp, "%.1f\t", cycles / type.length); + fprintf(fp, "aos\t"); + } + + if (mode == SoA) { + fprintf(fp, "%.1f\t", cycles / (4 * type.length)); + fprintf(fp, "soa\t"); + } fprintf(fp, "%s%u%sx%u\t", type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), @@ -106,10 +123,19 @@ write_tsv_row(FILE *fp, static void dump_blend_type(FILE *fp, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type) { + fprintf(fp, "%s", mode ? 
"soa" : "aos"); + + fprintf(fp, " type=%s%u%sx%u", + type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), + type.width, + type.norm ? "n" : "", + type.length); + fprintf(fp, - "%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", + " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", "rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE), "rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE), "rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), @@ -117,12 +143,6 @@ dump_blend_type(FILE *fp, "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE), "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); - fprintf(fp, " type=%s%u%sx%u", - type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), - type.width, - type.norm ? "n" : "", - type.length); - fprintf(fp, " ...\n"); fflush(fp); } @@ -131,6 +151,7 @@ dump_blend_type(FILE *fp, static LLVMValueRef add_blend_test(LLVMModuleRef module, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type) { LLVMTypeRef ret_type; @@ -143,10 +164,6 @@ add_blend_test(LLVMModuleRef module, LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; - LLVMValueRef src; - LLVMValueRef dst; - LLVMValueRef con; - LLVMValueRef res; ret_type = LLVMInt64Type(); vec_type = lp_build_vec_type(type); @@ -163,15 +180,51 @@ add_blend_test(LLVMModuleRef module, builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - src = LLVMBuildLoad(builder, src_ptr, "src"); - dst = LLVMBuildLoad(builder, dst_ptr, "dst"); - con = LLVMBuildLoad(builder, const_ptr, "const"); + if (mode == AoS) { + LLVMValueRef src; + LLVMValueRef dst; + LLVMValueRef con; + LLVMValueRef res; + + src = LLVMBuildLoad(builder, src_ptr, "src"); + dst = LLVMBuildLoad(builder, dst_ptr, "dst"); + con = LLVMBuildLoad(builder, const_ptr, "const"); - res = lp_build_blend(builder, blend, type, src, dst, con, 3); + res = lp_build_blend_aos(builder, blend, type, 
src, dst, con, 3); - LLVMSetValueName(res, "res"); + LLVMSetValueName(res, "res"); - LLVMBuildStore(builder, res, res_ptr); + LLVMBuildStore(builder, res, res_ptr); + } + + if (mode == SoA) { + LLVMValueRef src[4]; + LLVMValueRef dst[4]; + LLVMValueRef con[4]; + LLVMValueRef res[4]; + char src_name[5] = "src?"; + char dst_name[5] = "dst?"; + char con_name[5] = "con?"; + char res_name[5] = "res?"; + unsigned i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + con_name[3] = dst_name[3] = src_name[3] = "rgba"[i]; + src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), src_name); + dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name); + con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), con_name); + } + + lp_build_blend_soa(builder, blend, type, src, dst, con, res); + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res_name[3] = "rgba"[i]; + LLVMSetValueName(res[i], res_name); + LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); + } + } LLVMBuildRetVoid(builder);; @@ -415,6 +468,7 @@ static boolean test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type) { LLVMModuleRef module = NULL; @@ -431,11 +485,11 @@ test_one(unsigned verbose, unsigned i, j; if(verbose >= 1) - dump_blend_type(stdout, blend, type); + dump_blend_type(stdout, blend, mode, type); module = LLVMModuleCreateWithName("test"); - func = add_blend_test(module, blend, type); + func = add_blend_test(module, blend, mode, type); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -446,7 +500,7 @@ test_one(unsigned verbose, provider = LLVMCreateModuleProviderForExistingModule(module); if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { if(verbose < 1) - dump_blend_type(stderr, blend, type); + 
dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); @@ -474,66 +528,148 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - int64_t start_counter = 0; - int64_t end_counter = 0; - - random_vec(type, src); - random_vec(type, dst); - random_vec(type, con); - - { - double fsrc[LP_MAX_VECTOR_LENGTH]; - double fdst[LP_MAX_VECTOR_LENGTH]; - double fcon[LP_MAX_VECTOR_LENGTH]; - double fref[LP_MAX_VECTOR_LENGTH]; - - read_vec(type, src, fsrc); - read_vec(type, dst, fdst); - read_vec(type, con, fcon); - - for(j = 0; j < type.length; j += 4) - compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); - - write_vec(type, ref, fref); + if(mode == AoS) { + uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + int64_t start_counter = 0; + int64_t end_counter = 0; + + random_vec(type, src); + random_vec(type, dst); + random_vec(type, con); + + { + double fsrc[LP_MAX_VECTOR_LENGTH]; + double fdst[LP_MAX_VECTOR_LENGTH]; + double fcon[LP_MAX_VECTOR_LENGTH]; + double fref[LP_MAX_VECTOR_LENGTH]; + + read_vec(type, src, fsrc); + read_vec(type, dst, fdst); + read_vec(type, con, fcon); + + for(j = 0; j < type.length; j += 4) + compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); + + write_vec(type, ref, fref); + } + + start_counter = rdtsc(); + blend_test_ptr(src, dst, con, res); + end_counter = rdtsc(); + + cycles[i] = end_counter - start_counter; + + 
if(!compare_vec(type, res, ref)) { + success = FALSE; + + if(verbose < 1) + dump_blend_type(stderr, blend, mode, type); + fprintf(stderr, "MISMATCH\n"); + + fprintf(stderr, " Src: "); + dump_vec(stderr, type, src); + fprintf(stderr, "\n"); + + fprintf(stderr, " Dst: "); + dump_vec(stderr, type, dst); + fprintf(stderr, "\n"); + + fprintf(stderr, " Con: "); + dump_vec(stderr, type, con); + fprintf(stderr, "\n"); + + fprintf(stderr, " Res: "); + dump_vec(stderr, type, res); + fprintf(stderr, "\n"); + + fprintf(stderr, " Ref: "); + dump_vec(stderr, type, ref); + fprintf(stderr, "\n"); + } } - start_counter = rdtsc(); - blend_test_ptr(src, dst, con, res); - end_counter = rdtsc(); + if(mode == SoA) { + const unsigned stride = type.length*type.width/8; + uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + int64_t start_counter = 0; + int64_t end_counter = 0; + boolean mismatch; + + for(j = 0; j < 4; ++j) { + random_vec(type, src + j*stride); + random_vec(type, dst + j*stride); + random_vec(type, con + j*stride); + } - cycles[i] = end_counter - start_counter; + { + double fsrc[4]; + double fdst[4]; + double fcon[4]; + double fref[4]; + unsigned k; + + for(k = 0; k < type.length; ++k) { + for(j = 0; j < 4; ++j) { + fsrc[j] = read_elem(type, src + j*stride, k); + fdst[j] = read_elem(type, dst + j*stride, k); + fcon[j] = read_elem(type, con + j*stride, k); + } - success = compare_vec(type, res, ref); + compute_blend_ref(blend, fsrc, fdst, fcon, fref); - if (!success) { - if(verbose < 1) - dump_blend_type(stderr, blend, type); - fprintf(stderr, "MISMATCH\n"); + for(j = 0; j < 4; ++j) + write_elem(type, ref + j*stride, k, fref[j]); + } + } + + start_counter = rdtsc(); + blend_test_ptr(src, dst, con, res); + end_counter = rdtsc(); + + 
cycles[i] = end_counter - start_counter; + + mismatch = FALSE; + for (j = 0; j < 4; ++j) + if(!compare_vec(type, res + j*stride, ref + j*stride)) + mismatch = TRUE; - fprintf(stderr, " Src: "); - dump_vec(stderr, type, src); - fprintf(stderr, "\n"); + if (mismatch) { + success = FALSE; - fprintf(stderr, " Dst: "); - dump_vec(stderr, type, dst); - fprintf(stderr, "\n"); + if(verbose < 1) + dump_blend_type(stderr, blend, mode, type); + fprintf(stderr, "MISMATCH\n"); + for(j = 0; j < 4; ++j) { + char channel = "RGBA"[j]; + fprintf(stderr, " Src%c: ", channel); + dump_vec(stderr, type, src + j*stride); + fprintf(stderr, "\n"); - fprintf(stderr, " Con: "); - dump_vec(stderr, type, con); - fprintf(stderr, "\n"); + fprintf(stderr, " Dst%c: ", channel); + dump_vec(stderr, type, dst + j*stride); + fprintf(stderr, "\n"); - fprintf(stderr, " Res: "); - dump_vec(stderr, type, res); - fprintf(stderr, "\n"); + fprintf(stderr, " Con%c: ", channel); + dump_vec(stderr, type, con + j*stride); + fprintf(stderr, "\n"); - fprintf(stderr, " Ref: "); - dump_vec(stderr, type, ref); - fprintf(stderr, "\n"); + fprintf(stderr, " Res%c: ", channel); + dump_vec(stderr, type, res + j*stride); + fprintf(stderr, "\n"); + + fprintf(stderr, " Ref%c: ", channel); + dump_vec(stderr, type, ref + j*stride); + fprintf(stderr, "\n"); + } + } } } @@ -569,7 +705,7 @@ test_one(unsigned verbose, } if(fp) - write_tsv_row(fp, blend, type, cycles_avg, success); + write_tsv_row(fp, blend, mode, type, cycles_avg, success); if (!success) { if(verbose < 2) @@ -650,6 +786,7 @@ test_all(unsigned verbose, FILE *fp) const unsigned *alpha_src_factor; const unsigned *alpha_dst_factor; struct pipe_blend_state blend; + enum vector_mode mode; const union lp_type *type; bool success = TRUE; @@ -659,24 +796,26 @@ test_all(unsigned verbose, FILE *fp) for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; 
++alpha_src_factor) { for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { - for(type = blend_types; type < &blend_types[num_types]; ++type) { - - if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || - *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) - continue; - - memset(&blend, 0, sizeof blend); - blend.blend_enable = 1; - blend.rgb_func = *rgb_func; - blend.rgb_src_factor = *rgb_src_factor; - blend.rgb_dst_factor = *rgb_dst_factor; - blend.alpha_func = *alpha_func; - blend.alpha_src_factor = *alpha_src_factor; - blend.alpha_dst_factor = *alpha_dst_factor; - - if(!test_one(verbose, fp, &blend, *type)) - success = FALSE; - + for(mode = 0; mode < 2; ++mode) { + for(type = blend_types; type < &blend_types[num_types]; ++type) { + + if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) + continue; + + memset(&blend, 0, sizeof blend); + blend.blend_enable = 1; + blend.rgb_func = *rgb_func; + blend.rgb_src_factor = *rgb_src_factor; + blend.rgb_dst_factor = *rgb_dst_factor; + blend.alpha_func = *alpha_func; + blend.alpha_src_factor = *alpha_src_factor; + blend.alpha_dst_factor = *alpha_dst_factor; + + if(!test_one(verbose, fp, &blend, mode, *type)) + success = FALSE; + + } } } } @@ -699,6 +838,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) const unsigned *alpha_src_factor; const unsigned *alpha_dst_factor; struct pipe_blend_state blend; + enum vector_mode mode; const union lp_type *type; unsigned long i; bool success = TRUE; @@ -717,20 +857,21 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) alpha_dst_factor = &blend_factors[random() % num_factors]; } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); - for(type = blend_types; type < &blend_types[num_types]; ++type) { + mode = random() & 1; - memset(&blend, 0, sizeof blend); - blend.blend_enable = 1; - blend.rgb_func = *rgb_func; - blend.rgb_src_factor = 
*rgb_src_factor; - blend.rgb_dst_factor = *rgb_dst_factor; - blend.alpha_func = *alpha_func; - blend.alpha_src_factor = *alpha_src_factor; - blend.alpha_dst_factor = *alpha_dst_factor; + type = &blend_types[random() % num_types]; - if(!test_one(verbose, fp, &blend, *type)) - success = FALSE; - } + memset(&blend, 0, sizeof blend); + blend.blend_enable = 1; + blend.rgb_func = *rgb_func; + blend.rgb_src_factor = *rgb_src_factor; + blend.rgb_dst_factor = *rgb_dst_factor; + blend.alpha_func = *alpha_func; + blend.alpha_src_factor = *alpha_src_factor; + blend.alpha_dst_factor = *alpha_dst_factor; + + if(!test_one(verbose, fp, &blend, mode, *type)) + success = FALSE; } return success;