From 2529ed5616b1b152766a3355444260b88184cd6e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Sun, 9 Aug 2009 12:39:38 +0100 Subject: [PATCH] llvmpipe: SoA blending. Throughput seems to be 4x higher. --- src/gallium/drivers/llvmpipe/SConscript | 3 +- src/gallium/drivers/llvmpipe/lp_bld.h | 11 - src/gallium/drivers/llvmpipe/lp_bld_blend.h | 94 +++++ .../{lp_bld_blend.c => lp_bld_blend_aos.c} | 88 +++-- .../drivers/llvmpipe/lp_bld_blend_soa.c | 237 ++++++++++++ src/gallium/drivers/llvmpipe/lp_test_blend.c | 347 ++++++++++++------ 6 files changed, 635 insertions(+), 145 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend.h rename src/gallium/drivers/llvmpipe/{lp_bld_blend.c => lp_bld_blend_aos.c} (80%) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index aca4f21b9ed..0a8e6e8fad6 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -11,6 +11,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fs_sse.c', 'lp_fs_llvm.c', 'lp_bld_arit.c', + 'lp_bld_blend_aos.c', + 'lp_bld_blend_soa.c', 'lp_bld_const.c', 'lp_bld_conv.c', 'lp_bld_intr.c', @@ -20,7 +22,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_store.c', 'lp_bld_loop.c', 'lp_bld_logicop.c', - 'lp_bld_blend.c', 'lp_bld_swizzle.c', 'lp_bld_type.c', 'lp_clear.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h index e9d9c25a800..a725cbb4740 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld.h +++ b/src/gallium/drivers/llvmpipe/lp_bld.h @@ -45,7 +45,6 @@ #include "pipe/p_format.h" -struct pipe_blend_state; union lp_type; @@ -132,14 +131,4 @@ lp_build_logicop(LLVMBuilderRef builder, LLVMValueRef dst); -LLVMValueRef -lp_build_blend(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - union lp_type type, - LLVMValueRef src, - LLVMValueRef dst, - LLVMValueRef const_, - unsigned 
alpha_swizzle); - - #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h new file mode 100644 index 00000000000..36f53dae935 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_BLEND_H +#define LP_BLD_BLEND_H + + +/** + * @file + * LLVM IR building helpers interfaces. + * + * We use LLVM-C bindings for now. They are not documented, but follow the C++ + * interfaces very closely, and appear to be complete enough for code + * generation. 
See + * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html + * for a standalone example. + */ + +#include <llvm-c/Core.h> + +#include "pipe/p_format.h" + + +struct pipe_blend_state; +union lp_type; +struct lp_build_context; + + +/** + * Whether the blending function is commutative or not. + */ +boolean +lp_build_blend_func_commutative(unsigned func); + + +/** + * Whether the blending functions are the reverse of each other. + */ +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func); + + +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2); + + +LLVMValueRef +lp_build_blend_aos(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle); + + +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef const_[4], + LLVMValueRef res[4]); + + +#endif /* !LP_BLD_BLEND_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c similarity index 80% rename from src/gallium/drivers/llvmpipe/lp_bld_blend.c rename to src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index a144469b354..e4a57af94c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -28,10 +28,7 @@ /** * @file - * Blend LLVM IR generation. - * - * This code is generic -- it should be able to cope both with floating point - * and integer inputs in AOS form. + * Blend LLVM IR generation -- AOS form. * * @author Jose Fonseca */ @@ -39,11 +36,11 @@ #include "pipe/p_state.h" -#include "lp_bld.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" #include "lp_bld_swizzle.h" +#include "lp_bld_blend.h" /** @@ -51,7 +48,7 @@ * recomputing them. 
Also reusing the values allows us to do simplifications * that LLVM optimization passes wouldn't normally be able to do. */ -struct lp_build_blend_context +struct lp_build_blend_aos_context { struct lp_build_context base; @@ -72,7 +69,7 @@ struct lp_build_blend_context static LLVMValueRef -lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld, +lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, unsigned factor, boolean alpha) { @@ -174,7 +171,7 @@ lp_build_blend_factor_swizzle(unsigned factor) static LLVMValueRef -lp_build_blend_swizzle(struct lp_build_blend_context *bld, +lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, LLVMValueRef rgb, LLVMValueRef alpha, enum lp_build_blend_swizzle rgb_swizzle, @@ -211,7 +208,7 @@ lp_build_blend_swizzle(struct lp_build_blend_context *bld, * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml */ static LLVMValueRef -lp_build_blend_factor(struct lp_build_blend_context *bld, +lp_build_blend_factor(struct lp_build_blend_aos_context *bld, LLVMValueRef factor1, unsigned rgb_factor, unsigned alpha_factor, @@ -233,44 +230,75 @@ lp_build_blend_factor(struct lp_build_blend_context *bld, } +boolean +lp_build_blend_func_commutative(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + case PIPE_BLEND_MIN: + case PIPE_BLEND_MAX: + return TRUE; + case PIPE_BLEND_SUBTRACT: + case PIPE_BLEND_REVERSE_SUBTRACT: + return FALSE; + default: + assert(0); + return TRUE; + } +} + + +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) +{ + if(rgb_func == alpha_func) + return FALSE; + if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + return TRUE; + if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + return TRUE; + return FALSE; +} + + /** * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml */ -static LLVMValueRef -lp_build_blend_func(struct lp_build_blend_context *bld, 
+LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, unsigned func, LLVMValueRef term1, LLVMValueRef term2) { switch (func) { case PIPE_BLEND_ADD: - return lp_build_add(&bld->base, term1, term2); + return lp_build_add(bld, term1, term2); break; case PIPE_BLEND_SUBTRACT: - return lp_build_sub(&bld->base, term1, term2); + return lp_build_sub(bld, term1, term2); case PIPE_BLEND_REVERSE_SUBTRACT: - return lp_build_sub(&bld->base, term2, term1); + return lp_build_sub(bld, term2, term1); case PIPE_BLEND_MIN: - return lp_build_min(&bld->base, term1, term2); + return lp_build_min(bld, term1, term2); case PIPE_BLEND_MAX: - return lp_build_max(&bld->base, term1, term2); + return lp_build_max(bld, term1, term2); default: assert(0); - return bld->base.zero; + return bld->zero; } } LLVMValueRef -lp_build_blend(LLVMBuilderRef builder, - const struct pipe_blend_state *blend, - union lp_type type, - LLVMValueRef src, - LLVMValueRef dst, - LLVMValueRef const_, - unsigned alpha_swizzle) +lp_build_blend_aos(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef const_, + unsigned alpha_swizzle) { - struct lp_build_blend_context bld; + struct lp_build_blend_aos_context bld; LLVMValueRef src_term; LLVMValueRef dst_term; @@ -284,8 +312,8 @@ lp_build_blend(LLVMBuilderRef builder, bld.dst = dst; bld.const_ = const_; - /* TODO: There are still a few optimization oportunities here. For certain - * combinations it is possible to reorder the operations and therefor saving + /* TODO: There are still a few optimization opportunities here. For certain + * combinations it is possible to reorder the operations and therefore saving * some instructions. 
*/ src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle); @@ -297,7 +325,7 @@ lp_build_blend(LLVMBuilderRef builder, #endif if(blend->rgb_func == blend->alpha_func) { - return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); + return lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term); } else { /* Seperate RGB / A functions */ @@ -305,8 +333,8 @@ lp_build_blend(LLVMBuilderRef builder, LLVMValueRef rgb; LLVMValueRef alpha; - rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term); + rgb = lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, blend->alpha_func, src_term, dst_term); return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c new file mode 100644 index 00000000000..1ef1718cfda --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -0,0 +1,237 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- SoA. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_state.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_blend.h" + + +/** + * We may use the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. + */ +struct lp_build_blend_soa_context +{ + struct lp_build_context base; + + LLVMValueRef src[4]; + LLVMValueRef dst[4]; + LLVMValueRef con[4]; + + LLVMValueRef inv_src[4]; + LLVMValueRef inv_dst[4]; + LLVMValueRef inv_con[4]; + + LLVMValueRef src_alpha_saturate; + + /** + * We store all factors in a table in order to eliminate redundant + * multiplications later. + */ + LLVMValueRef factor[2][8]; + + /** + * Table with all terms (0..3 from the src channels, 4..7 from the dst channels). 
+ */ + LLVMValueRef term[8]; +}; + + +static LLVMValueRef +lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, + unsigned factor, unsigned i) +{ + /* + * Return the value of blend factor `factor` for channel i (3 is alpha). + */ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + return bld->src[i]; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return bld->src[3]; + case PIPE_BLENDFACTOR_DST_COLOR: + return bld->dst[i]; + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst[3]; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(i == 3) + return bld->base.one; + else { + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + if(!bld->src_alpha_saturate) + bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]); + return bld->src_alpha_saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR: + return bld->con[i]; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return bld->con[3]; + case PIPE_BLENDFACTOR_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + if(!bld->inv_src[i]) + bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]); + return bld->inv_src[i]; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src[3]) + bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]); + return bld->inv_src[3]; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + if(!bld->inv_dst[i]) + bld->inv_dst[i] = lp_build_comp(&bld->base, bld->dst[i]); + return bld->inv_dst[i]; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst[3]) + bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); + return bld->inv_dst[3]; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + if(!bld->inv_con[i]) + bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]); + return bld->inv_con[i]; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + 
if(!bld->inv_con[3]) + bld->inv_con[3] = 
lp_build_comp(&bld->base, bld->con[3]); + return bld->inv_con[3]; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + /* TODO */ + assert(0); + return bld->base.zero; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* TODO */ + assert(0); + return bld->base.zero; + default: + assert(0); + return bld->base.zero; + } +} + +/** Emit SoA blend IR: res[i] = func(src[i] * src_factor, dst[i] * dst_factor), i = R,G,B,A. */ +void +lp_build_blend_soa(LLVMBuilderRef builder, + const struct pipe_blend_state *blend, + union lp_type type, + LLVMValueRef src[4], + LLVMValueRef dst[4], + LLVMValueRef con[4], + LLVMValueRef res[4]) +{ + struct lp_build_blend_soa_context bld; + unsigned i, j; + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, builder, type); + for (i = 0; i < 4; ++i) { + bld.src[i] = src[i]; + bld.dst[i] = dst[i]; + bld.con[i] = con[i]; + } + + /* + * Compute src/dst factors: factor[0] holds the colors, factor[1] their factors. + */ + for (i = 0; i < 4; ++i) { + unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor; + unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor; + bld.factor[0][0 + i] = src[i]; + bld.factor[1][0 + i] = lp_build_blend_soa_factor(&bld, src_factor, i); + bld.factor[0][4 + i] = dst[i]; + bld.factor[1][4 + i] = lp_build_blend_soa_factor(&bld, dst_factor, i); + } + + /* + * Compute src/dst terms (color * factor), reusing identical products + */ + for (i = 0; i < 8; ++i) { + + /* See if this multiplication has been previously computed */ + for(j = 0; j < i; ++j) { + if((bld.factor[0][j] == bld.factor[0][i] && + bld.factor[1][j] == bld.factor[1][i]) || + (bld.factor[0][j] == bld.factor[1][i] && + bld.factor[1][j] == bld.factor[0][i])) + break; + } + + if(j < i) + bld.term[i] = bld.term[j]; + else + bld.term[i] = lp_build_mul(&bld.base, bld.factor[0][i], bld.factor[1][i]); + } + + /* + * Combine the src and dst term of each channel with its blend function + */ + for (i = 0; i < 4; ++i) { + unsigned func = i < 3 ? 
blend->rgb_func : blend->alpha_func; + boolean func_commutative = lp_build_blend_func_commutative(func); + + /* See if the same terms were already combined for a previous channel */ + for(j = 0; j < i; ++j) { + unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func; + unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); + + if((func == prev_func && + bld.term[0 + j] == bld.term[0 + i] && + bld.term[4 + j] == bld.term[4 + i]) || + (((func == prev_func && func_commutative) || func_reverse) && + bld.term[0 + j] == bld.term[4 + i] && + bld.term[4 + j] == bld.term[0 + i])) + break; + } + + if(j < i) + res[i] = res[j]; + else + res[i] = lp_build_blend_func(&bld.base, func, bld.term[i + 0], bld.term[i + 4]); + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index c8901fea984..8bf5508bd4e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -37,12 +37,19 @@ */ -#include "lp_bld.h" #include "lp_bld_type.h" #include "lp_bld_arit.h" +#include "lp_bld_blend.h" #include "lp_test.h" +enum vector_mode +{ + AoS = 0, + SoA = 1 +}; + + typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); @@ -52,6 +59,7 @@ write_tsv_header(FILE *fp) fprintf(fp, "result\t" "cycles_per_channel\t" + "mode\t" "type\t" "sep_func\t" "sep_src_factor\t" @@ -70,13 +78,22 @@ write_tsv_header(FILE *fp) static void write_tsv_row(FILE *fp, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type, double cycles, boolean success) { fprintf(fp, "%s\t", success ? "pass" : "fail"); - fprintf(fp, "%.1f\t", cycles / type.length); + if (mode == AoS) { + fprintf(fp, "%.1f\t", cycles / type.length); + fprintf(fp, "aos\t"); + } + + if (mode == SoA) { + fprintf(fp, "%.1f\t", cycles / (4 * type.length)); + fprintf(fp, "soa\t"); + } fprintf(fp, "%s%u%sx%u\t", type.floating ? "f" : (type.fixed ? "h" : (type.sign ? 
"s" : "u")), @@ -106,10 +123,19 @@ write_tsv_row(FILE *fp, static void dump_blend_type(FILE *fp, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type) { + fprintf(fp, "%s", mode ? "soa" : "aos"); + + fprintf(fp, " type=%s%u%sx%u", + type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), + type.width, + type.norm ? "n" : "", + type.length); + fprintf(fp, - "%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", + " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", "rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE), "rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE), "rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE), @@ -117,12 +143,6 @@ dump_blend_type(FILE *fp, "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE), "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE)); - fprintf(fp, " type=%s%u%sx%u", - type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), - type.width, - type.norm ? 
"n" : "", - type.length); - fprintf(fp, " ...\n"); fflush(fp); } @@ -131,6 +151,7 @@ dump_blend_type(FILE *fp, static LLVMValueRef add_blend_test(LLVMModuleRef module, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type) { LLVMTypeRef ret_type; @@ -143,10 +164,6 @@ add_blend_test(LLVMModuleRef module, LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; - LLVMValueRef src; - LLVMValueRef dst; - LLVMValueRef con; - LLVMValueRef res; ret_type = LLVMInt64Type(); vec_type = lp_build_vec_type(type); @@ -163,15 +180,51 @@ add_blend_test(LLVMModuleRef module, builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - src = LLVMBuildLoad(builder, src_ptr, "src"); - dst = LLVMBuildLoad(builder, dst_ptr, "dst"); - con = LLVMBuildLoad(builder, const_ptr, "const"); + if (mode == AoS) { + LLVMValueRef src; + LLVMValueRef dst; + LLVMValueRef con; + LLVMValueRef res; + + src = LLVMBuildLoad(builder, src_ptr, "src"); + dst = LLVMBuildLoad(builder, dst_ptr, "dst"); + con = LLVMBuildLoad(builder, const_ptr, "const"); - res = lp_build_blend(builder, blend, type, src, dst, con, 3); + res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3); - LLVMSetValueName(res, "res"); + LLVMSetValueName(res, "res"); - LLVMBuildStore(builder, res, res_ptr); + LLVMBuildStore(builder, res, res_ptr); + } + + if (mode == SoA) { + LLVMValueRef src[4]; + LLVMValueRef dst[4]; + LLVMValueRef con[4]; + LLVMValueRef res[4]; + char src_name[5] = "src?"; + char dst_name[5] = "dst?"; + char con_name[5] = "con?"; + char res_name[5] = "res?"; + unsigned i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + con_name[3] = dst_name[3] = src_name[3] = "rgba"[i]; + src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), src_name); + dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name); + con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, 
const_ptr, &index, 1, ""), con_name); + } + + lp_build_blend_soa(builder, blend, type, src, dst, con, res); + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res_name[3] = "rgba"[i]; + LLVMSetValueName(res[i], res_name); + LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, "")); + } + } LLVMBuildRetVoid(builder);; @@ -415,6 +468,7 @@ static boolean test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, + enum vector_mode mode, union lp_type type) { LLVMModuleRef module = NULL; @@ -431,11 +485,11 @@ test_one(unsigned verbose, unsigned i, j; if(verbose >= 1) - dump_blend_type(stdout, blend, type); + dump_blend_type(stdout, blend, mode, type); module = LLVMModuleCreateWithName("test"); - func = add_blend_test(module, blend, type); + func = add_blend_test(module, blend, mode, type); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -446,7 +500,7 @@ test_one(unsigned verbose, provider = LLVMCreateModuleProviderForExistingModule(module); if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { if(verbose < 1) - dump_blend_type(stderr, blend, type); + dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); @@ -474,66 +528,148 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - int64_t start_counter = 0; - int64_t end_counter = 0; - - random_vec(type, src); - random_vec(type, dst); - random_vec(type, con); - - { - double fsrc[LP_MAX_VECTOR_LENGTH]; - double fdst[LP_MAX_VECTOR_LENGTH]; - double fcon[LP_MAX_VECTOR_LENGTH]; - double fref[LP_MAX_VECTOR_LENGTH]; - - read_vec(type, src, 
fsrc); - read_vec(type, dst, fdst); - read_vec(type, con, fcon); - - for(j = 0; j < type.length; j += 4) - compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); - - write_vec(type, ref, fref); + if(mode == AoS) { + uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + int64_t start_counter = 0; + int64_t end_counter = 0; + + random_vec(type, src); + random_vec(type, dst); + random_vec(type, con); + + { + double fsrc[LP_MAX_VECTOR_LENGTH]; + double fdst[LP_MAX_VECTOR_LENGTH]; + double fcon[LP_MAX_VECTOR_LENGTH]; + double fref[LP_MAX_VECTOR_LENGTH]; + + read_vec(type, src, fsrc); + read_vec(type, dst, fdst); + read_vec(type, con, fcon); + + for(j = 0; j < type.length; j += 4) + compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); + + write_vec(type, ref, fref); + } + + start_counter = rdtsc(); + blend_test_ptr(src, dst, con, res); + end_counter = rdtsc(); + + cycles[i] = end_counter - start_counter; + + if(!compare_vec(type, res, ref)) { + success = FALSE; + + if(verbose < 1) + dump_blend_type(stderr, blend, mode, type); + fprintf(stderr, "MISMATCH\n"); + + fprintf(stderr, " Src: "); + dump_vec(stderr, type, src); + fprintf(stderr, "\n"); + + fprintf(stderr, " Dst: "); + dump_vec(stderr, type, dst); + fprintf(stderr, "\n"); + + fprintf(stderr, " Con: "); + dump_vec(stderr, type, con); + fprintf(stderr, "\n"); + + fprintf(stderr, " Res: "); + dump_vec(stderr, type, res); + fprintf(stderr, "\n"); + + fprintf(stderr, " Ref: "); + dump_vec(stderr, type, ref); + fprintf(stderr, "\n"); + } } - start_counter = rdtsc(); - blend_test_ptr(src, dst, con, res); - end_counter = rdtsc(); + if(mode == SoA) { + const unsigned stride = type.length*type.width/8; + uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t 
dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + int64_t start_counter = 0; + int64_t end_counter = 0; + boolean mismatch; + + for(j = 0; j < 4; ++j) { + random_vec(type, src + j*stride); + random_vec(type, dst + j*stride); + random_vec(type, con + j*stride); + } - cycles[i] = end_counter - start_counter; + { + double fsrc[4]; + double fdst[4]; + double fcon[4]; + double fref[4]; + unsigned k; + + for(k = 0; k < type.length; ++k) { + for(j = 0; j < 4; ++j) { + fsrc[j] = read_elem(type, src + j*stride, k); + fdst[j] = read_elem(type, dst + j*stride, k); + fcon[j] = read_elem(type, con + j*stride, k); + } - success = compare_vec(type, res, ref); + compute_blend_ref(blend, fsrc, fdst, fcon, fref); - if (!success) { - if(verbose < 1) - dump_blend_type(stderr, blend, type); - fprintf(stderr, "MISMATCH\n"); + for(j = 0; j < 4; ++j) + write_elem(type, ref + j*stride, k, fref[j]); + } + } + + start_counter = rdtsc(); + blend_test_ptr(src, dst, con, res); + end_counter = rdtsc(); + + cycles[i] = end_counter - start_counter; + + mismatch = FALSE; + for (j = 0; j < 4; ++j) + if(!compare_vec(type, res + j*stride, ref + j*stride)) + mismatch = TRUE; - fprintf(stderr, " Src: "); - dump_vec(stderr, type, src); - fprintf(stderr, "\n"); + if (mismatch) { + success = FALSE; - fprintf(stderr, " Dst: "); - dump_vec(stderr, type, dst); - fprintf(stderr, "\n"); + if(verbose < 1) + dump_blend_type(stderr, blend, mode, type); + fprintf(stderr, "MISMATCH\n"); + for(j = 0; j < 4; ++j) { + char channel = "RGBA"[j]; + fprintf(stderr, " Src%c: ", channel); + dump_vec(stderr, type, src + j*stride); + fprintf(stderr, "\n"); - fprintf(stderr, " Con: "); - dump_vec(stderr, type, con); - fprintf(stderr, "\n"); + fprintf(stderr, " Dst%c: ", channel); + dump_vec(stderr, type, dst + j*stride); + fprintf(stderr, "\n"); - 
fprintf(stderr, " Res: "); - dump_vec(stderr, type, res); - fprintf(stderr, "\n"); + fprintf(stderr, " Con%c: ", channel); + dump_vec(stderr, type, con + j*stride); + fprintf(stderr, "\n"); - fprintf(stderr, " Ref: "); - dump_vec(stderr, type, ref); - fprintf(stderr, "\n"); + fprintf(stderr, " Res%c: ", channel); + dump_vec(stderr, type, res + j*stride); + fprintf(stderr, "\n"); + + fprintf(stderr, " Ref%c: ", channel); + dump_vec(stderr, type, ref + j*stride); + fprintf(stderr, "\n"); + } + } } } @@ -569,7 +705,7 @@ test_one(unsigned verbose, } if(fp) - write_tsv_row(fp, blend, type, cycles_avg, success); + write_tsv_row(fp, blend, mode, type, cycles_avg, success); if (!success) { if(verbose < 2) @@ -650,6 +786,7 @@ test_all(unsigned verbose, FILE *fp) const unsigned *alpha_src_factor; const unsigned *alpha_dst_factor; struct pipe_blend_state blend; + enum vector_mode mode; const union lp_type *type; bool success = TRUE; @@ -659,24 +796,26 @@ test_all(unsigned verbose, FILE *fp) for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) { for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { - for(type = blend_types; type < &blend_types[num_types]; ++type) { - - if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || - *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) - continue; - - memset(&blend, 0, sizeof blend); - blend.blend_enable = 1; - blend.rgb_func = *rgb_func; - blend.rgb_src_factor = *rgb_src_factor; - blend.rgb_dst_factor = *rgb_dst_factor; - blend.alpha_func = *alpha_func; - blend.alpha_src_factor = *alpha_src_factor; - blend.alpha_dst_factor = *alpha_dst_factor; - - if(!test_one(verbose, fp, &blend, *type)) - success = FALSE; - + for(mode = 0; mode < 2; ++mode) { + for(type = blend_types; type < &blend_types[num_types]; ++type) { + + if(*rgb_dst_factor == 
PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) + continue; + + memset(&blend, 0, sizeof blend); + blend.blend_enable = 1; + blend.rgb_func = *rgb_func; + blend.rgb_src_factor = *rgb_src_factor; + blend.rgb_dst_factor = *rgb_dst_factor; + blend.alpha_func = *alpha_func; + blend.alpha_src_factor = *alpha_src_factor; + blend.alpha_dst_factor = *alpha_dst_factor; + + if(!test_one(verbose, fp, &blend, mode, *type)) + success = FALSE; + + } } } } @@ -699,6 +838,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) const unsigned *alpha_src_factor; const unsigned *alpha_dst_factor; struct pipe_blend_state blend; + enum vector_mode mode; const union lp_type *type; unsigned long i; bool success = TRUE; @@ -717,20 +857,21 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) alpha_dst_factor = &blend_factors[random() % num_factors]; } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); - for(type = blend_types; type < &blend_types[num_types]; ++type) { + mode = random() & 1; - memset(&blend, 0, sizeof blend); - blend.blend_enable = 1; - blend.rgb_func = *rgb_func; - blend.rgb_src_factor = *rgb_src_factor; - blend.rgb_dst_factor = *rgb_dst_factor; - blend.alpha_func = *alpha_func; - blend.alpha_src_factor = *alpha_src_factor; - blend.alpha_dst_factor = *alpha_dst_factor; + type = &blend_types[random() % num_types]; - if(!test_one(verbose, fp, &blend, *type)) - success = FALSE; - } + memset(&blend, 0, sizeof blend); + blend.blend_enable = 1; + blend.rgb_func = *rgb_func; + blend.rgb_src_factor = *rgb_src_factor; + blend.rgb_dst_factor = *rgb_dst_factor; + blend.alpha_func = *alpha_func; + blend.alpha_src_factor = *alpha_src_factor; + blend.alpha_dst_factor = *alpha_dst_factor; + + if(!test_one(verbose, fp, &blend, mode, *type)) + success = FALSE; } return success; -- 2.30.2