Throughput seems to be 4x higher.
'lp_fs_sse.c',
'lp_fs_llvm.c',
'lp_bld_arit.c',
+ 'lp_bld_blend_aos.c',
+ 'lp_bld_blend_soa.c',
'lp_bld_const.c',
'lp_bld_conv.c',
'lp_bld_intr.c',
'lp_bld_store.c',
'lp_bld_loop.c',
'lp_bld_logicop.c',
- 'lp_bld_blend.c',
'lp_bld_swizzle.c',
'lp_bld_type.c',
'lp_clear.c',
#include "pipe/p_format.h"
-struct pipe_blend_state;
union lp_type;
LLVMValueRef dst);
-LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- union lp_type type,
- LLVMValueRef src,
- LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle);
-
-
#endif /* !LP_BLD_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * @file
- * Blend LLVM IR generation.
- *
- * This code is generic -- it should be able to cope both with floating point
- * and integer inputs in AOS form.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- */
-
-
-#include "pipe/p_state.h"
-
-#include "lp_bld.h"
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_arit.h"
-#include "lp_bld_swizzle.h"
-
-
-/**
- * We may the same values several times, so we keep them here to avoid
- * recomputing them. Also reusing the values allows us to do simplifications
- * that LLVM optimization passes wouldn't normally be able to do.
- */
-struct lp_build_blend_context
-{
- struct lp_build_context base;
-
- LLVMValueRef src;
- LLVMValueRef dst;
- LLVMValueRef const_;
-
- LLVMValueRef inv_src;
- LLVMValueRef inv_dst;
- LLVMValueRef inv_const;
- LLVMValueRef saturate;
-
- LLVMValueRef rgb_src_factor;
- LLVMValueRef alpha_src_factor;
- LLVMValueRef rgb_dst_factor;
- LLVMValueRef alpha_dst_factor;
-};
-
-
-static LLVMValueRef
-lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld,
- unsigned factor,
- boolean alpha)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_ZERO:
- return bld->base.zero;
- case PIPE_BLENDFACTOR_ONE:
- return bld->base.one;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return bld->src;
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return bld->dst;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if(alpha)
- return bld->base.one;
- else {
- if(!bld->inv_dst)
- bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
- if(!bld->saturate)
- bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
- return bld->saturate;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return bld->const_;
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- if(!bld->inv_src)
- bld->inv_src = lp_build_comp(&bld->base, bld->src);
- return bld->inv_src;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- if(!bld->inv_dst)
- bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
- return bld->inv_dst;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- if(!bld->inv_const)
- bld->inv_const = lp_build_comp(&bld->base, bld->const_);
- return bld->inv_const;
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* TODO */
- assert(0);
- return bld->base.zero;
- default:
- assert(0);
- return bld->base.zero;
- }
-}
-
-
-enum lp_build_blend_swizzle {
- LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
- LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
-};
-
-
-/**
- * How should we shuffle the base factor.
- */
-static enum lp_build_blend_swizzle
-lp_build_blend_factor_swizzle(unsigned factor)
-{
- switch (factor) {
- case PIPE_BLENDFACTOR_ONE:
- case PIPE_BLENDFACTOR_ZERO:
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return LP_BUILD_BLEND_SWIZZLE_AAAA;
- default:
- assert(0);
- return LP_BUILD_BLEND_SWIZZLE_RGBA;
- }
-}
-
-
-static LLVMValueRef
-lp_build_blend_swizzle(struct lp_build_blend_context *bld,
- LLVMValueRef rgb,
- LLVMValueRef alpha,
- enum lp_build_blend_swizzle rgb_swizzle,
- unsigned alpha_swizzle)
-{
- if(rgb == alpha) {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
- return rgb;
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
- return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
- }
- else {
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
- boolean cond[4] = {0, 0, 0, 0};
- cond[alpha_swizzle] = 1;
- return lp_build_select_aos(&bld->base, alpha, rgb, cond);
- }
- if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
- unsigned char swizzle[4];
- swizzle[0] = alpha_swizzle;
- swizzle[1] = alpha_swizzle;
- swizzle[2] = alpha_swizzle;
- swizzle[3] = alpha_swizzle;
- swizzle[alpha_swizzle] += 4;
- return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
- }
- }
- assert(0);
- return bld->base.undef;
-}
-
-
-/**
- * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
- */
-static LLVMValueRef
-lp_build_blend_factor(struct lp_build_blend_context *bld,
- LLVMValueRef factor1,
- unsigned rgb_factor,
- unsigned alpha_factor,
- unsigned alpha_swizzle)
-{
- LLVMValueRef rgb_factor_;
- LLVMValueRef alpha_factor_;
- LLVMValueRef factor2;
- enum lp_build_blend_swizzle rgb_swizzle;
-
- rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
- alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-
- rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
-
- factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
-
- return lp_build_mul(&bld->base, factor1, factor2);
-}
-
-
-/**
- * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
- */
-static LLVMValueRef
-lp_build_blend_func(struct lp_build_blend_context *bld,
- unsigned func,
- LLVMValueRef term1,
- LLVMValueRef term2)
-{
- switch (func) {
- case PIPE_BLEND_ADD:
- return lp_build_add(&bld->base, term1, term2);
- break;
- case PIPE_BLEND_SUBTRACT:
- return lp_build_sub(&bld->base, term1, term2);
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return lp_build_sub(&bld->base, term2, term1);
- case PIPE_BLEND_MIN:
- return lp_build_min(&bld->base, term1, term2);
- case PIPE_BLEND_MAX:
- return lp_build_max(&bld->base, term1, term2);
- default:
- assert(0);
- return bld->base.zero;
- }
-}
-
-
-LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- union lp_type type,
- LLVMValueRef src,
- LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle)
-{
- struct lp_build_blend_context bld;
- LLVMValueRef src_term;
- LLVMValueRef dst_term;
-
- /* It makes no sense to blend unless values are normalized */
- assert(type.norm);
-
- /* Setup build context */
- memset(&bld, 0, sizeof bld);
- lp_build_context_init(&bld.base, builder, type);
- bld.src = src;
- bld.dst = dst;
- bld.const_ = const_;
-
- /* TODO: There are still a few optimization oportunities here. For certain
- * combinations it is possible to reorder the operations and therefor saving
- * some instructions. */
-
- src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
- dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
-
-#ifdef DEBUG
- LLVMSetValueName(src_term, "src_term");
- LLVMSetValueName(dst_term, "dst_term");
-#endif
-
- if(blend->rgb_func == blend->alpha_func) {
- return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
- }
- else {
- /* Seperate RGB / A functions */
-
- LLVMValueRef rgb;
- LLVMValueRef alpha;
-
- rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
- alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term);
-
- return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
- }
-}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_BLEND_H
+#define LP_BLD_BLEND_H
+
+
+/**
+ * @file
+ * LLVM IR building helpers interfaces.
+ *
+ * We use LLVM-C bindings for now. They are not documented, but follow the C++
+ * interfaces very closely, and appear to be complete enough for code
+ * generation. See
+ * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+ * for a standalone example.
+ */
+
+#include <llvm-c/Core.h>
+
+#include "pipe/p_format.h"
+
+
+struct pipe_blend_state;
+union lp_type;
+struct lp_build_context;
+
+
+/**
+ * Whether the blending function is commutative or not.
+ *
+ * @param func  one of the PIPE_BLEND_x equation values
+ * @return TRUE when swapping the two terms given to lp_build_blend_func()
+ *         cannot change the result, FALSE otherwise.
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func);
+
+
+/**
+ * Whether the blending functions are the reverse of each other.
+ *
+ * @param rgb_func    PIPE_BLEND_x equation used for the color channels
+ * @param alpha_func  PIPE_BLEND_x equation used for the alpha channel
+ * @return TRUE when one of them is PIPE_BLEND_SUBTRACT and the other is
+ *         PIPE_BLEND_REVERSE_SUBTRACT, FALSE for any other pair.
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func);
+
+
+/**
+ * Build the blend equation selected by func on two already weighted terms.
+ *
+ * @param bld    build context used to emit the arithmetic
+ * @param func   one of the PIPE_BLEND_x equation values
+ * @param term1  first operand (the source term in the blend builders)
+ * @param term2  second operand (the destination term in the blend builders)
+ * @return the combined value
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef term1,
+ LLVMValueRef term2);
+
+
+/**
+ * Build the blend of src and dst for colors held in AoS form, i.e. each
+ * argument is a single value carrying all the channels.
+ *
+ * @param blend          blend factors and equations to apply
+ * @param type           element type of the values; it must be normalized
+ * @param const_         blend constant color
+ * @param alpha_swizzle  position (0..3) of the alpha channel
+ * @return the blended color
+ */
+LLVMValueRef
+lp_build_blend_aos(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ union lp_type type,
+ LLVMValueRef src,
+ LLVMValueRef dst,
+ LLVMValueRef const_,
+ unsigned alpha_swizzle);
+
+
+/**
+ * Build the blend of src and dst for colors held in SoA form: every
+ * argument is an array with one value per channel, index 3 being alpha.
+ *
+ * @param blend   blend factors and equations to apply
+ * @param const_  blend constant color, one value per channel
+ * @param res     receives the four blended channels
+ */
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ union lp_type type,
+ LLVMValueRef src[4],
+ LLVMValueRef dst[4],
+ LLVMValueRef const_[4],
+ LLVMValueRef res[4]);
+
+
+#endif /* !LP_BLD_BLEND_H */
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- AOS form.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_blend.h"
+
+
+/**
+ * State shared by the AoS blend helpers while one blend is being built.
+ *
+ * We may use the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
+ * that LLVM optimization passes wouldn't normally be able to do.
+ */
+struct lp_build_blend_aos_context
+{
+ struct lp_build_context base;
+
+ /* Inputs, copied from the arguments of lp_build_blend_aos(). */
+ LLVMValueRef src;
+ LLVMValueRef dst;
+ LLVMValueRef const_;
+
+ /*
+  * Values derived from the inputs. They start out as NULL (the context is
+  * zeroed) and are filled in the first time a factor needs them.
+  */
+ LLVMValueRef inv_src;
+ LLVMValueRef inv_dst;
+ LLVMValueRef inv_const;
+ LLVMValueRef saturate;
+
+ /* NOTE(review): not referenced by any function in this file -- confirm
+  * whether these are still needed. */
+ LLVMValueRef rgb_src_factor;
+ LLVMValueRef alpha_src_factor;
+ LLVMValueRef rgb_dst_factor;
+ LLVMValueRef alpha_dst_factor;
+};
+
+
+/**
+ * Return the value a blend factor is taken from, before any channel
+ * swizzling is applied.
+ *
+ * The COLOR and ALPHA flavours of a factor map to the same value here;
+ * selecting the right channels is done afterwards by the caller.
+ *
+ * @param factor  one of the PIPE_BLENDFACTOR_x values
+ * @param alpha   TRUE when the factor is requested for the alpha channel
+ */
+static LLVMValueRef
+lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
+                                 unsigned factor,
+                                 boolean alpha)
+{
+   struct lp_build_context *base = &bld->base;
+
+   switch (factor) {
+   /* Plain constants */
+   case PIPE_BLENDFACTOR_ZERO:
+      return base->zero;
+   case PIPE_BLENDFACTOR_ONE:
+      return base->one;
+
+   /* Inputs used as they are */
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return bld->src;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return bld->dst;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return bld->const_;
+
+   /* Complemented inputs, built on first use and then reused */
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      if (!bld->inv_src) {
+         bld->inv_src = lp_build_comp(base, bld->src);
+      }
+      return bld->inv_src;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      if (!bld->inv_dst) {
+         bld->inv_dst = lp_build_comp(base, bld->dst);
+      }
+      return bld->inv_dst;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if (!bld->inv_const) {
+         bld->inv_const = lp_build_comp(base, bld->const_);
+      }
+      return bld->inv_const;
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      if (alpha) {
+         return base->one;
+      }
+      /* min(src, complement of dst); both pieces are cached */
+      if (!bld->inv_dst) {
+         bld->inv_dst = lp_build_comp(base, bld->dst);
+      }
+      if (!bld->saturate) {
+         bld->saturate = lp_build_min(base, bld->src, bld->inv_dst);
+      }
+      return bld->saturate;
+
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      /* TODO: the SRC1 factors are not implemented; handled like an
+       * unknown factor. */
+   default:
+      assert(0);
+      return base->zero;
+   }
+}
+
+
+/**
+ * How a factor value has to be spread over the channels of an AoS value.
+ */
+enum lp_build_blend_swizzle {
+ /* Every channel is used where it already is. */
+ LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
+ /* The alpha channel is replicated to all channels. */
+ LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
+};
+
+
+/**
+ * Tell how the unswizzled value of a blend factor must be shuffled.
+ *
+ * @param factor  one of the PIPE_BLENDFACTOR_x values
+ * @return LP_BUILD_BLEND_SWIZZLE_AAAA for the factors that are driven by an
+ *         alpha channel, LP_BUILD_BLEND_SWIZZLE_RGBA for all the others
+ *         (unknown factors assert first).
+ */
+static enum lp_build_blend_swizzle
+lp_build_blend_factor_swizzle(unsigned factor)
+{
+   enum lp_build_blend_swizzle swizzle = LP_BUILD_BLEND_SWIZZLE_RGBA;
+
+   switch (factor) {
+   /* Alpha driven factors */
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      swizzle = LP_BUILD_BLEND_SWIZZLE_AAAA;
+      break;
+
+   /* Constants and per-channel factors */
+   case PIPE_BLENDFACTOR_ZERO:
+   case PIPE_BLENDFACTOR_ONE:
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      break;
+
+   default:
+      assert(0);
+      break;
+   }
+
+   return swizzle;
+}
+
+
+/**
+ * Merge an RGB value and an alpha value into a single AoS value.
+ *
+ * @param rgb            value supplying the color channels
+ * @param alpha          value supplying the alpha channel
+ * @param rgb_swizzle    whether the color channels take rgb as it is (RGBA)
+ *                       or the alpha channel of rgb (AAAA)
+ * @param alpha_swizzle  position (0..3) of the alpha channel
+ */
+static LLVMValueRef
+lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
+                       LLVMValueRef rgb,
+                       LLVMValueRef alpha,
+                       enum lp_build_blend_swizzle rgb_swizzle,
+                       unsigned alpha_swizzle)
+{
+   struct lp_build_context *base = &bld->base;
+   const boolean same_value = (rgb == alpha);
+
+   switch (rgb_swizzle) {
+   case LP_BUILD_BLEND_SWIZZLE_RGBA:
+      if (same_value) {
+         /* Nothing to merge */
+         return rgb;
+      }
+      else {
+         /* Pick the alpha channel from alpha, everything else from rgb */
+         boolean take_alpha[4] = {0, 0, 0, 0};
+         take_alpha[alpha_swizzle] = 1;
+         return lp_build_select_aos(base, alpha, rgb, take_alpha);
+      }
+
+   case LP_BUILD_BLEND_SWIZZLE_AAAA:
+      if (same_value) {
+         return lp_build_broadcast_aos(base, rgb, alpha_swizzle);
+      }
+      else {
+         /*
+          * All channels read the alpha position; adding 4 makes the alpha
+          * channel itself read from the second operand.
+          */
+         unsigned char shuffle[4];
+         unsigned chan;
+         for (chan = 0; chan < 4; ++chan) {
+            shuffle[chan] = alpha_swizzle;
+         }
+         shuffle[alpha_swizzle] += 4;
+         return lp_build_swizzle2_aos(base, rgb, alpha, shuffle);
+      }
+
+   default:
+      break;
+   }
+
+   assert(0);
+   return base->undef;
+}
+
+
+/**
+ * Multiply a color by its blend factor.
+ *
+ * The factor is assembled from rgb_factor for the color channels and
+ * alpha_factor for the alpha channel.
+ *
+ * @param factor1        the color being weighted
+ * @param rgb_factor     PIPE_BLENDFACTOR_x used for the color channels
+ * @param alpha_factor   PIPE_BLENDFACTOR_x used for the alpha channel
+ * @param alpha_swizzle  position (0..3) of the alpha channel
+ *
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
+ */
+static LLVMValueRef
+lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
+                      LLVMValueRef factor1,
+                      unsigned rgb_factor,
+                      unsigned alpha_factor,
+                      unsigned alpha_swizzle)
+{
+   /* The color factor is fetched before the alpha one */
+   LLVMValueRef color_part =
+      lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
+   LLVMValueRef alpha_part =
+      lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
+
+   LLVMValueRef weight =
+      lp_build_blend_swizzle(bld, color_part, alpha_part,
+                             lp_build_blend_factor_swizzle(rgb_factor),
+                             alpha_swizzle);
+
+   return lp_build_mul(&bld->base, factor1, weight);
+}
+
+
+/**
+ * Whether the blending function gives the same result when its two terms
+ * are swapped.
+ *
+ * @param func  one of the PIPE_BLEND_x equation values
+ * @return FALSE for the two subtractions, TRUE for add, min and max. An
+ *         unknown value asserts and is then reported as TRUE.
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func)
+{
+   boolean commutative = TRUE;
+
+   switch (func) {
+   case PIPE_BLEND_SUBTRACT:
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      commutative = FALSE;
+      break;
+   case PIPE_BLEND_ADD:
+   case PIPE_BLEND_MIN:
+   case PIPE_BLEND_MAX:
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return commutative;
+}
+
+
+/**
+ * Whether two blending functions are the reverse of each other, i.e. one is
+ * PIPE_BLEND_SUBTRACT and the other PIPE_BLEND_REVERSE_SUBTRACT.
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
+{
+   const boolean sub_rsub = rgb_func == PIPE_BLEND_SUBTRACT &&
+                            alpha_func == PIPE_BLEND_REVERSE_SUBTRACT;
+   const boolean rsub_sub = rgb_func == PIPE_BLEND_REVERSE_SUBTRACT &&
+                            alpha_func == PIPE_BLEND_SUBTRACT;
+
+   /* Identical functions can satisfy neither of the two tests above */
+   return (sub_rsub || rsub_sub) ? TRUE : FALSE;
+}
+
+
+/**
+ * Build the blend equation selected by func on two weighted terms.
+ *
+ * @param func   one of the PIPE_BLEND_x equation values
+ * @param term1  first operand
+ * @param term2  second operand
+ * @return the combined value; an unknown func asserts and yields zero.
+ *
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+                    unsigned func,
+                    LLVMValueRef term1,
+                    LLVMValueRef term2)
+{
+   LLVMValueRef result;
+
+   switch (func) {
+   case PIPE_BLEND_ADD:
+      result = lp_build_add(bld, term1, term2);
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      result = lp_build_sub(bld, term1, term2);
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      /* Same subtraction with the operands exchanged */
+      result = lp_build_sub(bld, term2, term1);
+      break;
+   case PIPE_BLEND_MIN:
+      result = lp_build_min(bld, term1, term2);
+      break;
+   case PIPE_BLEND_MAX:
+      result = lp_build_max(bld, term1, term2);
+      break;
+   default:
+      assert(0);
+      result = bld->zero;
+      break;
+   }
+
+   return result;
+}
+
+
+/**
+ * Build the blend of src and dst for colors in AoS form.
+ *
+ * Both colors are first multiplied by their blend factors, then the two
+ * products are combined with the blend equation(s).
+ *
+ * @param blend          blend factors and equations to apply
+ * @param type           element type of the values; it must be normalized
+ * @param const_         blend constant color
+ * @param alpha_swizzle  position (0..3) of the alpha channel
+ * @return the blended color
+ */
+LLVMValueRef
+lp_build_blend_aos(LLVMBuilderRef builder,
+                   const struct pipe_blend_state *blend,
+                   union lp_type type,
+                   LLVMValueRef src,
+                   LLVMValueRef dst,
+                   LLVMValueRef const_,
+                   unsigned alpha_swizzle)
+{
+   struct lp_build_blend_aos_context ctx;
+   LLVMValueRef weighted_src;
+   LLVMValueRef weighted_dst;
+   LLVMValueRef rgb_result;
+   LLVMValueRef alpha_result;
+
+   /* Blending only makes sense on normalized values */
+   assert(type.norm);
+
+   /* Start from an all-NULL context so that cached values get built lazily */
+   memset(&ctx, 0, sizeof ctx);
+   lp_build_context_init(&ctx.base, builder, type);
+   ctx.src = src;
+   ctx.dst = dst;
+   ctx.const_ = const_;
+
+   /* TODO: for certain factor/equation combinations the operations could
+    * be reordered to save some instructions. */
+
+   weighted_src = lp_build_blend_factor(&ctx, src,
+                                        blend->rgb_src_factor,
+                                        blend->alpha_src_factor,
+                                        alpha_swizzle);
+   weighted_dst = lp_build_blend_factor(&ctx, dst,
+                                        blend->rgb_dst_factor,
+                                        blend->alpha_dst_factor,
+                                        alpha_swizzle);
+
+#ifdef DEBUG
+   LLVMSetValueName(weighted_src, "src_term");
+   LLVMSetValueName(weighted_dst, "dst_term");
+#endif
+
+   rgb_result = lp_build_blend_func(&ctx.base, blend->rgb_func,
+                                    weighted_src, weighted_dst);
+
+   /* A single equation covers all the channels */
+   if (blend->rgb_func == blend->alpha_func) {
+      return rgb_result;
+   }
+
+   /* Separate RGB / A equations: evaluate alpha too and merge the two */
+   alpha_result = lp_build_blend_func(&ctx.base, blend->alpha_func,
+                                      weighted_src, weighted_dst);
+
+   return lp_build_blend_swizzle(&ctx, rgb_result, alpha_result,
+                                 LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- SoA.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_blend.h"
+
+
+/**
+ * State used while one SoA blend is being built.
+ *
+ * We may use the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
+ * that LLVM optimization passes wouldn't normally be able to do.
+ *
+ * Every [4] array holds one value per channel, index 3 being alpha.
+ */
+struct lp_build_blend_soa_context
+{
+ struct lp_build_context base;
+
+ /* Inputs, copied from the arguments of lp_build_blend_soa(). */
+ LLVMValueRef src[4];
+ LLVMValueRef dst[4];
+ LLVMValueRef con[4];
+
+ /* Complements of the inputs; NULL until a factor first needs them. */
+ LLVMValueRef inv_src[4];
+ LLVMValueRef inv_dst[4];
+ LLVMValueRef inv_con[4];
+
+ /* min(src alpha, complement of dst alpha); NULL until first needed. */
+ LLVMValueRef src_alpha_saturate;
+
+ /**
+ * We store all factors in a table in order to eliminate redundant
+ * multiplications later.
+ *
+ * factor[0][k] is the color operand and factor[1][k] the blend factor it
+ * is multiplied by; k = 0..3 are the source channels and k = 4..7 the
+ * destination channels.
+ */
+ LLVMValueRef factor[2][8];
+
+ /**
+ * Table with all terms: term[k] is the product of factor[0][k] and
+ * factor[1][k].
+ */
+ LLVMValueRef term[8];
+};
+
+
+/**
+ * Return the value of a blend factor for one channel.
+ *
+ * Complemented inputs and the alpha-saturate value are built on first use
+ * and cached in the context.
+ *
+ * @param factor  one of the PIPE_BLENDFACTOR_x values
+ * @param i       channel being blended (0..3, with 3 being alpha)
+ */
+static LLVMValueRef
+lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
+                          unsigned factor, unsigned i)
+{
+   struct lp_build_context *base = &bld->base;
+   unsigned chan = i;
+
+   /* The *_ALPHA factors read channel 3 whatever channel is being blended */
+   switch (factor) {
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      chan = 3;
+      break;
+   default:
+      break;
+   }
+
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:
+      return base->zero;
+   case PIPE_BLENDFACTOR_ONE:
+      return base->one;
+
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return bld->src[chan];
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return bld->dst[chan];
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return bld->con[chan];
+
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      if (!bld->inv_src[chan]) {
+         bld->inv_src[chan] = lp_build_comp(base, bld->src[chan]);
+      }
+      return bld->inv_src[chan];
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      if (!bld->inv_dst[chan]) {
+         bld->inv_dst[chan] = lp_build_comp(base, bld->dst[chan]);
+      }
+      return bld->inv_dst[chan];
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if (!bld->inv_con[chan]) {
+         bld->inv_con[chan] = lp_build_comp(base, bld->con[chan]);
+      }
+      return bld->inv_con[chan];
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      if (i == 3) {
+         return base->one;
+      }
+      /* min(src alpha, complement of dst alpha) */
+      if (!bld->inv_dst[3]) {
+         bld->inv_dst[3] = lp_build_comp(base, bld->dst[3]);
+      }
+      if (!bld->src_alpha_saturate) {
+         bld->src_alpha_saturate =
+            lp_build_min(base, bld->src[3], bld->inv_dst[3]);
+      }
+      return bld->src_alpha_saturate;
+
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      /* TODO: the SRC1 factors are not implemented; handled like an
+       * unknown factor. */
+   default:
+      assert(0);
+      return base->zero;
+   }
+}
+
+
+/**
+ * Build the blend of src and dst for colors in SoA form.
+ *
+ * Each argument array holds one value per channel, index 3 being alpha.
+ * Channels 0..2 use the rgb factors/equation of the blend state, channel 3
+ * the alpha ones. Identical multiplications and identical final
+ * combinations are emitted only once and shared between channels.
+ *
+ * @param blend  blend factors and equations to apply
+ * @param con    blend constant color
+ * @param res    receives the four blended channels
+ */
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+                   const struct pipe_blend_state *blend,
+                   union lp_type type,
+                   LLVMValueRef src[4],
+                   LLVMValueRef dst[4],
+                   LLVMValueRef con[4],
+                   LLVMValueRef res[4])
+{
+   struct lp_build_blend_soa_context bld;
+   unsigned i, j;
+
+   /* Setup build context */
+   memset(&bld, 0, sizeof bld);
+   lp_build_context_init(&bld.base, builder, type);
+   for (i = 0; i < 4; ++i) {
+      bld.src[i] = src[i];
+      bld.dst[i] = dst[i];
+      bld.con[i] = con[i];
+   }
+
+   /*
+    * Compute src/dst factors.
+    *
+    * Entries 0..3 describe the source products, entries 4..7 the
+    * destination products.
+    */
+   for (i = 0; i < 4; ++i) {
+      unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor;
+      unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor;
+      bld.factor[0][0 + i] = src[i];
+      bld.factor[1][0 + i] = lp_build_blend_soa_factor(&bld, src_factor, i);
+      bld.factor[0][4 + i] = dst[i];
+      bld.factor[1][4 + i] = lp_build_blend_soa_factor(&bld, dst_factor, i);
+   }
+
+   /*
+    * Compute src/dst terms
+    */
+   for (i = 0; i < 8; ++i) {
+
+      /* See if this multiplication has been previously computed
+       * (with the operands in either order) */
+      for (j = 0; j < i; ++j) {
+         if ((bld.factor[0][j] == bld.factor[0][i] &&
+              bld.factor[1][j] == bld.factor[1][i]) ||
+             (bld.factor[0][j] == bld.factor[1][i] &&
+              bld.factor[1][j] == bld.factor[0][i]))
+            break;
+      }
+
+      if (j < i)
+         bld.term[i] = bld.term[j];
+      else
+         bld.term[i] = lp_build_mul(&bld.base, bld.factor[0][i], bld.factor[1][i]);
+   }
+
+   /*
+    * Combine terms
+    */
+   for (i = 0; i < 4; ++i) {
+      unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func;
+      boolean func_commutative = lp_build_blend_func_commutative(func);
+
+      /*
+       * See if this function has been previously applied.
+       *
+       * The comparison must be made on the terms themselves. Indexing
+       * bld.factor with a single subscript compares row addresses, which
+       * never match and fall outside the table.
+       *
+       * An earlier result can be reused when
+       *  - the function is the same and so are both terms, or
+       *  - the terms are exchanged and either the function is the same and
+       *    commutative, or the two functions are the reverse of each other.
+       */
+      for (j = 0; j < i; ++j) {
+         unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func;
+         boolean func_reverse = lp_build_blend_func_reverse(func, prev_func);
+         boolean func_same = func == prev_func;
+
+         if ((func_same &&
+              bld.term[0 + j] == bld.term[0 + i] &&
+              bld.term[4 + j] == bld.term[4 + i]) ||
+             (((func_same && func_commutative) || func_reverse) &&
+              bld.term[0 + j] == bld.term[4 + i] &&
+              bld.term[4 + j] == bld.term[0 + i]))
+            break;
+      }
+
+      if (j < i)
+         res[i] = res[j];
+      else
+         res[i] = lp_build_blend_func(&bld.base, func, bld.term[i + 0], bld.term[i + 4]);
+   }
+}
*/
-#include "lp_bld.h"
#include "lp_bld_type.h"
#include "lp_bld_arit.h"
+#include "lp_bld_blend.h"
#include "lp_test.h"
+/**
+ * Which blend builder a test exercises: lp_build_blend_aos() or
+ * lp_build_blend_soa().
+ *
+ * The numeric values matter: dump_blend_type() uses the mode as a truth
+ * value to choose between the "aos" and "soa" labels.
+ */
+enum vector_mode
+{
+ AoS = 0,
+ SoA = 1
+};
+
+
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
fprintf(fp,
"result\t"
"cycles_per_channel\t"
+ "mode\t"
"type\t"
"sep_func\t"
"sep_src_factor\t"
static void
write_tsv_row(FILE *fp,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type,
double cycles,
boolean success)
{
fprintf(fp, "%s\t", success ? "pass" : "fail");
- fprintf(fp, "%.1f\t", cycles / type.length);
+ if (mode == AoS) {
+ fprintf(fp, "%.1f\t", cycles / type.length);
+ fprintf(fp, "aos\t");
+ }
+
+ if (mode == SoA) {
+ fprintf(fp, "%.1f\t", cycles / (4 * type.length));
+ fprintf(fp, "soa\t");
+ }
fprintf(fp, "%s%u%sx%u\t",
type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
static void
dump_blend_type(FILE *fp,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type)
{
+ fprintf(fp, "%s", mode ? "soa" : "aos");
+
+ fprintf(fp, " type=%s%u%sx%u",
+ type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
+ type.width,
+ type.norm ? "n" : "",
+ type.length);
+
fprintf(fp,
- "%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
+ " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
"rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE),
"rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE),
"rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE),
"alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE),
"alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE));
- fprintf(fp, " type=%s%u%sx%u",
- type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
- type.width,
- type.norm ? "n" : "",
- type.length);
-
fprintf(fp, " ...\n");
fflush(fp);
}
static LLVMValueRef
add_blend_test(LLVMModuleRef module,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type)
{
LLVMTypeRef ret_type;
LLVMValueRef res_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
- LLVMValueRef src;
- LLVMValueRef dst;
- LLVMValueRef con;
- LLVMValueRef res;
ret_type = LLVMInt64Type();
vec_type = lp_build_vec_type(type);
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- src = LLVMBuildLoad(builder, src_ptr, "src");
- dst = LLVMBuildLoad(builder, dst_ptr, "dst");
- con = LLVMBuildLoad(builder, const_ptr, "const");
+ if (mode == AoS) {
+ LLVMValueRef src;
+ LLVMValueRef dst;
+ LLVMValueRef con;
+ LLVMValueRef res;
+
+ src = LLVMBuildLoad(builder, src_ptr, "src");
+ dst = LLVMBuildLoad(builder, dst_ptr, "dst");
+ con = LLVMBuildLoad(builder, const_ptr, "const");
- res = lp_build_blend(builder, blend, type, src, dst, con, 3);
+ res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
- LLVMSetValueName(res, "res");
+ LLVMSetValueName(res, "res");
- LLVMBuildStore(builder, res, res_ptr);
+ LLVMBuildStore(builder, res, res_ptr);
+ }
+
+ if (mode == SoA) {
+ LLVMValueRef src[4];
+ LLVMValueRef dst[4];
+ LLVMValueRef con[4];
+ LLVMValueRef res[4];
+ char src_name[5] = "src?";
+ char dst_name[5] = "dst?";
+ char con_name[5] = "con?";
+ char res_name[5] = "res?";
+ unsigned i;
+
+ for(i = 0; i < 4; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ con_name[3] = dst_name[3] = src_name[3] = "rgba"[i];
+ src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), src_name);
+ dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name);
+ con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), con_name);
+ }
+
+ lp_build_blend_soa(builder, blend, type, src, dst, con, res);
+
+ for(i = 0; i < 4; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res_name[3] = "rgba"[i];
+ LLVMSetValueName(res[i], res_name);
+ LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
+ }
+ }
LLVMBuildRetVoid(builder);;
test_one(unsigned verbose,
FILE *fp,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type)
{
LLVMModuleRef module = NULL;
unsigned i, j;
if(verbose >= 1)
- dump_blend_type(stdout, blend, type);
+ dump_blend_type(stdout, blend, mode, type);
module = LLVMModuleCreateWithName("test");
- func = add_blend_test(module, blend, type);
+ func = add_blend_test(module, blend, mode, type);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
provider = LLVMCreateModuleProviderForExistingModule(module);
if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
if(verbose < 1)
- dump_blend_type(stderr, blend, type);
+ dump_blend_type(stderr, blend, mode, type);
fprintf(stderr, "%s\n", error);
LLVMDisposeMessage(error);
abort();
success = TRUE;
for(i = 0; i < n && success; ++i) {
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- int64_t start_counter = 0;
- int64_t end_counter = 0;
-
- random_vec(type, src);
- random_vec(type, dst);
- random_vec(type, con);
-
- {
- double fsrc[LP_MAX_VECTOR_LENGTH];
- double fdst[LP_MAX_VECTOR_LENGTH];
- double fcon[LP_MAX_VECTOR_LENGTH];
- double fref[LP_MAX_VECTOR_LENGTH];
-
- read_vec(type, src, fsrc);
- read_vec(type, dst, fdst);
- read_vec(type, con, fcon);
-
- for(j = 0; j < type.length; j += 4)
- compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
-
- write_vec(type, ref, fref);
+ if(mode == AoS) {
+ uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ int64_t start_counter = 0;
+ int64_t end_counter = 0;
+
+ random_vec(type, src);
+ random_vec(type, dst);
+ random_vec(type, con);
+
+ {
+ double fsrc[LP_MAX_VECTOR_LENGTH];
+ double fdst[LP_MAX_VECTOR_LENGTH];
+ double fcon[LP_MAX_VECTOR_LENGTH];
+ double fref[LP_MAX_VECTOR_LENGTH];
+
+ read_vec(type, src, fsrc);
+ read_vec(type, dst, fdst);
+ read_vec(type, con, fcon);
+
+ for(j = 0; j < type.length; j += 4)
+ compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
+
+ write_vec(type, ref, fref);
+ }
+
+ start_counter = rdtsc();
+ blend_test_ptr(src, dst, con, res);
+ end_counter = rdtsc();
+
+ cycles[i] = end_counter - start_counter;
+
+ if(!compare_vec(type, res, ref)) {
+ success = FALSE;
+
+ if(verbose < 1)
+ dump_blend_type(stderr, blend, mode, type);
+ fprintf(stderr, "MISMATCH\n");
+
+ fprintf(stderr, " Src: ");
+ dump_vec(stderr, type, src);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Dst: ");
+ dump_vec(stderr, type, dst);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Con: ");
+ dump_vec(stderr, type, con);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Res: ");
+ dump_vec(stderr, type, res);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Ref: ");
+ dump_vec(stderr, type, ref);
+ fprintf(stderr, "\n");
+ }
}
- start_counter = rdtsc();
- blend_test_ptr(src, dst, con, res);
- end_counter = rdtsc();
+ if(mode == SoA) {
+ const unsigned stride = type.length*type.width/8;
+ uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ int64_t start_counter = 0;
+ int64_t end_counter = 0;
+ boolean mismatch;
+
+ for(j = 0; j < 4; ++j) {
+ random_vec(type, src + j*stride);
+ random_vec(type, dst + j*stride);
+ random_vec(type, con + j*stride);
+ }
- cycles[i] = end_counter - start_counter;
+ {
+ double fsrc[4];
+ double fdst[4];
+ double fcon[4];
+ double fref[4];
+ unsigned k;
+
+ for(k = 0; k < type.length; ++k) {
+ for(j = 0; j < 4; ++j) {
+ fsrc[j] = read_elem(type, src + j*stride, k);
+ fdst[j] = read_elem(type, dst + j*stride, k);
+ fcon[j] = read_elem(type, con + j*stride, k);
+ }
- success = compare_vec(type, res, ref);
+ compute_blend_ref(blend, fsrc, fdst, fcon, fref);
- if (!success) {
- if(verbose < 1)
- dump_blend_type(stderr, blend, type);
- fprintf(stderr, "MISMATCH\n");
+ for(j = 0; j < 4; ++j)
+ write_elem(type, ref + j*stride, k, fref[j]);
+ }
+ }
+
+ start_counter = rdtsc();
+ blend_test_ptr(src, dst, con, res);
+ end_counter = rdtsc();
+
+ cycles[i] = end_counter - start_counter;
+
+ mismatch = FALSE;
+ for (j = 0; j < 4; ++j)
+ if(!compare_vec(type, res + j*stride, ref + j*stride))
+ mismatch = TRUE;
- fprintf(stderr, " Src: ");
- dump_vec(stderr, type, src);
- fprintf(stderr, "\n");
+ if (mismatch) {
+ success = FALSE;
- fprintf(stderr, " Dst: ");
- dump_vec(stderr, type, dst);
- fprintf(stderr, "\n");
+ if(verbose < 1)
+ dump_blend_type(stderr, blend, mode, type);
+ fprintf(stderr, "MISMATCH\n");
+ for(j = 0; j < 4; ++j) {
+ char channel = "RGBA"[j];
+ fprintf(stderr, " Src%c: ", channel);
+ dump_vec(stderr, type, src + j*stride);
+ fprintf(stderr, "\n");
- fprintf(stderr, " Con: ");
- dump_vec(stderr, type, con);
- fprintf(stderr, "\n");
+ fprintf(stderr, " Dst%c: ", channel);
+ dump_vec(stderr, type, dst + j*stride);
+ fprintf(stderr, "\n");
- fprintf(stderr, " Res: ");
- dump_vec(stderr, type, res);
- fprintf(stderr, "\n");
+ fprintf(stderr, " Con%c: ", channel);
+ dump_vec(stderr, type, con + j*stride);
+ fprintf(stderr, "\n");
- fprintf(stderr, " Ref: ");
- dump_vec(stderr, type, ref);
- fprintf(stderr, "\n");
+ fprintf(stderr, " Res%c: ", channel);
+ dump_vec(stderr, type, res + j*stride);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Ref%c: ", channel);
+ dump_vec(stderr, type, ref + j*stride);
+ fprintf(stderr, "\n");
+ }
+ }
}
}
}
if(fp)
- write_tsv_row(fp, blend, type, cycles_avg, success);
+ write_tsv_row(fp, blend, mode, type, cycles_avg, success);
if (!success) {
if(verbose < 2)
const unsigned *alpha_src_factor;
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
+ enum vector_mode mode;
const union lp_type *type;
bool success = TRUE;
for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
- for(type = blend_types; type < &blend_types[num_types]; ++type) {
-
- if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
- *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
- continue;
-
- memset(&blend, 0, sizeof blend);
- blend.blend_enable = 1;
- blend.rgb_func = *rgb_func;
- blend.rgb_src_factor = *rgb_src_factor;
- blend.rgb_dst_factor = *rgb_dst_factor;
- blend.alpha_func = *alpha_func;
- blend.alpha_src_factor = *alpha_src_factor;
- blend.alpha_dst_factor = *alpha_dst_factor;
-
- if(!test_one(verbose, fp, &blend, *type))
- success = FALSE;
-
+ for(mode = 0; mode < 2; ++mode) {
+ for(type = blend_types; type < &blend_types[num_types]; ++type) {
+
+ if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+ continue;
+
+ memset(&blend, 0, sizeof blend);
+ blend.blend_enable = 1;
+ blend.rgb_func = *rgb_func;
+ blend.rgb_src_factor = *rgb_src_factor;
+ blend.rgb_dst_factor = *rgb_dst_factor;
+ blend.alpha_func = *alpha_func;
+ blend.alpha_src_factor = *alpha_src_factor;
+ blend.alpha_dst_factor = *alpha_dst_factor;
+
+ if(!test_one(verbose, fp, &blend, mode, *type))
+ success = FALSE;
+
+ }
}
}
}
const unsigned *alpha_src_factor;
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
+ enum vector_mode mode;
const union lp_type *type;
unsigned long i;
bool success = TRUE;
alpha_dst_factor = &blend_factors[random() % num_factors];
} while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
- for(type = blend_types; type < &blend_types[num_types]; ++type) {
+ mode = random() & 1;
- memset(&blend, 0, sizeof blend);
- blend.blend_enable = 1;
- blend.rgb_func = *rgb_func;
- blend.rgb_src_factor = *rgb_src_factor;
- blend.rgb_dst_factor = *rgb_dst_factor;
- blend.alpha_func = *alpha_func;
- blend.alpha_src_factor = *alpha_src_factor;
- blend.alpha_dst_factor = *alpha_dst_factor;
+ type = &blend_types[random() % num_types];
- if(!test_one(verbose, fp, &blend, *type))
- success = FALSE;
- }
+ memset(&blend, 0, sizeof blend);
+ blend.blend_enable = 1;
+ blend.rgb_func = *rgb_func;
+ blend.rgb_src_factor = *rgb_src_factor;
+ blend.rgb_dst_factor = *rgb_dst_factor;
+ blend.alpha_func = *alpha_func;
+ blend.alpha_src_factor = *alpha_src_factor;
+ blend.alpha_dst_factor = *alpha_dst_factor;
+
+ if(!test_one(verbose, fp, &blend, mode, *type))
+ success = FALSE;
}
return success;