From 9a8e9f4595b66ea094b293da1afcded8f06ab3d6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 22 Sep 2010 14:48:28 +0100 Subject: [PATCH] llvmpipe: Special case complementary and identical blend factors in SoA. One multiplication instead of two. Also fix floating point random number generation and verification. TODO: Do the same for AoS blending. --- .../drivers/llvmpipe/lp_bld_blend_aos.c | 3 -- .../drivers/llvmpipe/lp_bld_blend_soa.c | 46 +++++++++++++++++-- src/gallium/drivers/llvmpipe/lp_test_blend.c | 41 ++++++----------- src/gallium/drivers/llvmpipe/lp_test_main.c | 33 ++++++++----- 4 files changed, 79 insertions(+), 44 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index b5924cbb7dc..d1c9b88f9bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -320,9 +320,6 @@ lp_build_blend_aos(LLVMBuilderRef builder, if(!blend->rt[rt].blend_enable) return src; - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); - /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b9c7a6ceed6..30d261e979f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -195,6 +195,13 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, } +static boolean +lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) +{ + return dst_factor == (src_factor ^ 0x10); +} + + /** * Generate blend code in SOA mode. * \param rt render target index (to index the blend / colormask state) @@ -243,8 +250,41 @@ lp_build_blend_soa(LLVMBuilderRef builder, unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; boolean func_commutative = lp_build_blend_func_commutative(func); - /* It makes no sense to blend unless values are normalized */ - assert(type.norm); + if (func == PIPE_BLEND_ADD && + lp_build_blend_factor_complementary(src_factor, dst_factor) && 0) { + /* + * Special case linear interpolation, (i.e., complementary factors). + */ + + LLVMValueRef weight; + if (src_factor < dst_factor) { + weight = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, dst[i], src[i]); + } else { + weight = lp_build_blend_soa_factor(&bld, dst_factor, i); + res[i] = lp_build_lerp(&bld.base, weight, src[i], dst[i]); + } + continue; + } + + if ((func == PIPE_BLEND_ADD || + func == PIPE_BLEND_SUBTRACT || + func == PIPE_BLEND_REVERSE_SUBTRACT) && + src_factor == dst_factor && + type.floating) { + /* + * Special common factor. + * + * XXX: Only for floating points for now, since saturation will + * cause different results. + */ + + LLVMValueRef factor; + factor = lp_build_blend_soa_factor(&bld, src_factor, i); + res[i] = lp_build_blend_func(&bld.base, func, src[i], dst[i]); + res[i] = lp_build_mul(&bld.base, res[i], factor); + continue; + } /* * Compute src/dst factors. 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index d0389f0cb0b..8b6b5e1298f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -243,19 +243,6 @@ add_blend_test(LLVMModuleRef module, } -/** Add and limit result to ceiling of 1.0 */ -#define ADD_SAT(R, A, B) \ -do { \ - R = (A) + (B); if (R > 1.0f) R = 1.0f; \ -} while (0) - -/** Subtract and limit result to floor of 0.0 */ -#define SUB_SAT(R, A, B) \ -do { \ - R = (A) - (B); if (R < 0.0f) R = 0.0f; \ -} while (0) - - static void compute_blend_ref_term(unsigned rgb_factor, unsigned alpha_factor, @@ -423,19 +410,19 @@ compute_blend_ref(const struct pipe_blend_state *blend, */ switch (blend->rt[0].rgb_func) { case PIPE_BLEND_ADD: - ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */ - ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */ - ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */ + res[0] = src_term[0] + dst_term[0]; /* R */ + res[1] = src_term[1] + dst_term[1]; /* G */ + res[2] = src_term[2] + dst_term[2]; /* B */ break; case PIPE_BLEND_SUBTRACT: - SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */ - SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */ - SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */ + res[0] = src_term[0] - dst_term[0]; /* R */ + res[1] = src_term[1] - dst_term[1]; /* G */ + res[2] = src_term[2] - dst_term[2]; /* B */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */ - SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */ - SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */ + res[0] = dst_term[0] - src_term[0]; /* R */ + res[1] = dst_term[1] - src_term[1]; /* G */ + res[2] = dst_term[2] - src_term[2]; /* B */ break; case PIPE_BLEND_MIN: res[0] = MIN2(src_term[0], dst_term[0]); /* R */ @@ -456,13 +443,13 @@ compute_blend_ref(const struct pipe_blend_state *blend, */ switch (blend->rt[0].alpha_func) { case PIPE_BLEND_ADD: - 
ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */ + res[3] = src_term[3] + dst_term[3]; /* A */ break; case PIPE_BLEND_SUBTRACT: - SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */ + res[3] = src_term[3] - dst_term[3]; /* A */ break; case PIPE_BLEND_REVERSE_SUBTRACT: - SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */ + res[3] = dst_term[3] - src_term[3]; /* A */ break; case PIPE_BLEND_MIN: res[3] = MIN2(src_term[3], dst_term[3]); /* A */ @@ -676,6 +663,8 @@ test_one(unsigned verbose, fprintf(stderr, " Ref%c: ", channel); dump_vec(stderr, type, ref + j*stride); fprintf(stderr, "\n"); + + fprintf(stderr, "\n"); } } } @@ -773,7 +762,7 @@ blend_funcs[] = { const struct lp_type blend_types[] = { /* float, fixed, sign, norm, width, len */ - { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, /* f32 x 4 */ { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ }; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 7bbbc61d4c2..7a0d06ae2c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -205,16 +205,19 @@ random_elem(struct lp_type type, void *dst, unsigned index) assert(index < type.length); value = (double)rand()/(double)RAND_MAX; if(!type.norm) { - unsigned long long mask; - if (type.floating) - mask = ~(unsigned long long)0; - else if (type.fixed) - mask = ((unsigned long long)1 << (type.width / 2)) - 1; - else if (type.sign) - mask = ((unsigned long long)1 << (type.width - 1)) - 1; - else - mask = ((unsigned long long)1 << type.width) - 1; - value += (double)(mask & rand()); + if (type.floating) { + value *= 2.0; + } + else { + unsigned long long mask; + if (type.fixed) + mask = ((unsigned long long)1 << (type.width / 2)) - 1; + else if (type.sign) + mask = ((unsigned long long)1 << (type.width - 1)) - 1; + else + mask = ((unsigned long long)1 << type.width) - 1; + value += (double)(mask & rand()); + } } 
if(!type.sign) if(rand() & 1) @@ -261,12 +264,18 @@ boolean compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) { unsigned i; + eps *= type.floating ? 8.0 : 2.0; for (i = 0; i < type.length; ++i) { double res_elem = read_elem(type, res, i); double ref_elem = read_elem(type, ref, i); - double delta = fabs(res_elem - ref_elem); - if(delta >= 2.0*eps) + double delta = res_elem - ref_elem; + if (ref_elem < -1.0 || ref_elem > 1.0) { + delta /= ref_elem; + } + delta = fabs(delta); + if (delta >= eps) { return FALSE; + } } return TRUE; -- 2.30.2