From 1dd7bb17c7331f9ecd0bc830b61ada235a56fe6d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 4 Aug 2009 12:09:52 +0100 Subject: [PATCH] llvmpipe: Optimize blend swizzles by using bitmasks instead of shuffles for ubytes. --- src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bld_blend.c | 74 ++--- src/gallium/drivers/llvmpipe/lp_bld_const.c | 35 +++ src/gallium/drivers/llvmpipe/lp_bld_const.h | 10 + src/gallium/drivers/llvmpipe/lp_bld_swizzle.c | 264 ++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_swizzle.h | 87 ++++++ 6 files changed, 416 insertions(+), 55 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_swizzle.c create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 71c55a93ab9..85d0a737faa 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -19,6 +19,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_loop.c', 'lp_bld_logicop.c', 'lp_bld_blend.c', + 'lp_bld_swizzle.c', 'lp_bld_type.c', 'lp_clear.c', 'lp_context.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c index 90afe2e6b67..e070aac378d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c @@ -43,6 +43,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" +#include "lp_bld_swizzle.h" /** @@ -179,67 +180,30 @@ lp_build_blend_swizzle(struct lp_build_blend_context *bld, enum lp_build_blend_swizzle rgb_swizzle, unsigned alpha_swizzle) { - const unsigned n = bld->base.type.length; - LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - if(rgb == alpha) { if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) return rgb; - - alpha = bld->base.undef; - } - - if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA && - !bld->base.type.floating) { -#if 0 - /* 
Use a select */ - /* FIXME: Unfortunetaly select of vectors do not work */ - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - swizzles[j + i] = LLVMConstInt(LLVMInt1Type(), i == alpha_swizzle ? 0 : 1, 0); - - return LLVMBuildSelect(bld->base.builder, LLVMConstVector(swizzles, n), rgb, alpha, ""); -#else - /* XXX: Use a bitmask, as byte shuffles often end up being translated - * into many PEXTRB. Ideally LLVM X86 code generation should pick this - * automatically for us. */ - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - swizzles[j + i] = LLVMConstInt(LLVMIntType(bld->base.type.width), i == alpha_swizzle ? 0 : ~0, 0); - - /* TODO: Unfortunately constant propagation prevents from using PANDN. And - * on SSE4 we have even better -- PBLENDVB */ - return LLVMBuildOr(bld->base.builder, - LLVMBuildAnd(bld->base.builder, rgb, LLVMConstVector(swizzles, n), ""), - LLVMBuildAnd(bld->base.builder, alpha, LLVMBuildNot(bld->base.builder, LLVMConstVector(swizzles, n), ""), ""), - ""); -#endif + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) + return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle); } - - for(j = 0; j < n; j += 4) { - for(i = 0; i < 4; ++i) { - unsigned swizzle; - - if(i == alpha_swizzle && alpha != bld->base.undef) { - /* Take the alpha from the second shuffle argument */ - swizzle = n + j + alpha_swizzle; - } - else if (rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { - /* Take the alpha from the first shuffle argument */ - swizzle = j + alpha_swizzle; - } - else { - swizzle = j + i; - } - - swizzles[j + i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); + else { + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) { + boolean cond[4] = {0, 0, 0, 0}; + cond[alpha_swizzle] = 1; + return lp_build_select_aos(&bld->base, alpha, rgb, cond); + } + if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) { + unsigned char swizzle[4]; + swizzle[0] = alpha_swizzle; + swizzle[1] = alpha_swizzle; + swizzle[2] = alpha_swizzle; + swizzle[3] = alpha_swizzle; + 
swizzle[alpha_swizzle] += 4; + return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle); } } - - return LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), ""); + assert(0); + return bld->base.undef; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c index 44fcc467f40..fe1c627eeeb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c @@ -143,3 +143,38 @@ lp_build_const_aos(union lp_type type, return LLVMConstVector(elems, type.length); } + + +LLVMValueRef +lp_build_const_shift(union lp_type type, + int c) +{ + LLVMTypeRef elem_type = LLVMIntType(type.width); + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + + for(i = 0; i < type.length; ++i) + elems[i] = LLVMConstInt(elem_type, c, 0); + + return LLVMConstVector(elems, type.length); +} + + +LLVMValueRef +lp_build_const_mask_aos(union lp_type type, + boolean cond[4]) +{ + LLVMTypeRef elem_type = LLVMIntType(type.width); + LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + + for(j = 0; j < type.length; j += 4) + for(i = 0; i < 4; ++i) + masks[j + i] = LLVMConstInt(elem_type, cond[i] ? 
~0 : 0, 0); + + return LLVMConstVector(masks, type.length); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index f2e5deca944..98ed8911a55 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -61,4 +61,14 @@ lp_build_const_aos(union lp_type type, const unsigned char *swizzle); +LLVMValueRef +lp_build_const_shift(union lp_type type, + int c); + + +LLVMValueRef +lp_build_const_mask_aos(union lp_type type, + boolean cond[4]); + + #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c new file mode 100644 index 00000000000..0205d17ff17 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -0,0 +1,264 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_swizzle.h"
+
+/** Replicate channel `channel` of every XYZW group of a into all four channels. */
+LLVMValueRef
+lp_build_broadcast_aos(struct lp_build_context *bld,
+                       LLVMValueRef a,
+                       unsigned channel)
+{
+   const union lp_type type = bld->type;
+   const unsigned n = type.length;
+   unsigned i, j;
+
+   if(a == bld->undef || a == bld->zero || a == bld->one)
+      return a;
+
+   if (n <= 4) {
+      /*
+       * Shuffle: every element of a group reads element j + channel.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+   }
+   else {
+      /*
+       * Bit mask and recursive shifts (shown for channel Y):
+       *
+       *   XYZW XYZW .... XYZW
+       *   _Y__ _Y__ .... _Y__
+       *   YY__ YY__ .... YY__
+       *   YYYY YYYY .... YYYY
+       */
+      union lp_type type4 = type;
+      const signed char shifts[4][2] = { /* signed: plain char may be unsigned */
+         { 1,  2},
+         {-1,  2},
+         { 1, -2},
+         {-1, -2}
+      };
+      boolean cond[4];
+
+      /* Mask off everything but the requested channel of each group. */
+      memset(cond, 0, sizeof cond);
+      cond[channel] = 1;
+
+      a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
+
+      type4.width *= 4;
+      type4.length /= 4;
+
+      a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+
+      for(i = 0; i < 2; ++i) {
+         LLVMValueRef tmp = NULL;
+         int shift = shifts[channel][i];
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         shift = -shift;
+#endif
+
+         if(shift > 0)
+            tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_shift(type4, shift*type.width), "");
+         if(shift < 0)
+            tmp = LLVMBuildShl(bld->builder, a, lp_build_const_shift(type4, -shift*type.width), "");
+
+         assert(tmp);
+         if(tmp)
+            a = LLVMBuildOr(bld->builder, a, tmp, "");
+      }
+
+      return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), "");
+   }
+}
+
+/** Per XYZW channel i, take a where cond[i] is set and b otherwise. */
+LLVMValueRef
+lp_build_select_aos(struct lp_build_context *bld,
+                    LLVMValueRef a,
+                    LLVMValueRef b,
+                    boolean cond[4])
+{
+   const union lp_type type = bld->type;
+   const unsigned n = type.length;
+   unsigned i, j;
+
+   if(a == b)
+      return a;
+   if(cond[0] && cond[1] && cond[2] && cond[3])
+      return a;
+   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
+      return b;
+   if(a == bld->undef || b == bld->undef)
+      return bld->undef;
+
+   /*
+    * There are three major ways of accomplishing this:
+    * - with a shuffle,
+    * - with a select,
+    * - or with a bit mask.
+    *
+    * Select isn't supported for vector types yet.
+    * The flip between these is empirical and might need to be revisited.
+    */
+   if (n <= 4) {
+      /*
+       * Shuffle: indices below n read from a, indices from n up read from b.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+   }
+#if 0
+   else if(0) {
+      /* FIXME: Unfortunately select of vectors do not work */
+      /* Use a select */
+      LLVMTypeRef elem_type = LLVMInt1Type();
+      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
+
+      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
+   }
+#endif
+   else {
+      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
+
+      /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
+
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
+
+      /* This often gets translated to PANDN, but sometimes the NOT is
+       * pre-computed and stored in another constant. The best strategy depends
+       * on available registers, so it is not a big deal -- hopefully LLVM makes
+       * the right decision given the rest of the program.
+       */
+      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+
+      return LLVMBuildOr(bld->builder, a, b, "");
+   }
+}
+
+/** Reorder the channels of every XYZW group of a: out[i] = a[swizzle[i]]. */
+LLVMValueRef
+lp_build_swizzle1_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      unsigned char swizzle[4])
+{
+   const unsigned n = bld->type.length;
+   unsigned i, j;
+
+   if(a == bld->undef || a == bld->zero || a == bld->one)
+      return a;
+
+   if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
+      return lp_build_broadcast_aos(bld, a, swizzle[0]);
+
+   {
+      /*
+       * Shuffle: element i of each group reads element j + swizzle[i] of a.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+   }
+}
+
+/** As swizzle1, but indices 4..7 pick from b; rewrites swizzle[] in place if a == b. */
+LLVMValueRef
+lp_build_swizzle2_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      LLVMValueRef b,
+                      unsigned char swizzle[4])
+{
+   const unsigned n = bld->type.length;
+   unsigned i, j;
+
+   if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
+      return lp_build_swizzle1_aos(bld, a, swizzle);
+
+   if(a == b) {
+      swizzle[0] %= 4;
+      swizzle[1] %= 4;
+      swizzle[2] %= 4;
+      swizzle[3] %= 4;
+      return lp_build_swizzle1_aos(bld, a, swizzle);
+   }
+
+   if(swizzle[0] % 4 == 0 &&
+      swizzle[1] % 4 == 1 &&
+      swizzle[2] % 4 == 2 &&
+      swizzle[3] % 4 == 3) {
+      boolean cond[4]; /* cond[i] set: channel i comes from b */
+      cond[0] = swizzle[0] / 4;
+      cond[1] = swizzle[1] / 4;
+      cond[2] = swizzle[2] / 4;
+      cond[3] = swizzle[3] / 4;
+      return lp_build_select_aos(bld, b, a, cond); /* select returns its first operand where cond is set */
+   }
+
+   {
+      /*
+       * Shuffle: swizzle values 4..7 are remapped onto the second operand.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+   }
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
new file mode 100644
index 00000000000..aeb4f42fa8a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for swizzling and per-channel selection of XYZW (AoS) vectors.
+ *
+ * @author Jose Fonseca
+ */
+
+
+#ifndef LP_BLD_SWIZZLE_H
+#define LP_BLD_SWIZZLE_H
+
+
+#include <llvm-c/Core.h>
+
+
+union lp_type;
+struct lp_build_context;
+
+
+/**
+ * Broadcast one channel of a vector composed of arrays of XYZW structures into
+ * all four channels.
+ */
+LLVMValueRef
+lp_build_broadcast_aos(struct lp_build_context *bld,
+                       LLVMValueRef a,
+                       unsigned channel);
+
+/** Per XYZW channel i, take a where cond[i] is non-zero and b otherwise. */
+LLVMValueRef
+lp_build_select_aos(struct lp_build_context *bld,
+                    LLVMValueRef a,
+                    LLVMValueRef b,
+                    boolean cond[4]);
+
+
+/**
+ * Swizzle a vector consisting of an array of XYZW structs.
+ *
+ * @param swizzle source channel for each output channel, in the [0,4[ range.
+ */
+LLVMValueRef
+lp_build_swizzle1_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      unsigned char swizzle[4]);
+
+
+/**
+ * Swizzle two vectors, each consisting of an array of XYZW structs.
+ *
+ * @param swizzle is in the [0,8[ range. Values in the [4,8[ range refer to b.
+ */
+LLVMValueRef
+lp_build_swizzle2_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      LLVMValueRef b,
+                      unsigned char swizzle[4]);
+
+
+#endif /* !LP_BLD_SWIZZLE_H */
-- 
2.30.2