From: José Fonseca Date: Sun, 25 Oct 2009 11:48:17 +0000 (+0000) Subject: llvmpipe: Immediate multiplication. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e4c5e01c109e51baaad23e90d08e8543b0fd6c07;p=mesa.git llvmpipe: Immediate multiplication. --- diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 93e797cb449..9c59677a741 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -47,6 +47,7 @@ #include "util/u_memory.h" #include "util/u_debug.h" +#include "util/u_math.h" #include "util/u_string.h" #include "util/u_cpu_detect.h" @@ -424,6 +425,59 @@ lp_build_mul(struct lp_build_context *bld, } +/** + * Small vector x scale multiplication optimization. + */ +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b) +{ + LLVMValueRef factor; + + if(b == 0) + return bld->zero; + + if(b == 1) + return a; + + if(b == -1) + return LLVMBuildNeg(bld->builder, a, ""); + + if(b == 2 && bld->type.floating) + return lp_build_add(bld, a, a); + + if(util_is_pot(b)) { + unsigned shift = ffs(b) - 1; + + if(bld->type.floating) { +#if 0 + /* + * Power of two multiplication by directly manipulating the mantissa. + * + * XXX: This might not be always faster, it will introduce a small error + * for multiplication by zero, and it will produce wrong results + * for Inf and NaN. + */ + unsigned mantissa = lp_mantissa(bld->type); + factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(bld->builder, a, factor, ""); + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + return a; +#endif + } + else { + factor = lp_build_const_scalar(bld->type, shift); + return LLVMBuildShl(bld->builder, a, factor, ""); + } + } + + factor = lp_build_const_scalar(bld->type, (double)b); + return lp_build_mul(bld, a, factor); +} + + /** * Generate a / b */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 4e568c055e2..62be4b9aee1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -66,6 +66,11 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b); + LLVMValueRef lp_build_div(struct lp_build_context *bld, LLVMValueRef a, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1e..818c0e943e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -108,32 +108,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, } -/** - * Small vector x scale multiplication optimization. - * - * TODO: Should be elsewhere. - */ -static LLVMValueRef -coeff_multiply(struct lp_build_interp_soa_context *bld, - LLVMValueRef coeff, - int step) -{ - LLVMValueRef factor; - - switch(step) { - case 0: - return bld->base.zero; - case 1: - return coeff; - case 2: - return lp_build_add(&bld->base, coeff, coeff); - default: - factor = lp_build_const_scalar(bld->base.type, (double)step); - return lp_build_mul(&bld->base, coeff, factor); - } -} - - /** * Multiply the dadx and dady with the xstep and ystep respectively. */ @@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld) if (mode != TGSI_INTERPOLATE_CONSTANT) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); + bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep); + bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index f7a030fb8c0..42e4ee6986d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -309,7 +309,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; - LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255; + LLVMValueRef i32_c8, i32_c128, i32_c255; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; LLVMValueRef x0, x1; @@ -328,9 +328,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); - f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0); - s = lp_build_mul(&bld->coord_bld, s, f32_c256); - t = lp_build_mul(&bld->coord_bld, t, f32_c256); + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + t = lp_build_mul_imm(&bld->coord_bld, t, 256); s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");