From 57907e7fd9fc63b9023d0e2b08934c2d0acf2953 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Jos=C3=A9=20Fonseca?=
Date: Wed, 12 Aug 2009 12:42:06 +0100
Subject: [PATCH] llvmpipe: Translate approximate log2/exp2.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c    | 152 ++++++++++++------
 src/gallium/drivers/llvmpipe/lp_bld_arit.h    |  14 ++
 .../drivers/llvmpipe/lp_bld_tgsi_soa.c        | 103 ++++++------
 3 files changed, 169 insertions(+), 100 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 7b3932f5222..aec3e297f4a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -736,42 +736,70 @@ const double lp_build_exp2_polynomial[] = {
 };
 
 
-LLVMValueRef
-lp_build_exp2(struct lp_build_context *bld,
-              LLVMValueRef x)
+void
+lp_build_exp2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp2_int_part,
+                     LLVMValueRef *p_frac_part,
+                     LLVMValueRef *p_exp2)
 {
    const union lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
    LLVMValueRef ipart;
-   LLVMValueRef fpart, expipart, expfpart;
+   LLVMValueRef fpart, expipart, expfpart, res;
 
-   /* TODO: optimize the constant case */
-   if(LLVMIsConstant(x))
-      debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+   if(p_exp2_int_part || p_frac_part || p_exp2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__);
 
-   assert(type.floating && type.width == 32);
+      assert(type.floating && type.width == 32);
 
-   x = lp_build_min(bld, x, lp_build_const_uni(type,  129.0));
-   x = lp_build_max(bld, x, lp_build_const_uni(type, -126.99999));
+      x = lp_build_min(bld, x, lp_build_const_uni(type,  129.0));
+      x = lp_build_max(bld, x, lp_build_const_uni(type, -126.99999));
 
-   /* ipart = int(x - 0.5) */
-   ipart = LLVMBuildSub(bld->builder, x, lp_build_const_uni(type, 0.5f), "");
-   ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
+      /* ipart = int(x - 0.5) */
+      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_uni(type, 0.5f), "");
+      ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
 
-   /* fpart = x - ipart */
-   fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
-   fpart = LLVMBuildSub(bld->builder, x, fpart, "");
+      /* fpart = x - ipart */
+      fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
+      fpart = LLVMBuildSub(bld->builder, x, fpart, "");
+   }
+
+   if(p_exp2_int_part || p_exp2) {
+      /* expipart = (float) (1 << ipart) */
+      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_uni(type, 127), "");
+      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_uni(type, 23), "");
+      expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
+   }
+
+   if(p_exp2) {
+      expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
+                                     Elements(lp_build_exp2_polynomial));
+
+      res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+   }
 
-   /* expipart = (float) (1 << ipart) */
-   expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_uni(type, 127), "");
-   expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_uni(type, 23), "");
-   expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
+   if(p_exp2_int_part)
+      *p_exp2_int_part = expipart;
 
-   expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
-                                  Elements(lp_build_exp2_polynomial));
+   if(p_frac_part)
+      *p_frac_part = fpart;
 
-   return LLVMBuildMul(bld->builder, expipart, expfpart, "");
+   if(p_exp2)
+      *p_exp2 = res;
+}
+
+
+LLVMValueRef
+lp_build_exp2(struct lp_build_context *bld,
+              LLVMValueRef x)
+{
+   LLVMValueRef res;
+   lp_build_exp2_approx(bld, x, NULL, NULL, &res);
+   return res;
 }
 
 
@@ -798,9 +826,12 @@ const double lp_build_log2_polynomial[] = {
 /**
  * See http://www.devmaster.net/forums/showthread.php?p=43580
  */
-LLVMValueRef
-lp_build_log2(struct lp_build_context *bld,
-              LLVMValueRef x)
+void
+lp_build_log2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp,
+                     LLVMValueRef *p_floor_log2,
+                     LLVMValueRef *p_log2)
 {
    const union lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(type);
@@ -810,34 +841,63 @@ lp_build_log2(struct lp_build_context *bld,
    LLVMValueRef mantmask = lp_build_int_const_uni(type, 0x007fffff);
    LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
 
-   LLVMValueRef i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
-
+   LLVMValueRef i;
    LLVMValueRef exp;
    LLVMValueRef mant;
+   LLVMValueRef logexp;
    LLVMValueRef logmant;
+   LLVMValueRef res;
 
-   /* TODO: optimize the constant case */
-   if(LLVMIsConstant(x))
-      debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+   if(p_exp || p_floor_log2 || p_log2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n", __FUNCTION__);
 
-   assert(type.floating && type.width == 32);
+      assert(type.floating && type.width == 32);
 
-   /* exp = (float) exponent(x) */
-   exp = LLVMBuildAnd(bld->builder, i, expmask, "");
-   exp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
-   exp = LLVMBuildSub(bld->builder, exp, lp_build_int_const_uni(type, 127), "");
-   exp = LLVMBuildSIToFP(bld->builder, exp, vec_type, "");
+      i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
 
-   /* mant = (float) mantissa(x) */
-   mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
-   mant = LLVMBuildOr(bld->builder, mant, one, "");
-   mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
+      /* exp = exponent bits of x, still in place (integer vector) */
+      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+   }
+
+   if(p_floor_log2 || p_log2) {
+      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
+      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_uni(type, 127), "");
+      logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
+   }
 
-   logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
-                                 Elements(lp_build_log2_polynomial));
+   if(p_log2) {
+      /* mant = mantissa(x) with the exponent of 1.0, reinterpreted as float in [1, 2) */
+      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+      mant = LLVMBuildOr(bld->builder, mant, one, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");
 
-   /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
-   logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+                                    Elements(lp_build_log2_polynomial));
+
+      /* Multiply by (mant - 1): raises the polynomial degree by one and ensures that log2(1) == 0 */
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+   }
 
-   return LLVMBuildAdd(bld->builder, logmant, exp, "");
+   if(p_exp)
+      *p_exp = LLVMBuildBitCast(bld->builder, exp, vec_type, ""); /* 2^floor(log2(x)) as float */
+
+   if(p_floor_log2)
+      *p_floor_log2 = logexp;
+
+   if(p_log2)
+      *p_log2 = res;
+}
+
+
+LLVMValueRef
+lp_build_log2(struct lp_build_context *bld,
+              LLVMValueRef x)
+{
+   LLVMValueRef res;
+   lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   return res;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 9f8fccb0d4e..fc8cb25966e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -126,4 +126,18 @@ LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
               LLVMValueRef a);
 
+void
+lp_build_exp2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp2_int_part,
+                     LLVMValueRef *p_frac_part,
+                     LLVMValueRef *p_exp2);
+
+void
+lp_build_log2_approx(struct lp_build_context *bld,
+                     LLVMValueRef x,
+                     LLVMValueRef *p_exp,
+                     LLVMValueRef *p_floor_log2,
+                     LLVMValueRef *p_log2);
+
 #endif /* !LP_BLD_ARIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 48eb7714866..c9143ebfe44 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -521,10 +521,7 @@ emit_instruction(
             tmp2 = FETCH( bld, *inst, 0, CHAN_W );
             tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
             tmp0 = FETCH( bld, *inst, 0, CHAN_X );
-            sse_xorps(
-               bld,
-               make_xmm( 2 ),
-               make_xmm( 2 ) );
+            tmp2 = bld->base.zero;
             sse_cmpps(
                bld,
                make_xmm( 2 ),
@@ -560,34 +557,31 @@ emit_instruction(
       }
       break;
 
-#if 0
    case TGSI_OPCODE_EXP:
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         tmp0 = FETCH( bld, *inst, 0, CHAN_X );
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            tmp1 = tmp0;
-            emit_flr( bld, 2, 1 );
-            /* dst.x = ex2(floor(src.x)) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
-               tmp2 = tmp1;
-               tmp2 = lp_build_exp2( &bld->base, tmp2);
-               STORE( bld, *inst, 0, CHAN_X, tmp2);
-            }
-            /* dst.y = src.x - floor(src.x) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-               tmp2 = tmp0;
-               tmp2 = lp_build_sub( &bld->base, tmp2, tmp1);
-               STORE( bld, *inst, 0, CHAN_Y, tmp2);
-            }
-         }
-         /* dst.z = ex2(src.x) */
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            tmp0 = lp_build_exp2( &bld->base, tmp0);
-            STORE( bld, *inst, 0, CHAN_Z, tmp0);
-         }
+         LLVMValueRef *p_exp2_int_part = NULL;
+         LLVMValueRef *p_frac_part = NULL;
+         LLVMValueRef *p_exp2 = NULL;
+
+         src0 = FETCH( bld, *inst, 0, CHAN_X );
+
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            p_exp2_int_part = &tmp0;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+            p_frac_part = &tmp1;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            p_exp2 = &tmp2;
+
+         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
+
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            STORE( bld, *inst, 0, CHAN_X, tmp0);
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+            STORE( bld, *inst, 0, CHAN_Y, tmp1);
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            STORE( bld, *inst, 0, CHAN_Z, tmp2);
       }
       /* dst.w = 1.0 */
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
@@ -595,44 +589,45 @@ emit_instruction(
          STORE( bld, *inst, 0, CHAN_W, tmp0);
       }
       break;
-#endif
 
-#if 0
    case TGSI_OPCODE_LOG:
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         tmp0 = FETCH( bld, *inst, 0, CHAN_X );
-         tmp0 = lp_build_abs( &bld->base, tmp0 );
-         tmp1 = tmp0;
-         tmp1 = lp_build_log2( &bld->base, tmp1);
-         /* dst.z = lg2(abs(src.x)) */
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            STORE( bld, *inst, 0, CHAN_Z, tmp1);
-         }
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            emit_flr( bld, 2, 1 );
-            /* dst.x = floor(lg2(abs(src.x))) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
-               STORE( bld, *inst, 0, CHAN_X, tmp1);
-            }
-            /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-               tmp1 = lp_build_exp2( &bld->base, tmp1);
-               emit_rcp( bld, 1, 1 );
-               tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
-               STORE( bld, *inst, 0, CHAN_Y, tmp0);
-            }
+         LLVMValueRef *p_floor_log2 = NULL;
+         LLVMValueRef *p_exp = NULL;
+         LLVMValueRef *p_log2 = NULL;
+
+         src0 = FETCH( bld, *inst, 0, CHAN_X );
+         src0 = lp_build_abs( &bld->base, src0 );
+
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            p_floor_log2 = &tmp0;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+            p_exp = &tmp1;
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            p_log2 = &tmp2;
+
+         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
+
+         /* dst.x = floor(lg2(abs(src.x))) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+            STORE( bld, *inst, 0, CHAN_X, tmp0);
+         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            tmp1 = lp_build_div( &bld->base, src0, tmp1);
+            STORE( bld, *inst, 0, CHAN_Y, tmp1);
          }
+         /* dst.z = lg2(abs(src.x)) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+            STORE( bld, *inst, 0, CHAN_Z, tmp2);
       }
       /* dst.w = 1.0 */
       if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
-         emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
+         tmp0 = bld->base.one;
          STORE( bld, *inst, 0, CHAN_W, tmp0);
       }
       break;
-#endif
 
    case TGSI_OPCODE_MUL:
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-- 
2.30.2