From 679dd26623a53b5a052845bf4c6aef224cfdd5a2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Sat, 9 Oct 2010 12:12:03 +0100 Subject: [PATCH] gallivm: Special bri-linear computation path for unmodified rho. --- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 111 ++++++++++++++---- 1 file changed, 91 insertions(+), 20 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 5bc3c263a0f..43ea8b1a140 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -294,31 +294,30 @@ lp_build_rho(struct lp_build_sample_context *bld, * TODO: This could be done in fixed point, where applicable. */ static void -lp_build_brilinear_lod(struct lp_build_sample_context *bld, +lp_build_brilinear_lod(struct lp_build_context *bld, LLVMValueRef lod, double factor, LLVMValueRef *out_lod_ipart, LLVMValueRef *out_lod_fpart) { - struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef lod_fpart; - float pre_offset = (factor - 0.5)/factor - 0.5; - float post_offset = 1 - factor; + double pre_offset = (factor - 0.5)/factor - 0.5; + double post_offset = 1 - factor; if (0) { lp_build_printf(bld->builder, "lod = %f\n", lod); } - lod = lp_build_add(float_bld, lod, - lp_build_const_vec(float_bld->type, pre_offset)); + lod = lp_build_add(bld, lod, + lp_build_const_vec(bld->type, pre_offset)); - lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, &lod_fpart); + lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); - lod_fpart = lp_build_mul(float_bld, lod_fpart, - lp_build_const_vec(float_bld->type, factor)); + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); - lod_fpart = lp_build_add(float_bld, lod_fpart, - lp_build_const_vec(float_bld->type, post_offset)); + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); /* * It's not necessary to clamp lod_fpart since: @@ -335,6 
+334,61 @@ lp_build_brilinear_lod(struct lp_build_sample_context *bld, } +/* + * Combined log2 and brilinear lod computation. + * + * It's in all respects identical to calling lp_build_fast_log2() and + * lp_build_brilinear_lod() above, but by combining we can compute the integer + * and fractional parts independently. + */ +static void +lp_build_brilinear_rho(struct lp_build_context *bld, + LLVMValueRef rho, + double factor, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) +{ + LLVMValueRef lod_ipart; + LLVMValueRef lod_fpart; + + const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); + const double post_offset = 1 - 2*factor; + + assert(bld->type.floating); + + assert(lp_check_value(bld->type, rho)); + + /* + * The pre factor will make the intersections with the exact powers of two + * happen precisely where we want them to be, which means that the integer + * part will not need any post adjustments. + */ + rho = lp_build_mul(bld, rho, + lp_build_const_vec(bld->type, pre_factor)); + + /* ipart = ifloor(log2(rho)) */ + lod_ipart = lp_build_extract_exponent(bld, rho, 0); + + /* fpart = rho / 2**ipart */ + lod_fpart = lp_build_extract_mantissa(bld, rho); + + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); + + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); + + /* + * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: + * - the above expression will never produce numbers greater than one. + * - the mip filtering branch is only taken if lod_fpart is positive + */ + + *out_lod_ipart = lod_ipart; + *out_lod_fpart = lod_fpart; +} + + /** * Generate code to compute texture level of detail (lambda). 
* \param ddx partial derivatives of (s, t, r, q) with respect to X @@ -389,16 +443,32 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, rho = lp_build_rho(bld, ddx, ddy); - /* compute lod = log2(rho) */ - if ((mip_filter == PIPE_TEX_MIPFILTER_NONE || - mip_filter == PIPE_TEX_MIPFILTER_NEAREST) && - !lod_bias && + /* + * Compute lod = log2(rho) + */ + + if (!lod_bias && !bld->static_state->lod_bias_non_zero && !bld->static_state->apply_max_lod && !bld->static_state->apply_min_lod) { - *out_lod_ipart = lp_build_ilog2(float_bld, rho); - *out_lod_fpart = bld->float_bld.zero; - return; + /* + * Special case when there are no post-log2 adjustments, which + * saves instructions by keeping the integer and fractional lod + * computations separate from the start. + */ + + if (mip_filter == PIPE_TEX_MIPFILTER_NONE || + mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + *out_lod_ipart = lp_build_ilog2(float_bld, rho); + *out_lod_fpart = bld->float_bld.zero; + return; + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && + BRILINEAR_FACTOR > 1.0) { + lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR, + out_lod_ipart, out_lod_fpart); + return; + } } if (0) { @@ -438,20 +508,21 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { if (BRILINEAR_FACTOR > 1.0) { - lp_build_brilinear_lod(bld, lod, BRILINEAR_FACTOR, + lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR, out_lod_ipart, out_lod_fpart); } else { lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart); } - lp_build_name(*out_lod_ipart, "lod_ipart"); lp_build_name(*out_lod_fpart, "lod_fpart"); } else { *out_lod_ipart = lp_build_iround(float_bld, lod); } + lp_build_name(*out_lod_ipart, "lod_ipart"); + return; } -- 2.30.2