From af05f6157668b3c5e6fd73c3d743b11e619b9067 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 6 Oct 2010 18:31:36 +0100 Subject: [PATCH] gallivm: Combined ifloor & fract helper. The only way to ensure we don't do redundant FP <-> SI conversions. --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 42 +++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_arit.h | 6 +++ src/gallium/auxiliary/gallivm/lp_bld_sample.c | 4 +- .../auxiliary/gallivm/lp_bld_sample_soa.c | 41 +++++++----------- 4 files changed, 65 insertions(+), 28 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 15b74410188..64c468c14d4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1359,6 +1359,48 @@ lp_build_iceil(struct lp_build_context *bld, } +/** + * Combined ifloor() & fract(). + * + * Preferred to calling the functions separately, as it will ensure that the + * strategy (floor() vs ifloor()) that results in less redundant work is used. + */ +void +lp_build_ifloor_fract(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef *out_ipart, + LLVMValueRef *out_fpart) +{ + + + const struct lp_type type = bld->type; + LLVMValueRef ipart; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if (util_cpu_caps.has_sse4_1 && + (type.length == 1 || type.width*type.length == 128)) { + /* + * floor() is easier. + */ + + ipart = lp_build_floor(bld, a); + *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); + *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart"); + } + else { + /* + * ifloor() is easier. 
+ */ + + *out_ipart = lp_build_ifloor(bld, a); + ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart"); + *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart"); + } +} + + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index f36197479f0..8424384f8f7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -171,6 +171,12 @@ LLVMValueRef lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a); +void +lp_build_ifloor_fract(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef *out_ipart, + LLVMValueRef *out_fpart); + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 9dee653eee8..acd99741f13 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -319,7 +319,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, bld->builder, unit); /* convert float lod to integer */ - level = lp_build_ifloor(float_bld, lod); + lp_build_ifloor_fract(float_bld, lod, &level, weight_out); /* compute level 0 and clamp to legal range of levels */ *level0_out = lp_build_clamp(int_bld, level, @@ -330,8 +330,6 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, *level1_out = lp_build_clamp(int_bld, level, int_bld->zero, last_level); - - *weight_out = lp_build_fract(float_bld, lod); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 36a77d3aff0..d464147371d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -253,11 +253,9 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* mul by size and subtract 
0.5 */ coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); - /* convert to int */ - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); /* repeat wrap */ if (is_pot) { coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); @@ -284,8 +282,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); break; @@ -304,10 +302,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); } - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - /* coord0 = floor(coord); */ - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); /* coord0 = max(coord0, 0) */ coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); @@ -327,10 +323,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); coord = lp_build_sub(coord_bld, coord, half); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - /* convert to int */ - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; @@ -343,11 +337,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); - /* compute lerp weight */ - weight = lp_build_fract(coord_bld, coord); - - /* convert to int coords */ - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); /* coord0 = max(coord0, 0) */ @@ -369,8 +360,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); break; @@ -392,8 +383,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; @@ -416,8 +407,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_sub(coord_bld, coord, half); - weight = lp_build_fract(coord_bld, coord); - coord0 = lp_build_ifloor(coord_bld, coord); + /* convert to int, compute lerp weight */ + lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); } break; -- 2.30.2