From 88637e5764d475aa7ad140e20bdd12bcbe61cf18 Mon Sep 17 00:00:00 2001
From: Jeff Muizelaar <jmuizelaar@mozilla.com>
Date: Sat, 15 Feb 2014 02:34:39 +0100
Subject: [PATCH] gallivm: use correct rounding for linear wrap mode (in the
 aos int path)

The previous method for converting coords to ints was slightly inaccurate
(effectively losing 1 bit from the 8-bit lerp weight). This is probably
especially noticeable when trying to draw a pixel-aligned texture.
As an example, for a 100x100 texture after denormalization the texture
coords in this case would turn up as
0.5, 1.5, 2.5, 3.5, 4.5, ...
After the mul by 256, conversion to int and 128 subtraction, they end up as
0, 256, 512, 768, 1024, ...
which gets us the correct coords/weights of
0/0, 1/0, 2/0, 3/0, 4/0, ...
But even LSB errors (which are unavoidable) in the input coords may cause
these coords/weights to be wrong, e.g. for a coord of 3.49999 we'd get a
coord/weight of 2/255 instead.

Fix this by using round-to-nearest int instead of FPToSI (trunc). Should be
equally fast on x86 SSE, though other archs probably suffer a little.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index c35b628270e..4649fa9b957 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -987,7 +987,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    const unsigned dims = bld->dims;
    LLVMBuilderRef builder = bld->gallivm->builder;
    struct lp_build_context i32;
-   LLVMTypeRef i32_vec_type;
    LLVMValueRef i32_c8, i32_c128, i32_c255;
    LLVMValueRef width_vec, height_vec, depth_vec;
    LLVMValueRef s_ipart, s_fpart, s_float;
@@ -1003,8 +1002,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 
    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
 
-   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
-
    lp_build_extract_image_sizes(bld,
                                 &bld->int_size_bld,
                                 bld->int_coord_type,
@@ -1036,11 +1033,16 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    }
 
    /* convert float to int */
-   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+   /* For correct rounding, need round to nearest, not truncation here.
+    * Note that in some cases (clamp to edge, no texel offsets) we
+    * could use a non-signed build context which would help archs which
+    * don't have fptosi intrinsic with nearest rounding implemented.
+    */
+   s = lp_build_iround(&bld->coord_bld, s);
    if (dims >= 2)
-      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+      t = lp_build_iround(&bld->coord_bld, t);
    if (dims >= 3)
-      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
+      r = lp_build_iround(&bld->coord_bld, r);
 
    /* subtract 0.5 (add -128) */
    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
-- 
2.30.2