llvmpipe: Use lp_build_ifloor_fract for exp2 calculation.
author José Fonseca <jose.r.fonseca@gmail.com>
Sun, 16 Oct 2011 00:42:57 +0000 (01:42 +0100)
committer José Fonseca <jose.r.fonseca@gmail.com>
Sun, 16 Oct 2011 13:18:41 +0000 (14:18 +0100)
Instead of separate ifloor / fract calls.

No change for SSE4.1 code, but fewer FP<->SI conversions on non-SSE4.1
systems.

src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/drivers/llvmpipe/lp_test_arit.c

index 2be8598704e3e911c0e52d0eaa9a86371b551e89..a15dcb085f5f7eced74fc0b1f8de687fe8219dea 100644 (file)
@@ -2255,7 +2255,6 @@ lp_build_exp2_approx(struct lp_build_context *bld,
    LLVMBuilderRef builder = bld->gallivm->builder;
    const struct lp_type type = bld->type;
    LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
    LLVMValueRef ipart = NULL;
    LLVMValueRef fpart = NULL;
    LLVMValueRef expipart = NULL;
@@ -2278,15 +2277,12 @@ lp_build_exp2_approx(struct lp_build_context *bld,
       x = lp_build_max(bld, x, lp_build_const_vec(bld->gallivm, type, -126.99999));
 
       /* ipart = floor(x) */
-      ipart = lp_build_floor(bld, x);
-
       /* fpart = x - ipart */
-      fpart = LLVMBuildFSub(builder, x, ipart, "");
+      lp_build_ifloor_fract(bld, x, &ipart, &fpart);
    }
 
    if(p_exp2_int_part || p_exp2) {
       /* expipart = (float) (1 << ipart) */
-      ipart = LLVMBuildFPToSI(builder, ipart, int_vec_type, "");
       expipart = LLVMBuildAdd(builder, ipart,
                               lp_build_const_int_vec(bld->gallivm, type, 127), "");
       expipart = LLVMBuildShl(builder, expipart,
index ea2a659142fe746148196d382580aa5a9caf96d3..0b74dee176f8b0fa491baa619bf16518b97c0070 100644 (file)
@@ -91,6 +91,10 @@ const float exp2_values[] = {
    -1e-007,
    0,
    1e-007,
+   0.01,
+   0.1,
+   0.9,
+   0.99,
    1, 
    2, 
    4, 
@@ -107,8 +111,14 @@ const float log2_values[] = {
    1.4012984643248171e-45,
 #endif
    1e-007,
+   0.1,
    0.5,
+   0.99,
    1,
+   1.01,
+   1.1,
+   1.9,
+   1.99,
    2,
    4,
    100000,