gallivm: Use a faster (and less accurate) log2 in lod computation.
author José Fonseca <jfonseca@vmware.com>
Wed, 6 Oct 2010 13:06:14 +0000 (14:06 +0100)
committer José Fonseca <jfonseca@vmware.com>
Wed, 6 Oct 2010 17:46:29 +0000 (18:46 +0100)
src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/auxiliary/gallivm/lp_bld_arit.h
src/gallium/auxiliary/gallivm/lp_bld_sample.c

index 3f9c250ad57b8bbb3ef2a0d7fe0881234b7b8069..ff94f498acf7180776b88bd90666d12f21d71184 100644 (file)
@@ -2286,3 +2286,47 @@ lp_build_log2(struct lp_build_context *bld,
    lp_build_log2_approx(bld, x, NULL, NULL, &res);
    return res;
 }
+
+
+/**
+ * Faster (and less accurate) log2: emits floor(log2(x)) + frac(x), taking the
+ * integer part from x's exponent bits and frac(x) from x's mantissa bits.
+ *
+ * NOTE(review): the 255 and 128 constants below look specific to an 8-bit
+ * exponent -- confirm. See http://www.flipcode.com/archives/Fast_log_Function.shtml
+ */
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+                   LLVMValueRef x)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef vec_type = bld->vec_type;
+   LLVMTypeRef int_vec_type = bld->int_vec_type;
+
+   unsigned mantissa = lp_mantissa(type);
+   LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1);
+   LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
+
+   LLVMValueRef ipart;
+   LLVMValueRef fpart;
+
+   assert(lp_check_value(bld->type, x));
+
+   assert(type.floating);
+
+   x = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
+
+   /* ipart = floor(log2(x)) - 1: shift the exponent field down, mask it, subtract 128, convert to float */
+   ipart = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
+   ipart = LLVMBuildAnd(bld->builder, ipart, lp_build_const_int_vec(type, 255), "");
+   ipart = LLVMBuildSub(bld->builder, ipart, lp_build_const_int_vec(type, 128), "");
+   ipart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
+
+   /* fpart = 1.0 + frac(x): keep only the mantissa bits and OR in the bit pattern of bld->one */
+   fpart = LLVMBuildAnd(bld->builder, x, mantmask, "");
+   fpart = LLVMBuildOr(bld->builder, fpart, one, "");
+   fpart = LLVMBuildBitCast(bld->builder, fpart, vec_type, "");
+
+   /* floor(log2(x)) + frac(x): the -1 folded into ipart cancels the 1.0 carried by fpart */
+   return LLVMBuildFAdd(bld->builder, ipart, fpart, "");
+}
index 31efa9921ce61739397bc9db585485006ea2ab5b..3ed4fec2333a99ee097b9e8c26ccd87250a3f1b1 100644 (file)
@@ -212,6 +212,11 @@ LLVMValueRef
 lp_build_log2(struct lp_build_context *bld,
               LLVMValueRef a);
 
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+                   LLVMValueRef a);
+
+
 void
 lp_build_exp2_approx(struct lp_build_context *bld,
                      LLVMValueRef x,
index aee94c1b866ac0633643cd8b2f9fa8f7be64b023..9dee653eee8195f50cfb4e395e8f91cb1a19723c 100644 (file)
@@ -243,7 +243,11 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          }
 
          /* compute lod = log2(rho) */
+#if 0
          lod = lp_build_log2(float_bld, rho);
+#else
+         lod = lp_build_fast_log2(float_bld, rho);
+#endif
 
          /* add shader lod bias */
          if (lod_bias) {