gallivm: Special bri-linear computation path for unmodified rho.

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c

index a6a64f38c8421893564d67b1916deeeb8c93bc55..43ea8b1a1408d51785f232db7ffd4ae81b6f4d49 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -39,12 +39,20 @@
  #include "lp_bld_arit.h"
  #include "lp_bld_const.h"
  #include "lp_bld_debug.h"
+#include "lp_bld_printf.h"
  #include "lp_bld_flow.h"
  #include "lp_bld_sample.h"
  #include "lp_bld_swizzle.h"
  #include "lp_bld_type.h"
  
  
+/*
+ * Bri-linear factor. Use zero or any other number less than or equal to one
+ * to force tri-linear filtering.
+ */
+#define BRILINEAR_FACTOR 2
+
+
  /**
   * Does the given texture wrap mode allow sampling the texture border color?
   * XXX maybe move this into gallium util code.
@@ -182,7 +190,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
  {
     struct lp_build_context *float_size_bld = &bld->float_size_bld;
     struct lp_build_context *float_bld = &bld->float_bld;
-   const int dims = texture_dims(bld->static_state->target);
+   const unsigned dims = bld->dims;
     LLVMTypeRef i32t = LLVMInt32Type();
     LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
     LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
@@ -227,7 +235,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
  
     rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
  
-   float_size = lp_build_int_to_float(float_size_bld, bld->uint_size);
+   float_size = lp_build_int_to_float(float_size_bld, bld->int_size);
  
     rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
  
@@ -254,6 +262,133 @@ lp_build_rho(struct lp_build_sample_context *bld,
  }
  
  
+/*
+ * Bri-linear lod computation
+ *
+ * Use a piece-wise linear approximation of log2 such that:
+ * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
+ * - linear approximation for values in the neighborhood of 0.5, 1.5, etc.,
+ *   with the steepness specified in 'factor'
+ * - exact result for 0.5, 1.5, etc.
+ *
+ * Results are written through out_lod_ipart and out_lod_fpart.
+ *
+ *   1.0 -              /----*
+ *                     /
+ *                    /
+ *                   /
+ *   0.5 -          *
+ *                 /
+ *                /
+ *               /
+ *   0.0 - *----/
+ *
+ *         |                 |
+ *        2^0               2^1
+ *
+ * This is a technique also commonly used in hardware:
+ * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
+ *
+ * TODO: For correctness, this should only be applied when texture is known to
+ * have regular mipmaps, i.e., mipmaps derived from the base level.
+ *
+ * TODO: This could be done in fixed point, where applicable.
+ */
+static void
+lp_build_brilinear_lod(struct lp_build_context *bld,
+                       LLVMValueRef lod,
+                       double factor,
+                       LLVMValueRef *out_lod_ipart,
+                       LLVMValueRef *out_lod_fpart)
+{
+   LLVMValueRef lod_fpart;
+   double pre_offset = (factor - 0.5)/factor - 0.5;
+   double post_offset = 1 - factor;
+
+   if (0) {
+      lp_build_printf(bld->builder, "lod = %f\n", lod);
+   }
+
+   lod = lp_build_add(bld, lod,
+                      lp_build_const_vec(bld->type, pre_offset));
+
+   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
+
+   lod_fpart = lp_build_mul(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, factor));
+
+   lod_fpart = lp_build_add(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, post_offset));
+
+   /*
+    * It's not necessary to clamp lod_fpart since:
+    * - the above expression will never produce numbers greater than one.
+    * - the mip filtering branch is only taken if lod_fpart is positive
+    */
+
+   *out_lod_fpart = lod_fpart;
+
+   if (0) {
+      lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart);
+      lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart);
+   }
+}
+
+
+/*
+ * Combined log2 and brilinear lod computation.
+ *
+ * It is equivalent to calling lp_build_fast_log2() followed by
+ * lp_build_brilinear_lod() above, but by combining them we can compute the
+ * integer and fractional parts independently.
+ */
+static void
+lp_build_brilinear_rho(struct lp_build_context *bld,
+                       LLVMValueRef rho,
+                       double factor,
+                       LLVMValueRef *out_lod_ipart,
+                       LLVMValueRef *out_lod_fpart)
+{
+   LLVMValueRef lod_ipart;
+   LLVMValueRef lod_fpart;
+
+   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
+   const double post_offset = 1 - 2*factor;
+
+   assert(bld->type.floating);
+
+   assert(lp_check_value(bld->type, rho));
+
+   /*
+    * The pre factor will make the intersections with the exact powers of two
+    * happen precisely where we want them to be, which means that the integer
+    * part will not need any post adjustments.
+    */
+   rho = lp_build_mul(bld, rho,
+                      lp_build_const_vec(bld->type, pre_factor));
+
+   /* ipart = ifloor(log2(rho)) */
+   lod_ipart = lp_build_extract_exponent(bld, rho, 0);
+
+   /* fpart = rho / 2**ipart */
+   lod_fpart = lp_build_extract_mantissa(bld, rho);
+
+   lod_fpart = lp_build_mul(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, factor));
+
+   lod_fpart = lp_build_add(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, post_offset));
+
+   /*
+    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
+    * - the above expression will never produce numbers greater than one.
+    * - the mip filtering branch is only taken if lod_fpart is positive
+    */
+
+   *out_lod_ipart = lod_ipart;
+   *out_lod_fpart = lod_fpart;
+}
+
+
  /**
   * Generate code to compute texture level of detail (lambda).
   * \param ddx  partial derivatives of (s, t, r, q) with respect to X
@@ -274,9 +409,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                        const LLVMValueRef ddy[4],
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
-                      LLVMValueRef width,
-                      LLVMValueRef height,
-                      LLVMValueRef depth,
                        unsigned mip_filter,
                        LLVMValueRef *out_lod_ipart,
                        LLVMValueRef *out_lod_fpart)
@@ -311,16 +443,32 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
  
           rho = lp_build_rho(bld, ddx, ddy);
  
-         /* compute lod = log2(rho) */
-         if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
-              mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
-             !lod_bias &&
+         /*
+          * Compute lod = log2(rho)
+          */
+
+         if (!lod_bias &&
               !bld->static_state->lod_bias_non_zero &&
               !bld->static_state->apply_max_lod &&
               !bld->static_state->apply_min_lod) {
-            *out_lod_ipart = lp_build_ilog2(float_bld, rho);
-            *out_lod_fpart = bld->float_bld.zero;
-            return;
+            /*
+             * Special case when there are no post-log2 adjustments, which
+             * saves instructions by keeping the integer and fractional lod
+             * computations separate from the start.
+             */
+
+            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+               *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+               *out_lod_fpart = bld->float_bld.zero;
+               return;
+            }
+            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
+                BRILINEAR_FACTOR > 1.0) {
+               lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
+                                      out_lod_ipart, out_lod_fpart);
+               return;
+            }
           }
  
           if (0) {
@@ -359,16 +507,22 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
     }
  
     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-      LLVMValueRef ipart = lp_build_ifloor(float_bld, lod);
-      lp_build_name(ipart, "lod_ipart");
-      *out_lod_ipart = ipart;
-      ipart = LLVMBuildSIToFP(bld->builder, ipart, float_bld->vec_type, "");
-      *out_lod_fpart = LLVMBuildFSub(bld->builder, lod, ipart, "lod_fpart");
+      if (BRILINEAR_FACTOR > 1.0) {
+         lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
+                                out_lod_ipart, out_lod_fpart);
+      }
+      else {
+         lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
+      }
+
+      lp_build_name(*out_lod_fpart, "lod_fpart");
     }
     else {
        *out_lod_ipart = lp_build_iround(float_bld, lod);
     }
  
+   lp_build_name(*out_lod_ipart, "lod_ipart");
+
     return;
  }
  
@@ -411,27 +565,60 @@ void
  lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                             unsigned unit,
                             LLVMValueRef lod_ipart,
+                           LLVMValueRef *lod_fpart_inout,
                             LLVMValueRef *level0_out,
                             LLVMValueRef *level1_out)
  {
+   LLVMBuilderRef builder = bld->builder;
     struct lp_build_context *int_bld = &bld->int_bld;
-   LLVMValueRef last_level, level;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   LLVMValueRef last_level;
+   LLVMValueRef clamp_min;
+   LLVMValueRef clamp_max;
+
+   *level0_out = lod_ipart;
+   *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one);
  
     last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                                 bld->builder, unit);
  
-   /* convert float lod to integer */
-   level = lod_ipart;
+   /*
+    * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
+    * minimum number of comparisons, and zeroing lod_fpart in the extreme
+    * ends in the process.
+    */
+
+   /* lod_ipart < 0 */
+   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
+                             lod_ipart, int_bld->zero,
+                             "clamp_lod_to_zero");
  
-   /* compute level 0 and clamp to legal range of levels */
-   *level0_out = lp_build_clamp(int_bld, level,
-                                int_bld->zero,
-                                last_level);
-   /* compute level 1 and clamp to legal range of levels */
-   level = lp_build_add(int_bld, level, int_bld->one);
-   *level1_out = lp_build_clamp(int_bld, level,
-                                int_bld->zero,
-                                last_level);
+   *level0_out = LLVMBuildSelect(builder, clamp_min,
+                                 int_bld->zero, *level0_out, "");
+
+   *level1_out = LLVMBuildSelect(builder, clamp_min,
+                                 int_bld->zero, *level1_out, "");
+
+   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
+                                      float_bld->zero, *lod_fpart_inout, "");
+
+   /* lod_ipart >= last_level */
+   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
+                             lod_ipart, last_level,
+                             "clamp_lod_to_last");
+
+   *level0_out = LLVMBuildSelect(builder, clamp_max,
+                                 last_level, *level0_out, "");
+
+   *level1_out = LLVMBuildSelect(builder, clamp_max,
+                                 last_level, *level1_out, "");
+
+   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
+                                      float_bld->zero, *lod_fpart_inout, "");
+
+   lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
+   lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
+   lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
  }
  
  
@@ -442,12 +629,12 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
   */
  LLVMValueRef
  lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
-                          LLVMValueRef data_array, LLVMValueRef level)
+                          LLVMValueRef level)
  {
     LLVMValueRef indexes[2], data_ptr;
     indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
     indexes[1] = level;
-   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+   data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, "");
     data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
     return data_ptr;
  }
@@ -455,10 +642,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
  
  LLVMValueRef
  lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
-                                LLVMValueRef data_array, int level)
+                                int level)
  {
     LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
-   return lp_build_get_mipmap_level(bld, data_array, lvl);
+   return lp_build_get_mipmap_level(bld, lvl);
  }
  
  
@@ -467,18 +654,22 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
   * Return max(1, base_size >> level);
   */
  static LLVMValueRef
-lp_build_minify(struct lp_build_sample_context *bld,
+lp_build_minify(struct lp_build_context *bld,
                  LLVMValueRef base_size,
                  LLVMValueRef level)
  {
-   if (level == bld->int_coord_bld.zero) {
+   assert(lp_check_value(bld->type, base_size));
+   assert(lp_check_value(bld->type, level));
+
+   if (level == bld->zero) {
        /* if we're using mipmap level zero, no minification is needed */
        return base_size;
     }
     else {
        LLVMValueRef size =
           LLVMBuildLShr(bld->builder, base_size, level, "minify");
-      size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
+      assert(bld->type.sign);
+      size = lp_build_max(bld, size, bld->one);
        return size;
     }
  }
@@ -509,71 +700,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
   */
  void
  lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
-                            unsigned dims,
-                            LLVMValueRef width_vec,
-                            LLVMValueRef height_vec,
-                            LLVMValueRef depth_vec,
-                            LLVMValueRef ilevel0,
-                            LLVMValueRef ilevel1,
-                            LLVMValueRef row_stride_array,
-                            LLVMValueRef img_stride_array,
-                            LLVMValueRef *width0_vec,
-                            LLVMValueRef *width1_vec,
-                            LLVMValueRef *height0_vec,
-                            LLVMValueRef *height1_vec,
-                            LLVMValueRef *depth0_vec,
-                            LLVMValueRef *depth1_vec,
-                            LLVMValueRef *row_stride0_vec,
-                            LLVMValueRef *row_stride1_vec,
-                            LLVMValueRef *img_stride0_vec,
-                            LLVMValueRef *img_stride1_vec)
+                            LLVMValueRef ilevel,
+                            LLVMValueRef *out_size,
+                            LLVMValueRef *row_stride_vec,
+                            LLVMValueRef *img_stride_vec)
  {
-   const unsigned mip_filter = bld->static_state->min_mip_filter;
-   LLVMValueRef ilevel0_vec, ilevel1_vec;
+   const unsigned dims = bld->dims;
+   LLVMValueRef ilevel_vec;
  
-   ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
-   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
-      ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
+   ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
  
     /*
-    * Compute width, height, depth at mipmap level 'ilevel0'
+    * Compute width, height, depth at mipmap level 'ilevel'
      */
-   *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
+   *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+
     if (dims >= 2) {
-      *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
-      *row_stride0_vec = lp_build_get_level_stride_vec(bld,
-                                                       row_stride_array,
-                                                       ilevel0);
+      *row_stride_vec = lp_build_get_level_stride_vec(bld,
+                                                      bld->row_stride_array,
+                                                      ilevel);
        if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
-         *img_stride0_vec = lp_build_get_level_stride_vec(bld,
-                                                          img_stride_array,
-                                                          ilevel0);
-         if (dims == 3) {
-            *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
-         }
+         *img_stride_vec = lp_build_get_level_stride_vec(bld,
+                                                         bld->img_stride_array,
+                                                         ilevel);
        }
     }
-   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-      /* compute width, height, depth for second mipmap level at 'ilevel1' */
-      *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
-      if (dims >= 2) {
-         *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
-         *row_stride1_vec = lp_build_get_level_stride_vec(bld,
-                                                          row_stride_array,
-                                                          ilevel1);
-         if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
-            *img_stride1_vec = lp_build_get_level_stride_vec(bld,
-                                                             img_stride_array,
-                                                             ilevel1);
-            if (dims == 3) {
-               *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
-            }
-         }
+}
+
+
+/**
+ * Extract and broadcast texture size.
+ *
+ * @param size_type   type of the texture size vector (either
+ *                    bld->int_size_type or bld->float_size_type)
+ * @param coord_type  type of the broadcast output vectors (either
+ *                    bld->int_coord_type or bld->coord_type)
+ * @param size        vector with the texture size (width, height,
+ *                    depth), of type size_type
+ */
+void
+lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
+                             struct lp_type size_type,
+                             struct lp_type coord_type,
+                             LLVMValueRef size,
+                             LLVMValueRef *out_width,
+                             LLVMValueRef *out_height,
+                             LLVMValueRef *out_depth)
+{
+   const unsigned dims = bld->dims;
+   LLVMTypeRef i32t = LLVMInt32Type();
+
+   *out_width = lp_build_extract_broadcast(bld->builder,
+                                           size_type,
+                                           coord_type,
+                                           size,
+                                           LLVMConstInt(i32t, 0, 0));
+   if (dims >= 2) {
+      *out_height = lp_build_extract_broadcast(bld->builder,
+                                               size_type,
+                                               coord_type,
+                                               size,
+                                               LLVMConstInt(i32t, 1, 0));
+      if (dims == 3) {
+         *out_depth = lp_build_extract_broadcast(bld->builder,
+                                                 size_type,
+                                                 coord_type,
+                                                 size,
+                                                 LLVMConstInt(i32t, 2, 0));
        }
     }
  }
  
  
+/**
+ * Unnormalize coords.
+ *
+ * @param flt_size  vector with the float texture size (width, height, depth)
+ */
+void
+lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
+                             LLVMValueRef flt_size,
+                             LLVMValueRef *s,
+                             LLVMValueRef *t,
+                             LLVMValueRef *r)
+{
+   const unsigned dims = bld->dims;
+   LLVMValueRef width;
+   LLVMValueRef height;
+   LLVMValueRef depth;
+
+   lp_build_extract_image_sizes(bld,
+                                bld->float_size_type,
+                                bld->coord_type,
+                                flt_size,
+                                &width,
+                                &height,
+                                &depth);
+
+   /* s = s * width, t = t * height */
+   *s = lp_build_mul(&bld->coord_bld, *s, width);
+   if (dims >= 2) {
+      *t = lp_build_mul(&bld->coord_bld, *t, height);
+      if (dims >= 3) {
+         *r = lp_build_mul(&bld->coord_bld, *r, depth);
+      }
+   }
+}
+
  
  /** Helper used by lp_build_cube_lookup() */
  static LLVMValueRef