gallivm: Fix mipfiltering with negative lod bias.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
index 5b56f2cf3b7b6cfd31642ca9738c0546b845a6d9..3b1e6df4ec3e1d3590036e4c299ba3048dc5a301 100644 (file)
@@ -30,6 +30,7 @@
  * Texture sampling -- SoA.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <brianp@vmware.com>
  */
 
 #include "pipe/p_defines.h"
@@ -185,6 +186,21 @@ texture_dims(enum pipe_texture_target tex)
 }
 
 
+static void
+apply_sampler_swizzle(struct lp_build_sample_context *bld,
+                      LLVMValueRef *texel)
+{
+   unsigned char swizzles[4];
+   /* Copy the R, G, B, A swizzle selectors out of the static sampler state. */
+   swizzles[0] = bld->static_state->swizzle_r;
+   swizzles[1] = bld->static_state->swizzle_g;
+   swizzles[2] = bld->static_state->swizzle_b;
+   swizzles[3] = bld->static_state->swizzle_a;
+   /* Presumably rewrites texel[] in place (per the helper's name) -- confirm. */
+   lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
+}
+
+
 
 /**
  * Generate code to fetch a texel from a texture at int coords (x, y, z).
@@ -211,7 +227,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
    const int dims = texture_dims(bld->static_state->target);
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
-   LLVMValueRef packed;
+   LLVMValueRef i, j;
    LLVMValueRef use_border = NULL;
 
    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
@@ -248,6 +264,57 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
+   /*
+    * Describe the coordinates in terms of pixel blocks.
+    *
+    * TODO: pixel block dimensions are powers of two. LLVM should convert
+    * rem/div to bit arithmetic. Verify this.
+    */
+
+   if (bld->format_desc->block.width == 1) {
+      i = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
+      i = LLVMBuildURem(bld->builder, x, block_width, "");
+      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+   }
+
+   if (bld->format_desc->block.height == 1) {
+      j = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
+      j = LLVMBuildURem(bld->builder, y, block_height, "");
+      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+   }
+
+   /* convert x,y,z coords to linear offset from start of texture, in bytes */
+   offset = lp_build_sample_offset(&bld->uint_coord_bld,
+                                   bld->format_desc,
+                                   x, y, z, y_stride, z_stride);
+
+   if (use_border) {
+      /* If we can sample the border color, it means that texcoords may
+       * lie outside the bounds of the texture image.  We need to do
+       * something to prevent reading out of bounds and causing a segfault.
+       *
+       * Simply AND the byte offset with !use_border.  This will cause
+       * offsets of out-of-bounds texels to become zero.  Zero's guaranteed
+       * to be inside the texture image.
+       */
+      offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
+   }
+
+   lp_build_fetch_rgba_soa(bld->builder,
+                           bld->format_desc,
+                           bld->texel_type,
+                           data_ptr, offset,
+                           i, j,
+                           texel);
+
+   apply_sampler_swizzle(bld, texel);
+
    /*
     * Note: if we find an app which frequently samples the texture border
     * we might want to implement a true conditional here to avoid sampling
@@ -263,30 +330,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
     * the texel color results with the border color.
     */
 
-   /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, z, y_stride, z_stride);
-
-   assert(bld->format_desc->block.width == 1);
-   assert(bld->format_desc->block.height == 1);
-   assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
-   /* gather the texels from the texture */
-   packed = lp_build_gather(bld->builder,
-                            bld->texel_type.length,
-                            bld->format_desc->block.bits,
-                            bld->texel_type.width,
-                            data_ptr, offset);
-
-   texel[0] = texel[1] = texel[2] = texel[3] = NULL;
-
-   /* convert texels to float rgba */
-   lp_build_unpack_rgba_soa(bld->builder,
-                            bld->format_desc,
-                            bld->texel_type,
-                            packed, texel);
-
    if (use_border) {
       /* select texel color or border color depending on use_border */
       int chan;
@@ -829,15 +872,24 @@ lp_build_minify(struct lp_build_sample_context *bld,
  * \param s  vector of texcoord s values
  * \param t  vector of texcoord t values
  * \param r  vector of texcoord r values
+ * \param lod_bias  optional float vector with the shader lod bias
+ * \param explicit_lod  optional float vector with the explicit lod
  * \param width  scalar int texture width
  * \param height  scalar int texture height
  * \param depth  scalar int texture depth
+ *
+ * XXX: The resulting lod is scalar, so only the first element of the
+ * derivatives, lod_bias, etc. passed by the shader is used.
  */
 static LLVMValueRef
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
+                      const LLVMValueRef *ddx,
+                      const LLVMValueRef *ddy,
+                      LLVMValueRef lod_bias, /* optional */
+                      LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef width,
                       LLVMValueRef height,
                       LLVMValueRef depth)
@@ -850,85 +902,83 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
    }
    else {
-      const int dims = texture_dims(bld->static_state->target);
       struct lp_build_context *float_bld = &bld->float_bld;
-      LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(),
-                                            bld->static_state->lod_bias);
+      LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
+                                                    bld->static_state->lod_bias);
       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
                                            bld->static_state->min_lod);
       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
                                            bld->static_state->max_lod);
-
       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
-      LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
-      LLVMValueRef s0, s1, s2;
-      LLVMValueRef t0, t1, t2;
-      LLVMValueRef r0, r1, r2;
-      LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
-      LLVMValueRef rho, lod;
-
-      /*
-       * dsdx = abs(s[1] - s[0]);
-       * dsdy = abs(s[2] - s[0]);
-       * dtdx = abs(t[1] - t[0]);
-       * dtdy = abs(t[2] - t[0]);
-       * drdx = abs(r[1] - r[0]);
-       * drdy = abs(r[2] - r[0]);
-       * XXX we're assuming a four-element quad in 2x2 layout here.
-       */
-      s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
-      s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
-      s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
-      dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
-      dsdx = lp_build_abs(float_bld, dsdx);
-      dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
-      dsdy = lp_build_abs(float_bld, dsdy);
-      if (dims > 1) {
-         t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
-         t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
-         t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
-         dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
-         dtdx = lp_build_abs(float_bld, dtdx);
-         dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
-         dtdy = lp_build_abs(float_bld, dtdy);
-         if (dims > 2) {
-            r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
-            r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
-            r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
-            drdx = LLVMBuildSub(bld->builder, r1, r0, "");
-            drdx = lp_build_abs(float_bld, drdx);
-            drdy = LLVMBuildSub(bld->builder, r2, r0, "");
-            drdy = lp_build_abs(float_bld, drdy);
-         }
+      LLVMValueRef lod;
+
+      if (explicit_lod) {
+         lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
+                                       index0, "");
       }
+      else {
+         const int dims = texture_dims(bld->static_state->target);
+         LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+         LLVMValueRef rho;
+
+         /*
+          * dsdx = abs(ddx[0][0]);
+          * dsdy = abs(ddy[0][0]);
+          * dtdx = abs(ddx[1][0]);
+          * dtdy = abs(ddy[1][0]);
+          * drdx = abs(ddx[2][0]);
+          * drdy = abs(ddy[2][0]);
+          */
+         dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+         dsdx = lp_build_abs(float_bld, dsdx);
+         dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+         dsdy = lp_build_abs(float_bld, dsdy);
+         if (dims > 1) {
+            dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+            dtdx = lp_build_abs(float_bld, dtdx);
+            dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+            dtdy = lp_build_abs(float_bld, dtdy);
+            if (dims > 2) {
+               drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+               drdx = lp_build_abs(float_bld, drdx);
+               drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+               drdy = lp_build_abs(float_bld, drdy);
+            }
+         }
 
-      /* Compute rho = max of all partial derivatives scaled by texture size.
-       * XXX this could be vectorized somewhat
-       */
-      rho = LLVMBuildMul(bld->builder,
-                         lp_build_max(float_bld, dsdx, dsdy),
-                         lp_build_int_to_float(float_bld, width), "");
-      if (dims > 1) {
-         LLVMValueRef max;
-         max = LLVMBuildMul(bld->builder,
-                            lp_build_max(float_bld, dtdx, dtdy),
-                            lp_build_int_to_float(float_bld, height), "");
-         rho = lp_build_max(float_bld, rho, max);
-         if (dims > 2) {
+         /* Compute rho = max of all partial derivatives scaled by texture size.
+          * XXX this could be vectorized somewhat
+          */
+         rho = LLVMBuildMul(bld->builder,
+                            lp_build_max(float_bld, dsdx, dsdy),
+                            lp_build_int_to_float(float_bld, width), "");
+         if (dims > 1) {
+            LLVMValueRef max;
             max = LLVMBuildMul(bld->builder,
-                               lp_build_max(float_bld, drdx, drdy),
-                               lp_build_int_to_float(float_bld, depth), "");
+                               lp_build_max(float_bld, dtdx, dtdy),
+                               lp_build_int_to_float(float_bld, height), "");
             rho = lp_build_max(float_bld, rho, max);
+            if (dims > 2) {
+               max = LLVMBuildMul(bld->builder,
+                                  lp_build_max(float_bld, drdx, drdy),
+                                  lp_build_int_to_float(float_bld, depth), "");
+               rho = lp_build_max(float_bld, rho, max);
+            }
          }
-      }
 
-      /* compute lod = log2(rho) */
-      lod = lp_build_log2(float_bld, rho);
+         /* compute lod = log2(rho) */
+         lod = lp_build_log2(float_bld, rho);
+
+         /* add shader lod bias */
+         if (lod_bias) {
+            lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
+                                               index0, "");
+            lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+         }
+      }
 
-      /* add lod bias */
-      lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+      /* add sampler lod bias */
+      lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
 
       /* clamp lod */
       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -996,8 +1046,10 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                 int_bld->zero,
                                 last_level);
    /* compute level 1 and clamp to legal range of levels */
-   *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
-   *level1_out = lp_build_min(int_bld, *level1_out, last_level);
+   level = lp_build_add(int_bld, level, int_bld->one);
+   *level1_out = lp_build_clamp(int_bld, level,
+                                int_bld->zero,
+                                last_level);
 
    *weight_out = lp_build_fract(float_bld, lod);
 }
@@ -1514,6 +1566,10 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         LLVMValueRef s,
                         LLVMValueRef t,
                         LLVMValueRef r,
+                        const LLVMValueRef *ddx,
+                        const LLVMValueRef *ddy,
+                        LLVMValueRef lod_bias, /* optional */
+                        LLVMValueRef explicit_lod, /* optional */
                         LLVMValueRef width,
                         LLVMValueRef height,
                         LLVMValueRef depth,
@@ -1521,7 +1577,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         LLVMValueRef height_vec,
                         LLVMValueRef depth_vec,
                         LLVMValueRef row_stride_array,
-                        LLVMValueRef img_stride_vec,
+                        LLVMValueRef img_stride_array,
                         LLVMValueRef data_array,
                         LLVMValueRef *colors_out)
 {
@@ -1530,13 +1586,13 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
    const unsigned min_filter = bld->static_state->min_img_filter;
    const unsigned mag_filter = bld->static_state->mag_img_filter;
    const int dims = texture_dims(bld->static_state->target);
-   LLVMValueRef lod, lod_fpart;
-   LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec;
+   LLVMValueRef lod = NULL, lod_fpart = NULL;
+   LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
-   LLVMValueRef data_ptr0, data_ptr1;
+   LLVMValueRef data_ptr0, data_ptr1 = NULL;
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
@@ -1551,7 +1607,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       /* Need to compute lod either to choose mipmap levels or to
        * distinguish between minification/magnification with one mipmap level.
        */
-      lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
+      lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy,
+                                  lod_bias, explicit_lod,
+                                  width, height, depth);
    }
 
    /*
@@ -1589,8 +1647,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
                                                       ilevel0);
       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
-         img_stride0_vec = lp_build_mul(&bld->int_coord_bld,
-                                        row_stride0_vec, height0_vec);
+         img_stride0_vec = lp_build_get_level_stride_vec(bld,
+                                                         img_stride_array,
+                                                         ilevel0);
          if (dims == 3) {
             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
          }
@@ -1604,8 +1663,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
                                                          ilevel1);
          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
-            img_stride1_vec = lp_build_mul(&bld->int_coord_bld,
-                                           row_stride1_vec, height1_vec);
+            img_stride1_vec = lp_build_get_level_stride_vec(bld,
+                                                            img_stride_array,
+                                                            ilevel1);
             if (dims ==3) {
                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
             }
@@ -1757,6 +1817,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef unswizzled[4];
    LLVMValueRef stride;
 
+   assert(bld->static_state->target == PIPE_TEXTURE_2D);
+   assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
+   assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
+   assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
+
    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
@@ -1928,8 +1993,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                              packed, unswizzled);
 
    lp_build_format_swizzle_soa(bld->format_desc,
-                               bld->texel_type, unswizzled,
-                               texel);
+                               &bld->texel_bld,
+                               unswizzled, texel);
+
+   apply_sampler_swizzle(bld, texel);
 }
 
 
@@ -1968,6 +2035,24 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Debug helper: set all four texel channels to texel_bld->one (white)
+ * instead of actually sampling the texture.
+ */
+static void
+lp_build_sample_nop(struct lp_build_sample_context *bld,
+                    LLVMValueRef *texel)
+{
+   struct lp_build_context *texel_bld = &bld->texel_bld;
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      /* point each of the four channels at the context's constant "one" */
+      texel[chan] = texel_bld->one;
+   }
+}
+
+
 /**
  * Build texture sampling code.
  * 'texel' will return a vector of four LLVMValueRefs corresponding to
@@ -1982,24 +2067,22 @@ lp_build_sample_soa(LLVMBuilderRef builder,
                     unsigned unit,
                     unsigned num_coords,
                     const LLVMValueRef *coords,
-                    LLVMValueRef lodbias,
+                    const LLVMValueRef *ddx,
+                    const LLVMValueRef *ddy,
+                    LLVMValueRef lod_bias, /* optional */
+                    LLVMValueRef explicit_lod, /* optional */
                     LLVMValueRef *texel)
 {
    struct lp_build_sample_context bld;
    LLVMValueRef width, width_vec;
    LLVMValueRef height, height_vec;
    LLVMValueRef depth, depth_vec;
-   LLVMValueRef stride_array;
+   LLVMValueRef row_stride_array, img_stride_array;
    LLVMValueRef data_array;
    LLVMValueRef s;
    LLVMValueRef t;
    LLVMValueRef r;
 
-   (void) lp_build_lod_selector;   /* temporary to silence warning */
-   (void) lp_build_nearest_mip_level;
-   (void) lp_build_linear_mip_levels;
-   (void) lp_build_minify;
-
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
    bld.builder = builder;
@@ -2025,7 +2108,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    width = dynamic_state->width(dynamic_state, builder, unit);
    height = dynamic_state->height(dynamic_state, builder, unit);
    depth = dynamic_state->depth(dynamic_state, builder, unit);
-   stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+   row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+   img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
    /* Note that data_array is an array[level] of pointers to texture images */
 
@@ -2037,22 +2121,28 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
 
-   if (util_format_is_rgba8_variant(bld.format_desc) &&
-       static_state->target == PIPE_TEXTURE_2D &&
-       static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
-       static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
-       static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
-       is_simple_wrap_mode(static_state->wrap_s) &&
-       is_simple_wrap_mode(static_state->wrap_t)) {
+   if (0) {
+      /* For debug: no-op texture sampling */
+      lp_build_sample_nop(&bld, texel);
+   }
+   else if (util_format_is_rgba8_variant(bld.format_desc) &&
+            static_state->target == PIPE_TEXTURE_2D &&
+            static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
+            static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
+            static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
+            is_simple_wrap_mode(static_state->wrap_s) &&
+            is_simple_wrap_mode(static_state->wrap_t)) {
       /* special case */
       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
-                                    stride_array, data_array, texel);
+                                    row_stride_array, data_array, texel);
    }
    else {
-      lp_build_sample_general(&bld, unit, s, t, r,
+      lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
+                              lod_bias, explicit_lod,
                               width, height, depth,
                               width_vec, height_vec, depth_vec,
-                              stride_array, NULL, data_array,
+                              row_stride_array, img_stride_array,
+                              data_array,
                               texel);
    }