gallivm: fix lp_build_sample_offset() crash when indexing a 1-D texture
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
index 395eaaba2692b585cff2036ff2d8109dae041a28..1a20d74cac8b5295487054f3e083adcb182be883 100644 (file)
@@ -30,6 +30,7 @@
  * Texture sampling -- SoA.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <brianp@vmware.com>
  */
 
 #include "pipe/p_defines.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_pack.h"
 #include "lp_bld_flow.h"
+#include "lp_bld_gather.h"
 #include "lp_bld_format.h"
 #include "lp_bld_sample.h"
+#include "lp_bld_quad.h"
 
 
 /**
@@ -185,6 +188,21 @@ texture_dims(enum pipe_texture_target tex)
 }
 
 
+static void
+apply_sampler_swizzle(struct lp_build_sample_context *bld,
+                      LLVMValueRef *texel)
+{
+   unsigned char swizzles[4];
+   /* Apply the sampler state's r/g/b/a channel swizzle to texel[]. */
+   swizzles[0] = bld->static_state->swizzle_r;
+   swizzles[1] = bld->static_state->swizzle_g;
+   swizzles[2] = bld->static_state->swizzle_b;
+   swizzles[3] = bld->static_state->swizzle_a;
+   /* Helper name suggests texel[] is rewritten in place -- confirm. */
+   lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
+}
+
+
 
 /**
  * Generate code to fetch a texel from a texture at int coords (x, y, z).
@@ -206,7 +224,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                           LLVMValueRef y_stride,
                           LLVMValueRef z_stride,
                           LLVMValueRef data_ptr,
-                          LLVMValueRef *texel)
+                          LLVMValueRef texel_out[4])
 {
    const int dims = texture_dims(bld->static_state->target);
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
@@ -248,42 +266,32 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
-   /*
-    * Describe the coordinates in terms of pixel blocks.
-    *
-    * TODO: pixel blocks are power of two. LLVM should convert rem/div to
-    * bit arithmetic. Verify this.
-    */
-
-   if (bld->format_desc->block.width == 1) {
-      i = bld->uint_coord_bld.zero;
-   }
-   else {
-      LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
-      i = LLVMBuildURem(bld->builder, x, block_width, "");
-      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
-   }
+   /* convert x,y,z coords to linear offset from start of texture, in bytes */
+   lp_build_sample_offset(&bld->uint_coord_bld,
+                          bld->format_desc,
+                          x, y, z, y_stride, z_stride,
+                          &offset, &i, &j);
 
-   if (bld->format_desc->block.height == 1) {
-      j = bld->uint_coord_bld.zero;
-   }
-   else {
-      LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
-      j = LLVMBuildURem(bld->builder, y, block_height, "");
-      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+   if (use_border) {
+      /* If we can sample the border color, it means that texcoords may
+       * lie outside the bounds of the texture image.  We need to do
+       * something to prevent reading out of bounds and causing a segfault.
+       *
+       * Simply AND the byte offset with the complement of use_border.  This
+       * forces the offset of any out-of-bounds texel to zero.  Offset zero is
+       * guaranteed to be inside the texture image.
+       */
+      offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
    }
 
-   /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, z, y_stride, z_stride);
-
    lp_build_fetch_rgba_soa(bld->builder,
                            bld->format_desc,
                            bld->texel_type,
                            data_ptr, offset,
                            i, j,
-                           texel);
+                           texel_out);
+
+   apply_sampler_swizzle(bld, texel_out);
 
    /*
     * Note: if we find an app which frequently samples the texture border
@@ -307,13 +315,16 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
          LLVMValueRef border_chan =
             lp_build_const_vec(bld->texel_type,
                                   bld->static_state->border_color[chan]);
-         texel[chan] = lp_build_select(&bld->texel_bld, use_border,
-                                       border_chan, texel[chan]);
+         texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
+                                           border_chan, texel_out[chan]);
       }
    }
 }
 
 
+/**
+ * Fetch the texels as <4n x i8> in AoS form.
+ */
 static LLVMValueRef
 lp_build_sample_packed(struct lp_build_sample_context *bld,
                        LLVMValueRef x,
@@ -321,25 +332,46 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
                        LLVMValueRef y_stride,
                        LLVMValueRef data_array)
 {
-   LLVMValueRef offset;
+   LLVMValueRef offset, i, j;
    LLVMValueRef data_ptr;
+   LLVMValueRef res;
 
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, NULL, y_stride, NULL);
-
-   assert(bld->format_desc->block.width == 1);
-   assert(bld->format_desc->block.height == 1);
-   assert(bld->format_desc->block.bits <= bld->texel_type.width);
+   /* convert x,y,z coords to linear offset from start of texture, in bytes */
+   lp_build_sample_offset(&bld->uint_coord_bld,
+                          bld->format_desc,
+                          x, y, NULL, y_stride, NULL,
+                          &offset, &i, &j);
 
    /* get pointer to mipmap level 0 data */
    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 
-   return lp_build_gather(bld->builder,
-                          bld->texel_type.length,
-                          bld->format_desc->block.bits,
-                          bld->texel_type.width,
-                          data_ptr, offset);
+   if (util_format_is_rgba8_variant(bld->format_desc)) {
+      /* Just fetch the data directly without swizzling */
+      assert(bld->format_desc->block.width == 1);
+      assert(bld->format_desc->block.height == 1);
+      assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+      res = lp_build_gather(bld->builder,
+                            bld->texel_type.length,
+                            bld->format_desc->block.bits,
+                            bld->texel_type.width,
+                            data_ptr, offset);
+   }
+   else {
+      struct lp_type type;
+
+      assert(bld->texel_type.width == 32);
+
+      memset(&type, 0, sizeof type);
+      type.width = 8;
+      type.length = bld->texel_type.length*4;
+      type.norm = TRUE;
+
+      res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
+                                    data_ptr, offset, i, j);
+   }
+
+   return res;
 }
 
 
@@ -385,10 +417,8 @@ is_simple_wrap_mode(unsigned mode)
 {
    switch (mode) {
    case PIPE_TEX_WRAP_REPEAT:
-   case PIPE_TEX_WRAP_CLAMP:
    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
       return TRUE;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    default:
       return FALSE;
    }
@@ -425,24 +455,17 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
          coord = LLVMBuildURem(bld->builder, coord, length, "");
       break;
 
-   case PIPE_TEX_WRAP_CLAMP:
    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
       break;
 
+   case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      /* FIXME */
-      _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
-                    util_dump_tex_wrap(wrap_mode, TRUE));
-      coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
-      coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
-      break;
-
    default:
       assert(0);
    }
@@ -470,11 +493,9 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
-   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
    LLVMValueRef coord0, coord1, weight;
 
    switch(wrap_mode) {
@@ -502,16 +523,18 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 
    case PIPE_TEX_WRAP_CLAMP:
       if (bld->static_state->normalized_coords) {
+         /* scale coord to length */
          coord = lp_build_mul(coord_bld, coord, length_f);
       }
+
+      /* clamp to [0, length] */
+      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
+
+      coord = lp_build_sub(coord_bld, coord, half);
+
       weight = lp_build_fract(coord_bld, coord);
-      coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
-                              length_f_minus_one);
-      coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
-      coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
-                              length_f_minus_one);
-      coord0 = lp_build_ifloor(coord_bld, coord0);
-      coord1 = lp_build_ifloor(coord_bld, coord1);
+      coord0 = lp_build_ifloor(coord_bld, coord);
+      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
       break;
 
    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
@@ -525,7 +548,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
       else {
          LLVMValueRef min, max;
          /* clamp to [0.5, length - 0.5] */
-         min = lp_build_const_vec(coord_bld->type, 0.5F);
+         min = half;
          max = lp_build_sub(coord_bld, length_f, min);
          coord = lp_build_clamp(coord_bld, coord, min, max);
       }
@@ -544,25 +567,14 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
       {
          LLVMValueRef min, max;
          if (bld->static_state->normalized_coords) {
-            /* min = -1.0 / (2 * length) = -0.5 / length */
-            min = lp_build_mul(coord_bld,
-                               lp_build_const_vec(coord_bld->type, -0.5F),
-                               lp_build_rcp(coord_bld, length_f));
-            /* max = 1.0 - min */
-            max = lp_build_sub(coord_bld, coord_bld->one, min);
-            /* coord = clamp(coord, min, max) */
-            coord = lp_build_clamp(coord_bld, coord, min, max);
-            /* scale coord to length (and sub 0.5?) */
+            /* scale coord to length */
             coord = lp_build_mul(coord_bld, coord, length_f);
-            coord = lp_build_sub(coord_bld, coord, half);
-         }
-         else {
-            /* clamp to [-0.5, length + 0.5] */
-            min = lp_build_const_vec(coord_bld->type, -0.5F);
-            max = lp_build_sub(coord_bld, length_f, min);
-            coord = lp_build_clamp(coord_bld, coord, min, max);
-            coord = lp_build_sub(coord_bld, coord, half);
          }
+         /* clamp to [-0.5, length + 0.5] */
+         min = lp_build_const_vec(coord_bld->type, -0.5F);
+         max = lp_build_sub(coord_bld, length_f, min);
+         coord = lp_build_clamp(coord_bld, coord, min, max);
+         coord = lp_build_sub(coord_bld, coord, half);
          /* compute lerp weight */
          weight = lp_build_fract(coord_bld, coord);
          /* convert to int */
@@ -593,35 +605,41 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
       break;
 
    case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      {
-         LLVMValueRef min, max;
-         /* min = 1.0 / (2 * length) */
-         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
-         /* max = 1.0 - min */
-         max = lp_build_sub(coord_bld, coord_bld->one, min);
+      coord = lp_build_abs(coord_bld, coord);
 
-         coord = lp_build_abs(coord_bld, coord);
-         coord = lp_build_clamp(coord_bld, coord, min, max);
+      if (bld->static_state->normalized_coords) {
+         /* scale coord to length */
          coord = lp_build_mul(coord_bld, coord, length_f);
-         if(0)coord = lp_build_sub(coord_bld, coord, half);
-         weight = lp_build_fract(coord_bld, coord);
-         coord0 = lp_build_ifloor(coord_bld, coord);
-         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
       }
+
+      /* clamp to [0, length] */
+      coord = lp_build_min(coord_bld, coord, length_f);
+
+      coord = lp_build_sub(coord_bld, coord, half);
+
+      weight = lp_build_fract(coord_bld, coord);
+      coord0 = lp_build_ifloor(coord_bld, coord);
+      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
       break;
 
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
       {
          LLVMValueRef min, max;
-         /* min = 1.0 / (2 * length) */
-         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
-         /* max = 1.0 - min */
-         max = lp_build_sub(coord_bld, coord_bld->one, min);
 
          coord = lp_build_abs(coord_bld, coord);
+
+         if (bld->static_state->normalized_coords) {
+            /* scale coord to length */
+            coord = lp_build_mul(coord_bld, coord, length_f);
+         }
+
+         /* clamp to [0.5, length - 0.5] */
+         min = half;
+         max = lp_build_sub(coord_bld, length_f, min);
          coord = lp_build_clamp(coord_bld, coord, min, max);
-         coord = lp_build_mul(coord_bld, coord, length_f);
+
          coord = lp_build_sub(coord_bld, coord, half);
+
          weight = lp_build_fract(coord_bld, coord);
          coord0 = lp_build_ifloor(coord_bld, coord);
          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
@@ -631,17 +649,21 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
       {
          LLVMValueRef min, max;
-         /* min = -1.0 / (2 * length) = -0.5 / length */
-         min = lp_build_mul(coord_bld,
-                            lp_build_const_vec(coord_bld->type, -0.5F),
-                            lp_build_rcp(coord_bld, length_f));
-         /* max = 1.0 - min */
-         max = lp_build_sub(coord_bld, coord_bld->one, min);
 
          coord = lp_build_abs(coord_bld, coord);
+
+         if (bld->static_state->normalized_coords) {
+            /* scale coord to length */
+            coord = lp_build_mul(coord_bld, coord, length_f);
+         }
+
+         /* clamp to [-0.5, length + 0.5] */
+         min = lp_build_negate(coord_bld, half);
+         max = lp_build_sub(coord_bld, length_f, min);
          coord = lp_build_clamp(coord_bld, coord, min, max);
-         coord = lp_build_mul(coord_bld, coord, length_f);
+
          coord = lp_build_sub(coord_bld, coord, half);
+
          weight = lp_build_fract(coord_bld, coord);
          coord0 = lp_build_ifloor(coord_bld, coord);
          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
@@ -678,10 +700,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
-   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
    LLVMValueRef icoord;
    
    switch(wrap_mode) {
@@ -697,120 +717,80 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
       break;
 
    case PIPE_TEX_WRAP_CLAMP:
-      /* mul by size */
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
       if (bld->static_state->normalized_coords) {
+         /* scale coord to length */
          coord = lp_build_mul(coord_bld, coord, length_f);
       }
+
       /* floor */
       icoord = lp_build_ifloor(coord_bld, coord);
-      /* clamp to [0, size-1].  Note: int coord builder type */
+
+      /* clamp to [0, length - 1]. */
       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                               length_minus_one);
       break;
 
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      {
-         LLVMValueRef min, max;
-         if (bld->static_state->normalized_coords) {
-            /* min = 1.0 / (2 * length) */
-            min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
-            /* max = length - min */
-            max = lp_build_sub(coord_bld, length_f, min);
-            /* scale coord to length */
-            coord = lp_build_mul(coord_bld, coord, length_f);
-         }
-         else {
-            /* clamp to [0.5, length - 0.5] */
-            min = lp_build_const_vec(coord_bld->type, 0.5F);
-            max = lp_build_sub(coord_bld, length_f, min);
-         }
-         /* coord = clamp(coord, min, max) */
-         coord = lp_build_clamp(coord_bld, coord, min, max);
-         icoord = lp_build_ifloor(coord_bld, coord);
-      }
-      break;
-
    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
       {
          LLVMValueRef min, max;
+
          if (bld->static_state->normalized_coords) {
-            /* min = -1.0 / (2 * length) = -0.5 / length */
-            min = lp_build_mul(coord_bld,
-                               lp_build_const_vec(coord_bld->type, -0.5F),
-                               lp_build_rcp(coord_bld, length_f));
-            /* max = length - min */
-            max = lp_build_sub(coord_bld, length_f, min);
             /* scale coord to length */
             coord = lp_build_mul(coord_bld, coord, length_f);
          }
-         else {
-            /* clamp to [-0.5, length + 0.5] */
-            min = lp_build_const_vec(coord_bld->type, -0.5F);
-            max = lp_build_sub(coord_bld, length_f, min);
-         }
-         /* coord = clamp(coord, min, max) */
-         coord = lp_build_clamp(coord_bld, coord, min, max);
+
          icoord = lp_build_ifloor(coord_bld, coord);
+
+         /* clamp to [-1, length] */
+         min = lp_build_negate(int_coord_bld, int_coord_bld->one);
+         max = length;
+         icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
       }
       break;
 
    case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      {
-         LLVMValueRef min, max;
-         /* min = 1.0 / (2 * length) */
-         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
-         /* max = length - min */
-         max = lp_build_sub(coord_bld, length_f, min);
+      /* compute mirror function */
+      coord = lp_build_coord_mirror(bld, coord);
 
-         /* compute mirror function */
-         coord = lp_build_coord_mirror(bld, coord);
+      /* scale coord to length */
+      assert(bld->static_state->normalized_coords);
+      coord = lp_build_mul(coord_bld, coord, length_f);
 
-         /* scale coord to length */
-         coord = lp_build_mul(coord_bld, coord, length_f);
+      icoord = lp_build_ifloor(coord_bld, coord);
 
-         /* coord = clamp(coord, min, max) */
-         coord = lp_build_clamp(coord_bld, coord, min, max);
-         icoord = lp_build_ifloor(coord_bld, coord);
-      }
+      /* clamp to [0, length - 1] */
+      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
       break;
 
    case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      coord = lp_build_abs(coord_bld, coord);
-      coord = lp_build_mul(coord_bld, coord, length_f);
-      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
-      icoord = lp_build_ifloor(coord_bld, coord);
-      break;
-
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-      {
-         LLVMValueRef min, max;
-         /* min = 1.0 / (2 * length) */
-         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
-         /* max = length - min */
-         max = lp_build_sub(coord_bld, length_f, min);
+      coord = lp_build_abs(coord_bld, coord);
 
-         coord = lp_build_abs(coord_bld, coord);
+      if (bld->static_state->normalized_coords) {
+         /* scale coord to length */
          coord = lp_build_mul(coord_bld, coord, length_f);
-         coord = lp_build_clamp(coord_bld, coord, min, max);
-         icoord = lp_build_ifloor(coord_bld, coord);
       }
+
+      icoord = lp_build_ifloor(coord_bld, coord);
+
+      /* clamp to [0, length - 1] */
+      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
       break;
 
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      {
-         LLVMValueRef min, max;
-         /* min = 1.0 / (2 * length) */
-         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
-         min = lp_build_negate(coord_bld, min);
-         /* max = length - min */
-         max = lp_build_sub(coord_bld, length_f, min);
+      coord = lp_build_abs(coord_bld, coord);
 
-         coord = lp_build_abs(coord_bld, coord);
+      if (bld->static_state->normalized_coords) {
+         /* scale coord to length */
          coord = lp_build_mul(coord_bld, coord, length_f);
-         coord = lp_build_clamp(coord_bld, coord, min, max);
-         icoord = lp_build_ifloor(coord_bld, coord);
       }
+
+      icoord = lp_build_ifloor(coord_bld, coord);
+
+      /* clamp to [0, length] */
+      icoord = lp_build_min(int_coord_bld, icoord, length);
       break;
 
    default:
@@ -839,18 +819,23 @@ lp_build_minify(struct lp_build_sample_context *bld,
 
 /**
  * Generate code to compute texture level of detail (lambda).
- * \param s  vector of texcoord s values
- * \param t  vector of texcoord t values
- * \param r  vector of texcoord r values
+ * \param ddx  partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
+ * \param lod_bias  optional float vector with the shader lod bias
+ * \param explicit_lod  optional float vector with the explicit lod
  * \param width  scalar int texture width
  * \param height  scalar int texture height
  * \param depth  scalar int texture depth
+ *
+ * XXX: The resulting lod is scalar, so ignore all but the first element of
+ * derivatives, lod_bias, etc that are passed by the shader.
  */
 static LLVMValueRef
 lp_build_lod_selector(struct lp_build_sample_context *bld,
-                      LLVMValueRef s,
-                      LLVMValueRef t,
-                      LLVMValueRef r,
+                      const LLVMValueRef ddx[4],
+                      const LLVMValueRef ddy[4],
+                      LLVMValueRef lod_bias, /* optional */
+                      LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef width,
                       LLVMValueRef height,
                       LLVMValueRef depth)
@@ -863,85 +848,76 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
    }
    else {
-      const int dims = texture_dims(bld->static_state->target);
       struct lp_build_context *float_bld = &bld->float_bld;
-      LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(),
-                                            bld->static_state->lod_bias);
+      LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
+                                                    bld->static_state->lod_bias);
       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
                                            bld->static_state->min_lod);
       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
                                            bld->static_state->max_lod);
-
       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-      LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
-      LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
-      LLVMValueRef s0, s1, s2;
-      LLVMValueRef t0, t1, t2;
-      LLVMValueRef r0, r1, r2;
-      LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
-      LLVMValueRef rho, lod;
-
-      /*
-       * dsdx = abs(s[1] - s[0]);
-       * dsdy = abs(s[2] - s[0]);
-       * dtdx = abs(t[1] - t[0]);
-       * dtdy = abs(t[2] - t[0]);
-       * drdx = abs(r[1] - r[0]);
-       * drdy = abs(r[2] - r[0]);
-       * XXX we're assuming a four-element quad in 2x2 layout here.
-       */
-      s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
-      s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
-      s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
-      dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
-      dsdx = lp_build_abs(float_bld, dsdx);
-      dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
-      dsdy = lp_build_abs(float_bld, dsdy);
-      if (dims > 1) {
-         t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
-         t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
-         t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
-         dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
-         dtdx = lp_build_abs(float_bld, dtdx);
-         dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
-         dtdy = lp_build_abs(float_bld, dtdy);
-         if (dims > 2) {
-            r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
-            r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
-            r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
-            drdx = LLVMBuildSub(bld->builder, r1, r0, "");
-            drdx = lp_build_abs(float_bld, drdx);
-            drdy = LLVMBuildSub(bld->builder, r2, r0, "");
-            drdy = lp_build_abs(float_bld, drdy);
-         }
+      LLVMValueRef lod;
+
+      if (explicit_lod) {
+         lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
+                                       index0, "");
       }
+      else {
+         const int dims = texture_dims(bld->static_state->target);
+         LLVMValueRef dsdx, dsdy;
+         LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
+         LLVMValueRef rho;
+
+         dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+         dsdx = lp_build_abs(float_bld, dsdx);
+         dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+         dsdy = lp_build_abs(float_bld, dsdy);
+         if (dims > 1) {
+            dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+            dtdx = lp_build_abs(float_bld, dtdx);
+            dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+            dtdy = lp_build_abs(float_bld, dtdy);
+            if (dims > 2) {
+               drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+               drdx = lp_build_abs(float_bld, drdx);
+               drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+               drdy = lp_build_abs(float_bld, drdy);
+            }
+         }
 
-      /* Compute rho = max of all partial derivatives scaled by texture size.
-       * XXX this could be vectorized somewhat
-       */
-      rho = LLVMBuildMul(bld->builder,
-                         lp_build_max(float_bld, dsdx, dsdy),
-                         lp_build_int_to_float(float_bld, width), "");
-      if (dims > 1) {
-         LLVMValueRef max;
-         max = LLVMBuildMul(bld->builder,
-                            lp_build_max(float_bld, dtdx, dtdy),
-                            lp_build_int_to_float(float_bld, height), "");
-         rho = lp_build_max(float_bld, rho, max);
-         if (dims > 2) {
+         /* Compute rho = max of all partial derivatives scaled by texture size.
+          * XXX this could be vectorized somewhat
+          */
+         rho = LLVMBuildMul(bld->builder,
+                            lp_build_max(float_bld, dsdx, dsdy),
+                            lp_build_int_to_float(float_bld, width), "");
+         if (dims > 1) {
+            LLVMValueRef max;
             max = LLVMBuildMul(bld->builder,
-                               lp_build_max(float_bld, drdx, drdy),
-                               lp_build_int_to_float(float_bld, depth), "");
+                               lp_build_max(float_bld, dtdx, dtdy),
+                               lp_build_int_to_float(float_bld, height), "");
             rho = lp_build_max(float_bld, rho, max);
+            if (dims > 2) {
+               max = LLVMBuildMul(bld->builder,
+                                  lp_build_max(float_bld, drdx, drdy),
+                                  lp_build_int_to_float(float_bld, depth), "");
+               rho = lp_build_max(float_bld, rho, max);
+            }
          }
-      }
 
-      /* compute lod = log2(rho) */
-      lod = lp_build_log2(float_bld, rho);
+         /* compute lod = log2(rho) */
+         lod = lp_build_log2(float_bld, rho);
 
-      /* add lod bias */
-      lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+         /* add shader lod bias */
+         if (lod_bias) {
+            lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
+                                               index0, "");
+            lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+         }
+      }
+
+      /* add sampler lod bias */
+      lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
 
       /* clamp lod */
       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
@@ -1009,8 +985,10 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                 int_bld->zero,
                                 last_level);
    /* compute level 1 and clamp to legal range of levels */
-   *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
-   *level1_out = lp_build_min(int_bld, *level1_out, last_level);
+   level = lp_build_add(int_bld, level, int_bld->one);
+   *level1_out = lp_build_clamp(int_bld, level,
+                                int_bld->zero,
+                                last_level);
 
    *weight_out = lp_build_fract(float_bld, lod);
 }
@@ -1299,7 +1277,7 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
 
 
 /**
- * Generate code to do cube face selection and per-face texcoords.
+ * Generate code to do cube face selection and compute per-face texcoords.
  */
 static void
 lp_build_cube_lookup(struct lp_build_sample_context *bld,
@@ -1423,7 +1401,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
          lp_build_endif(&if_ctx2);
          lp_build_flow_scope_end(flow_ctx2);
          lp_build_flow_destroy(flow_ctx2);
-
          *face_s = face_s2;
          *face_t = face_t2;
          *face = face2;
@@ -1469,13 +1446,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    int chan;
 
    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+      /* sample the first mipmap level */
       lp_build_sample_image_nearest(bld,
                                     width0_vec, height0_vec, depth0_vec,
                                     row_stride0_vec, img_stride0_vec,
                                     data_ptr0, s, t, r, colors0);
 
       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-         /* sample the second mipmap level, and interp */
+         /* sample the second mipmap level */
          lp_build_sample_image_nearest(bld,
                                        width1_vec, height1_vec, depth1_vec,
                                        row_stride1_vec, img_stride1_vec,
@@ -1485,13 +1463,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    else {
       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
 
+      /* sample the first mipmap level */
       lp_build_sample_image_linear(bld,
                                    width0_vec, height0_vec, depth0_vec,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, s, t, r, colors0);
 
       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-         /* sample the second mipmap level, and interp */
+         /* sample the second mipmap level */
          lp_build_sample_image_linear(bld,
                                       width1_vec, height1_vec, depth1_vec,
                                       row_stride1_vec, img_stride1_vec,
@@ -1527,6 +1506,10 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         LLVMValueRef s,
                         LLVMValueRef t,
                         LLVMValueRef r,
+                        const LLVMValueRef *ddx,
+                        const LLVMValueRef *ddy,
+                        LLVMValueRef lod_bias, /* optional */
+                        LLVMValueRef explicit_lod, /* optional */
                         LLVMValueRef width,
                         LLVMValueRef height,
                         LLVMValueRef depth,
@@ -1534,7 +1517,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         LLVMValueRef height_vec,
                         LLVMValueRef depth_vec,
                         LLVMValueRef row_stride_array,
-                        LLVMValueRef img_stride_vec,
+                        LLVMValueRef img_stride_array,
                         LLVMValueRef data_array,
                         LLVMValueRef *colors_out)
 {
@@ -1543,19 +1526,44 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
    const unsigned min_filter = bld->static_state->min_img_filter;
    const unsigned mag_filter = bld->static_state->mag_img_filter;
    const int dims = texture_dims(bld->static_state->target);
-   LLVMValueRef lod, lod_fpart;
-   LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec;
+   LLVMValueRef lod = NULL, lod_fpart = NULL;
+   LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
-   LLVMValueRef data_ptr0, data_ptr1;
+   LLVMValueRef data_ptr0, data_ptr1 = NULL;
+   LLVMValueRef face_ddx[4], face_ddy[4];
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
           mip_filter, min_filter, mag_filter);
    */
 
+   /*
+    * Choose cube face, recompute texcoords and derivatives for the chosen face.
+    */
+   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+      LLVMValueRef face, face_s, face_t;
+      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+      s = face_s; /* vec */
+      t = face_t; /* vec */
+      /* use 'r' to indicate cube face */
+      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+
+      /* recompute ddx, ddy using the new (s,t) face texcoords */
+      face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
+      face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+      face_ddx[2] = NULL;
+      face_ddx[3] = NULL;
+      face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
+      face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+      face_ddy[2] = NULL;
+      face_ddy[3] = NULL;
+      ddx = face_ddx;
+      ddy = face_ddy;
+   }
+
    /*
     * Compute the level of detail (float).
     */
@@ -1564,7 +1572,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       /* Need to compute lod either to choose mipmap levels or to
        * distinguish between minification/magnification with one mipmap level.
        */
-      lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
+      lod = lp_build_lod_selector(bld, ddx, ddy,
+                                  lod_bias, explicit_lod,
+                                  width, height, depth);
    }
 
    /*
@@ -1572,9 +1582,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
     */
    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
       /* always use mip level 0 */
-      ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
+          * We should be able to set ilevel0 = const(0) but that causes
+          * bad x86 code to be emitted.
+          */
+         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
+         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+      }
+      else {
+         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+      }
    }
    else {
+      assert(lod);
       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
       }
@@ -1602,8 +1623,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
                                                       ilevel0);
       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
-         img_stride0_vec = lp_build_mul(&bld->int_coord_bld,
-                                        row_stride0_vec, height0_vec);
+         img_stride0_vec = lp_build_get_level_stride_vec(bld,
+                                                         img_stride_array,
+                                                         ilevel0);
          if (dims == 3) {
             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
          }
@@ -1617,8 +1639,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
                                                          ilevel1);
          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
-            img_stride1_vec = lp_build_mul(&bld->int_coord_bld,
-                                           row_stride1_vec, height1_vec);
+            img_stride1_vec = lp_build_get_level_stride_vec(bld,
+                                                            img_stride_array,
+                                                            ilevel1);
             if (dims ==3) {
                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
             }
@@ -1626,18 +1649,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       }
    }
 
-   /*
-    * Choose cube face, recompute per-face texcoords.
-    */
-   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
-      LLVMValueRef face, face_s, face_t;
-      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
-      s = face_s; /* vec */
-      t = face_t; /* vec */
-      /* use 'r' to indicate cube face */
-      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
-   }
-
    /*
     * Get pointer(s) to image data for mipmap level(s).
     */
@@ -1715,36 +1726,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
 
 
 
-static void
-lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
-                          struct lp_type dst_type,
-                          LLVMValueRef packed,
-                          LLVMValueRef *rgba)
-{
-   LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
-   unsigned chan;
-
-   /* Decode the input vector components */
-   for (chan = 0; chan < 4; ++chan) {
-      unsigned start = chan*8;
-      unsigned stop = start + 8;
-      LLVMValueRef input;
-
-      input = packed;
-
-      if(start)
-         input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
-
-      if(stop < 32)
-         input = LLVMBuildAnd(builder, input, mask, "");
-
-      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
-
-      rgba[chan] = input;
-   }
-}
-
-
 static void
 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                               LLVMValueRef s,
@@ -1753,7 +1734,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                               LLVMValueRef height,
                               LLVMValueRef stride_array,
                               LLVMValueRef data_array,
-                              LLVMValueRef *texel)
+                              LLVMValueRef texel_out[4])
 {
    LLVMBuilderRef builder = bld->builder;
    struct lp_build_context i32, h16, u8n;
@@ -1770,6 +1751,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMValueRef unswizzled[4];
    LLVMValueRef stride;
 
+   assert(bld->static_state->target == PIPE_TEXTURE_2D);
+   assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
+   assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
+   assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
+
    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
@@ -1934,22 +1920,29 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
     * Convert to SoA and swizzle.
     */
 
-   packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
-
    lp_build_rgba8_to_f32_soa(bld->builder,
                              bld->texel_type,
                              packed, unswizzled);
 
-   lp_build_format_swizzle_soa(bld->format_desc,
-                               bld->texel_type, unswizzled,
-                               texel);
+   if (util_format_is_rgba8_variant(bld->format_desc)) {
+      lp_build_format_swizzle_soa(bld->format_desc,
+                                  &bld->texel_bld,
+                                  unswizzled, texel_out);
+   } else {
+      texel_out[0] = unswizzled[0];
+      texel_out[1] = unswizzled[1];
+      texel_out[2] = unswizzled[2];
+      texel_out[3] = unswizzled[3];
+   }
+
+   apply_sampler_swizzle(bld, texel_out);
 }
 
 
 static void
 lp_build_sample_compare(struct lp_build_sample_context *bld,
                         LLVMValueRef p,
-                        LLVMValueRef *texel)
+                        LLVMValueRef texel[4])
 {
    struct lp_build_context *texel_bld = &bld->texel_bld;
    LLVMValueRef res;
@@ -1981,11 +1974,31 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Debug stand-in for texture sampling: stores bld->texel_bld.one in all
+ * four texel_out[] channels (white) and never touches the texture data.
+ */
+static void
+lp_build_sample_nop(struct lp_build_sample_context *bld,
+                    LLVMValueRef texel_out[4])
+{
+   struct lp_build_context *texel_bld = &bld->texel_bld;
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      /* each channel gets the texel build context's constant 'one' value */
+      texel_out[chan] = texel_bld->one;
+   }  
+}
+
+
 /**
  * Build texture sampling code.
  * 'texel' will return a vector of four LLVMValueRefs corresponding to
  * R, G, B, A.
  * \param type  vector float type to use for coords, etc.
+ * \param ddx  partial derivatives of (s,t,r,q) with respect to x
+ * \param ddy  partial derivatives of (s,t,r,q) with respect to y
  */
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
@@ -1995,23 +2008,26 @@ lp_build_sample_soa(LLVMBuilderRef builder,
                     unsigned unit,
                     unsigned num_coords,
                     const LLVMValueRef *coords,
-                    LLVMValueRef lodbias,
-                    LLVMValueRef *texel)
+                    const LLVMValueRef ddx[4],
+                    const LLVMValueRef ddy[4],
+                    LLVMValueRef lod_bias, /* optional */
+                    LLVMValueRef explicit_lod, /* optional */
+                    LLVMValueRef texel_out[4])
 {
    struct lp_build_sample_context bld;
    LLVMValueRef width, width_vec;
    LLVMValueRef height, height_vec;
    LLVMValueRef depth, depth_vec;
-   LLVMValueRef stride_array;
+   LLVMValueRef row_stride_array, img_stride_array;
    LLVMValueRef data_array;
    LLVMValueRef s;
    LLVMValueRef t;
    LLVMValueRef r;
 
-   (void) lp_build_lod_selector;   /* temporary to silence warning */
-   (void) lp_build_nearest_mip_level;
-   (void) lp_build_linear_mip_levels;
-   (void) lp_build_minify;
+   if (0) {
+      enum pipe_format fmt = static_state->format;
+      debug_printf("Sample from %s\n", util_format_name(fmt));
+   }
 
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
@@ -2038,7 +2054,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    width = dynamic_state->width(dynamic_state, builder, unit);
    height = dynamic_state->height(dynamic_state, builder, unit);
    depth = dynamic_state->depth(dynamic_state, builder, unit);
-   stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+   row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+   img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
    /* Note that data_array is an array[level] of pointers to texture images */
 
@@ -2050,24 +2067,31 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
 
-   if (util_format_is_rgba8_variant(bld.format_desc) &&
-       static_state->target == PIPE_TEXTURE_2D &&
-       static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
-       static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
-       static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
-       is_simple_wrap_mode(static_state->wrap_s) &&
-       is_simple_wrap_mode(static_state->wrap_t)) {
+   if (0) {
+      /* For debug: no-op texture sampling */
+      lp_build_sample_nop(&bld, texel_out);
+   }
+   else if (util_format_fits_8unorm(bld.format_desc) &&
+            bld.format_desc->nr_channels > 1 &&
+            static_state->target == PIPE_TEXTURE_2D &&
+            static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
+            static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
+            static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
+            is_simple_wrap_mode(static_state->wrap_s) &&
+            is_simple_wrap_mode(static_state->wrap_t)) {
       /* special case */
       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
-                                    stride_array, data_array, texel);
+                                    row_stride_array, data_array, texel_out);
    }
    else {
-      lp_build_sample_general(&bld, unit, s, t, r,
+      lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
+                              lod_bias, explicit_lod,
                               width, height, depth,
                               width_vec, height_vec, depth_vec,
-                              stride_array, NULL, data_array,
-                              texel);
+                              row_stride_array, img_stride_array,
+                              data_array,
+                              texel_out);
    }
 
-   lp_build_sample_compare(&bld, r, texel);
+   lp_build_sample_compare(&bld, r, texel_out);
 }