Merge remote branch 'origin/7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
index a41068835038b49ab3c1b728ca8375ab4d4ed865..c9b613e21c89d8243744f2a40b66345021104389 100644 (file)
@@ -211,7 +211,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
    const int dims = texture_dims(bld->static_state->target);
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
-   LLVMValueRef packed;
+   LLVMValueRef i, j;
    LLVMValueRef use_border = NULL;
 
    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
@@ -248,6 +248,43 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
+   /*
+    * Describe the coordinates in terms of pixel blocks.
+    *
+    * TODO: pixel blocks are power of two. LLVM should convert rem/div to
+    * bit arithmetic. Verify this.
+    */
+
+   if (bld->format_desc->block.width == 1) {
+      i = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
+      i = LLVMBuildURem(bld->builder, x, block_width, "");
+      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+   }
+
+   if (bld->format_desc->block.height == 1) {
+      j = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
+      j = LLVMBuildURem(bld->builder, y, block_height, "");
+      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+   }
+
+   /* convert x,y,z coords to linear offset from start of texture, in bytes */
+   offset = lp_build_sample_offset(&bld->uint_coord_bld,
+                                   bld->format_desc,
+                                   x, y, z, y_stride, z_stride);
+
+   lp_build_fetch_rgba_soa(bld->builder,
+                           bld->format_desc,
+                           bld->texel_type,
+                           data_ptr, offset,
+                           i, j,
+                           texel);
+
    /*
     * Note: if we find an app which frequently samples the texture border
     * we might want to implement a true conditional here to avoid sampling
@@ -263,36 +300,12 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
     * the texel color results with the border color.
     */
 
-   /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, z, y_stride, z_stride);
-
-   assert(bld->format_desc->block.width == 1);
-   assert(bld->format_desc->block.height == 1);
-   assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
-   /* gather the texels from the texture */
-   packed = lp_build_gather(bld->builder,
-                            bld->texel_type.length,
-                            bld->format_desc->block.bits,
-                            bld->texel_type.width,
-                            data_ptr, offset);
-
-   texel[0] = texel[1] = texel[2] = texel[3] = NULL;
-
-   /* convert texels to float rgba */
-   lp_build_unpack_rgba_soa(bld->builder,
-                            bld->format_desc,
-                            bld->texel_type,
-                            packed, texel);
-
    if (use_border) {
       /* select texel color or border color depending on use_border */
       int chan;
       for (chan = 0; chan < 4; chan++) {
          LLVMValueRef border_chan =
-            lp_build_const_scalar(bld->texel_type,
+            lp_build_const_vec(bld->texel_type,
                                   bld->static_state->border_color[chan]);
          texel[chan] = lp_build_select(&bld->texel_bld, use_border,
                                        border_chan, texel[chan]);
@@ -457,8 +470,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
-   LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
+   LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
+   LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
@@ -512,7 +525,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
       else {
          LLVMValueRef min, max;
          /* clamp to [0.5, length - 0.5] */
-         min = lp_build_const_scalar(coord_bld->type, 0.5F);
+         min = lp_build_const_vec(coord_bld->type, 0.5F);
          max = lp_build_sub(coord_bld, length_f, min);
          coord = lp_build_clamp(coord_bld, coord, min, max);
       }
@@ -533,7 +546,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
          if (bld->static_state->normalized_coords) {
             /* min = -1.0 / (2 * length) = -0.5 / length */
             min = lp_build_mul(coord_bld,
-                               lp_build_const_scalar(coord_bld->type, -0.5F),
+                               lp_build_const_vec(coord_bld->type, -0.5F),
                                lp_build_rcp(coord_bld, length_f));
             /* max = 1.0 - min */
             max = lp_build_sub(coord_bld, coord_bld->one, min);
@@ -545,7 +558,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
          }
          else {
             /* clamp to [-0.5, length + 0.5] */
-            min = lp_build_const_scalar(coord_bld->type, -0.5F);
+            min = lp_build_const_vec(coord_bld->type, -0.5F);
             max = lp_build_sub(coord_bld, length_f, min);
             coord = lp_build_clamp(coord_bld, coord, min, max);
             coord = lp_build_sub(coord_bld, coord, half);
@@ -620,7 +633,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
          LLVMValueRef min, max;
          /* min = -1.0 / (2 * length) = -0.5 / length */
          min = lp_build_mul(coord_bld,
-                            lp_build_const_scalar(coord_bld->type, -0.5F),
+                            lp_build_const_vec(coord_bld->type, -0.5F),
                             lp_build_rcp(coord_bld, length_f));
          /* max = 1.0 - min */
          max = lp_build_sub(coord_bld, coord_bld->one, min);
@@ -665,7 +678,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
+   LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
@@ -708,7 +721,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
          }
          else {
             /* clamp to [0.5, length - 0.5] */
-            min = lp_build_const_scalar(coord_bld->type, 0.5F);
+            min = lp_build_const_vec(coord_bld->type, 0.5F);
             max = lp_build_sub(coord_bld, length_f, min);
          }
          /* coord = clamp(coord, min, max) */
@@ -724,7 +737,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
          if (bld->static_state->normalized_coords) {
             /* min = -1.0 / (2 * length) = -0.5 / length */
             min = lp_build_mul(coord_bld,
-                               lp_build_const_scalar(coord_bld->type, -0.5F),
+                               lp_build_const_vec(coord_bld->type, -0.5F),
                                lp_build_rcp(coord_bld, length_f));
             /* max = length - min */
             max = lp_build_sub(coord_bld, length_f, min);
@@ -733,7 +746,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
          }
          else {
             /* clamp to [-0.5, length + 0.5] */
-            min = lp_build_const_scalar(coord_bld->type, -0.5F);
+            min = lp_build_const_vec(coord_bld->type, -0.5F);
             max = lp_build_sub(coord_bld, length_f, min);
          }
          /* coord = clamp(coord, min, max) */
@@ -843,87 +856,98 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef depth)
 
 {
-   const int dims = texture_dims(bld->static_state->target);
-   struct lp_build_context *float_bld = &bld->float_bld;
-   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
-   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
-   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
-
-   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
-   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
-   LLVMValueRef s0, s1, s2;
-   LLVMValueRef t0, t1, t2;
-   LLVMValueRef r0, r1, r2;
-   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
-   LLVMValueRef rho, lod;
-
-   /*
-    * dsdx = abs(s[1] - s[0]);
-    * dsdy = abs(s[2] - s[0]);
-    * dtdx = abs(t[1] - t[0]);
-    * dtdy = abs(t[2] - t[0]);
-    * drdx = abs(r[1] - r[0]);
-    * drdy = abs(r[2] - r[0]);
-    * XXX we're assuming a four-element quad in 2x2 layout here.
-    */
-   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
-   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
-   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
-   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
-   dsdx = lp_build_abs(float_bld, dsdx);
-   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
-   dsdy = lp_build_abs(float_bld, dsdy);
-   if (dims > 1) {
-      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
-      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
-      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
-      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
-      dtdx = lp_build_abs(float_bld, dtdx);
-      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
-      dtdy = lp_build_abs(float_bld, dtdy);
-      if (dims > 2) {
-         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
-         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
-         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
-         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
-         drdx = lp_build_abs(float_bld, drdx);
-         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
-         drdy = lp_build_abs(float_bld, drdy);
-      }
+   if (bld->static_state->min_lod == bld->static_state->max_lod) {
+      /* User is forcing sampling from a particular mipmap level.
+       * This is hit during mipmap generation.
+       */
+      return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
    }
+   else {
+      const int dims = texture_dims(bld->static_state->target);
+      struct lp_build_context *float_bld = &bld->float_bld;
+      LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(),
+                                            bld->static_state->lod_bias);
+      LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
+                                           bld->static_state->min_lod);
+      LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
+                                           bld->static_state->max_lod);
+
+      LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+      LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+      LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+      LLVMValueRef s0, s1, s2;
+      LLVMValueRef t0, t1, t2;
+      LLVMValueRef r0, r1, r2;
+      LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+      LLVMValueRef rho, lod;
+
+      /*
+       * dsdx = abs(s[1] - s[0]);
+       * dsdy = abs(s[2] - s[0]);
+       * dtdx = abs(t[1] - t[0]);
+       * dtdy = abs(t[2] - t[0]);
+       * drdx = abs(r[1] - r[0]);
+       * drdy = abs(r[2] - r[0]);
+       * XXX we're assuming a four-element quad in 2x2 layout here.
+       */
+      s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+      s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+      s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+      dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+      dsdx = lp_build_abs(float_bld, dsdx);
+      dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+      dsdy = lp_build_abs(float_bld, dsdy);
+      if (dims > 1) {
+         t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+         t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+         t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+         dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+         dtdx = lp_build_abs(float_bld, dtdx);
+         dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+         dtdy = lp_build_abs(float_bld, dtdy);
+         if (dims > 2) {
+            r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+            r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+            r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+            drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+            drdx = lp_build_abs(float_bld, drdx);
+            drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+            drdy = lp_build_abs(float_bld, drdy);
+         }
+      }
 
-   /* Compute rho = max of all partial derivatives scaled by texture size.
-    * XXX this could be vectorized somewhat
-    */
-   rho = LLVMBuildMul(bld->builder,
-                      lp_build_max(float_bld, dsdx, dsdy),
-                      lp_build_int_to_float(float_bld, width), "");
-   if (dims > 1) {
-      LLVMValueRef max;
-      max = LLVMBuildMul(bld->builder,
-                         lp_build_max(float_bld, dtdx, dtdy),
-                         lp_build_int_to_float(float_bld, height), "");
-      rho = lp_build_max(float_bld, rho, max);
-      if (dims > 2) {
+      /* Compute rho = max of all partial derivatives scaled by texture size.
+       * XXX this could be vectorized somewhat
+       */
+      rho = LLVMBuildMul(bld->builder,
+                         lp_build_max(float_bld, dsdx, dsdy),
+                         lp_build_int_to_float(float_bld, width), "");
+      if (dims > 1) {
+         LLVMValueRef max;
          max = LLVMBuildMul(bld->builder,
-                            lp_build_max(float_bld, drdx, drdy),
-                            lp_build_int_to_float(float_bld, depth), "");
+                            lp_build_max(float_bld, dtdx, dtdy),
+                            lp_build_int_to_float(float_bld, height), "");
          rho = lp_build_max(float_bld, rho, max);
+         if (dims > 2) {
+            max = LLVMBuildMul(bld->builder,
+                               lp_build_max(float_bld, drdx, drdy),
+                               lp_build_int_to_float(float_bld, depth), "");
+            rho = lp_build_max(float_bld, rho, max);
+         }
       }
-   }
 
-   /* compute lod = log2(rho) */
-   lod = lp_build_log2(float_bld, rho);
+      /* compute lod = log2(rho) */
+      lod = lp_build_log2(float_bld, rho);
 
-   /* add lod bias */
-   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+      /* add lod bias */
+      lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
 
-   /* clamp lod */
-   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+      /* clamp lod */
+      lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
 
-   return lod;
+      return lod;
+   }
 }
 
 
@@ -986,7 +1010,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                 last_level);
    /* compute level 1 and clamp to legal range of levels */
    *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
-   *level1_out = lp_build_min(int_bld, *level1_out, int_bld->zero);
+   *level1_out = lp_build_min(int_bld, *level1_out, last_level);
 
    *weight_out = lp_build_fract(float_bld, lod);
 }
@@ -994,6 +1018,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 
 /**
  * Generate code to sample a mipmap level with nearest filtering.
+ * If sampling a cube texture, r = cube face in [0,5].
  */
 static void
 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
@@ -1031,6 +1056,9 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                                           bld->static_state->wrap_r);
          lp_build_name(z, "tex.z.wrapped");
       }
+      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+         z = r;
+      }
       else {
          z = NULL;
       }
@@ -1051,7 +1079,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 
 /**
  * Generate code to sample a mipmap level with linear filtering.
- * 1D, 2D and 3D images are suppored.
+ * If sampling a cube texture, r = cube face in [0,5].
  */
 static void
 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
@@ -1098,8 +1126,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
          lp_build_name(z0, "tex.z0.wrapped");
          lp_build_name(z1, "tex.z1.wrapped");
       }
+      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+         z0 = z1 = r;  /* cube face */
+         r_fpart = NULL;
+      }
       else {
-         z0 = z1 = r_fpart = NULL;
+         z0 = z1 = NULL;
+         r_fpart = NULL;
       }
    }
    else {
@@ -1201,6 +1234,70 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 }
 
 
+/** Helper used by lp_build_cube_lookup(): builds ima = -0.5 / abs(coord). */
+static LLVMValueRef
+lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
+{
+   /* ima = -0.5 / abs(coord); emitted as -0.5 * rcp(abs(coord)) */
+   LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
+   LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
+   LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
+                                   lp_build_rcp(coord_bld, absCoord));
+   return ima;
+}
+
+
+/**
+ * Helper used by lp_build_cube_lookup(); builds (+/-coord) * ima [* sign] + 0.5.
+ * \param sign  scalar +1 or -1 (broadcast before use), or NULL to skip the sign multiply
+ * \param coord  float vector; negated first when negate_coord is -1 (only +1 or -1 allowed)
+ * \param ima  float vector
+ */
+static LLVMValueRef
+lp_build_cube_coord(struct lp_build_context *coord_bld,
+                    LLVMValueRef sign, int negate_coord,
+                    LLVMValueRef coord, LLVMValueRef ima)
+{
+   /* return negate(coord) * ima * sign + 0.5; */
+   LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
+   LLVMValueRef res;
+
+   assert(negate_coord == +1 || negate_coord == -1);
+
+   if (negate_coord == -1) {
+      coord = lp_build_negate(coord_bld, coord);
+   }
+
+   res = lp_build_mul(coord_bld, coord, ima);
+   if (sign) {   /* a NULL sign means no sign multiply */
+      sign = lp_build_broadcast_scalar(coord_bld, sign);
+      res = lp_build_mul(coord_bld, res, sign);
+   }
+   res = lp_build_add(coord_bld, res, half);
+
+   return res;
+}
+
+
+/** Helper used by lp_build_cube_lookup()
+ * Return (major_coord >= 0) ? pos_face : neg_face; as a 32-bit integer value.
+ */
+static LLVMValueRef
+lp_build_cube_face(struct lp_build_sample_context *bld,
+                   LLVMValueRef major_coord,
+                   unsigned pos_face, unsigned neg_face)
+{
+   LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, /* unordered: NaN also picks pos_face */
+                                    major_coord,
+                                    bld->float_bld.zero, "");
+   LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
+   LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
+   LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
+   return res;
+}
+
+
+
 /**
  * Generate code to do cube face selection and per-face texcoords.
  */
@@ -1213,11 +1310,10 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
                      LLVMValueRef *face_s,
                      LLVMValueRef *face_t)
 {
-#if 0
    struct lp_build_context *float_bld = &bld->float_bld;
+   struct lp_build_context *coord_bld = &bld->coord_bld;
    LLVMValueRef rx, ry, rz;
    LLVMValueRef arx, ary, arz;
-   LLVMValueRef sc, tc, ma;
    LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
    LLVMValueRef arx_ge_ary, arx_ge_arz;
    LLVMValueRef ary_ge_arx, ary_ge_arz;
@@ -1257,34 +1353,165 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
 
    {
       struct lp_build_flow_context *flow_ctx;
-      struct lp_build_if_state if_ctx, if2_ctx;
+      struct lp_build_if_state if_ctx;
 
       flow_ctx = lp_build_flow_create(bld->builder);
+      lp_build_flow_scope_begin(flow_ctx);
+
+      *face_s = bld->coord_bld.undef;
+      *face_t = bld->coord_bld.undef;
+      *face = bld->int_bld.undef;
+
+      lp_build_name(*face_s, "face_s");
+      lp_build_name(*face_t, "face_t");
+      lp_build_name(*face, "face");
+
+      lp_build_flow_scope_declare(flow_ctx, face_s);
+      lp_build_flow_scope_declare(flow_ctx, face_t);
+      lp_build_flow_scope_declare(flow_ctx, face);
 
       lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
       {
-#if 0
-         lp_build_if(&if2_ctx, flow_ctx, bld->builder, rx_pos);
+         /* +/- X face */
+         LLVMValueRef sign = lp_build_sgn(float_bld, rx);
+         LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
+         *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
+         *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+         *face = lp_build_cube_face(bld, rx,
+                                    PIPE_TEX_FACE_POS_X,
+                                    PIPE_TEX_FACE_NEG_X);
+      }
+      lp_build_else(&if_ctx);
+      {
+         struct lp_build_flow_context *flow_ctx2;
+         struct lp_build_if_state if_ctx2;
+
+         LLVMValueRef face_s2 = bld->coord_bld.undef;
+         LLVMValueRef face_t2 = bld->coord_bld.undef;
+         LLVMValueRef face2 = bld->int_bld.undef;
+
+         flow_ctx2 = lp_build_flow_create(bld->builder);
+         lp_build_flow_scope_begin(flow_ctx2);
+         lp_build_flow_scope_declare(flow_ctx2, &face_s2);
+         lp_build_flow_scope_declare(flow_ctx2, &face_t2);
+         lp_build_flow_scope_declare(flow_ctx2, &face2);
+
+         ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+         lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
          {
-            /* Positive X face */
+            /* +/- Y face */
+            LLVMValueRef sign = lp_build_sgn(float_bld, ry);
+            LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
+            face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+            face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+            face2 = lp_build_cube_face(bld, ry,
+                                       PIPE_TEX_FACE_POS_Y,
+                                       PIPE_TEX_FACE_NEG_Y);
          }
-         lp_build_else(&if2_ctx);
+         lp_build_else(&if_ctx2);
          {
-            /* Negative X face */
+            /* +/- Z face */
+            LLVMValueRef sign = lp_build_sgn(float_bld, rz);
+            LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
+            face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+            face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+            face2 = lp_build_cube_face(bld, rz,
+                                       PIPE_TEX_FACE_POS_Z,
+                                       PIPE_TEX_FACE_NEG_Z);
          }
-         lp_build_endif(&if2_ctx);
-#endif
+         lp_build_endif(&if_ctx2);
+         lp_build_flow_scope_end(flow_ctx2);
+         lp_build_flow_destroy(flow_ctx2);
+
+         *face_s = face_s2;
+         *face_t = face_t2;
+         *face = face2;
       }
-      lp_build_else(&if_ctx);
-      {
+
+      lp_build_endif(&if_ctx);
+      lp_build_flow_scope_end(flow_ctx);
+      lp_build_flow_destroy(flow_ctx);
+   }
+}
+
 
 
+/**
+ * Sample the texture/mipmap using given image filter and mip filter.
+ * data_ptr0 and data_ptr1 point to the two mipmap levels to sample
+ * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
+ * Unless mip_filter is PIPE_TEX_MIPFILTER_LINEAR the '1' values and lod_fpart are unused (may be null).
+ */
+static void
+lp_build_sample_mipmap(struct lp_build_sample_context *bld,
+                       unsigned img_filter,
+                       unsigned mip_filter,
+                       LLVMValueRef s,
+                       LLVMValueRef t,
+                       LLVMValueRef r,
+                       LLVMValueRef lod_fpart,
+                       LLVMValueRef width0_vec,
+                       LLVMValueRef width1_vec,
+                       LLVMValueRef height0_vec,
+                       LLVMValueRef height1_vec,
+                       LLVMValueRef depth0_vec,
+                       LLVMValueRef depth1_vec,
+                       LLVMValueRef row_stride0_vec,
+                       LLVMValueRef row_stride1_vec,
+                       LLVMValueRef img_stride0_vec,
+                       LLVMValueRef img_stride1_vec,
+                       LLVMValueRef data_ptr0,
+                       LLVMValueRef data_ptr1,
+                       LLVMValueRef *colors_out)
+{
+   LLVMValueRef colors0[4], colors1[4];
+   int chan;
+
+   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+      lp_build_sample_image_nearest(bld,
+                                    width0_vec, height0_vec, depth0_vec,
+                                    row_stride0_vec, img_stride0_vec,
+                                    data_ptr0, s, t, r, colors0);
+
+      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+         /* sample the second mipmap level, and interp */
+         lp_build_sample_image_nearest(bld,
+                                       width1_vec, height1_vec, depth1_vec,
+                                       row_stride1_vec, img_stride1_vec,
+                                       data_ptr1, s, t, r, colors1);
       }
-      lp_build_endif(&if_ctx);
+   }
+   else {
+      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
 
-      lp_build_flow_destroy(flow_ctx);
+      lp_build_sample_image_linear(bld,
+                                   width0_vec, height0_vec, depth0_vec,
+                                   row_stride0_vec, img_stride0_vec,
+                                   data_ptr0, s, t, r, colors0);
+
+      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+         /* sample the second mipmap level, and interp */
+         lp_build_sample_image_linear(bld,
+                                      width1_vec, height1_vec, depth1_vec,
+                                      row_stride1_vec, img_stride1_vec,
+                                      data_ptr1, s, t, r, colors1);
+      }
+   }
+
+   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+      /* interpolate samples from the two mipmap levels, weighted by lod_fpart */
+      for (chan = 0; chan < 4; chan++) {
+         colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
+                                          colors0[chan], colors1[chan]);
+      }
+   }
+   else {
+      /* use first/only level's colors */
+      for (chan = 0; chan < 4; chan++) {
+         colors_out[chan] = colors0[chan];
+      }
    }
-#endif
 }
 
 
@@ -1307,22 +1534,22 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         LLVMValueRef height_vec,
                         LLVMValueRef depth_vec,
                         LLVMValueRef row_stride_array,
-                        LLVMValueRef img_stride_vec,
+                        LLVMValueRef img_stride_array,
                         LLVMValueRef data_array,
                         LLVMValueRef *colors_out)
 {
+   struct lp_build_context *float_bld = &bld->float_bld;
    const unsigned mip_filter = bld->static_state->min_mip_filter;
    const unsigned min_filter = bld->static_state->min_img_filter;
    const unsigned mag_filter = bld->static_state->mag_img_filter;
    const int dims = texture_dims(bld->static_state->target);
-   LLVMValueRef lod, lod_fpart;
-   LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec;
+   LLVMValueRef lod = NULL, lod_fpart = NULL;
+   LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
-   LLVMValueRef data_ptr0, data_ptr1;
-   int chan;
+   LLVMValueRef data_ptr0, data_ptr1 = NULL;
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
@@ -1330,16 +1557,24 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
    */
 
    /*
-    * Compute the level of detail (mipmap level index(es)).
+    * Compute the level of detail (float).
+    */
+   if (min_filter != mag_filter ||
+       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+      /* Need to compute lod either to choose mipmap levels or to
+       * distinguish between minification/magnification with one mipmap level.
+       */
+      lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
+   }
+
+   /*
+    * Compute integer mipmap level(s) to fetch texels from.
     */
    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
       /* always use mip level 0 */
       ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
    }
    else {
-      /* compute float LOD */
-      lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
-
       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
       }
@@ -1366,33 +1601,43 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
                                                       ilevel0);
-      if (dims == 3) {
-         depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
-         img_stride0_vec = lp_build_mul(&bld->int_coord_bld,
-                                        row_stride0_vec, height0_vec);
+      if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+         img_stride0_vec = lp_build_get_level_stride_vec(bld,
+                                                         img_stride_array,
+                                                         ilevel0);
+         if (dims == 3) {
+            depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
+         }
       }
    }
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-      /* compute width, height, depth for second mipmap level at ilevel1 */
+      /* compute width, height, depth for second mipmap level at 'ilevel1' */
       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
       if (dims >= 2) {
          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
                                                          ilevel1);
-         if (dims == 3) {
-            depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
-            img_stride1_vec = lp_build_mul(&bld->int_coord_bld,
-                                           row_stride1_vec, height1_vec);
+         if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+            img_stride1_vec = lp_build_get_level_stride_vec(bld,
+                                                            img_stride_array,
+                                                            ilevel1);
+            if (dims ==3) {
+               depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
+            }
          }
       }
    }
 
    /*
-    * Choose cube face, recompute texcoords.
+    * Choose cube face, recompute per-face texcoords.
     */
    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
       LLVMValueRef face, face_s, face_t;
       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+      s = face_s; /* vec */
+      t = face_t; /* vec */
+      /* use 'r' to indicate cube face */
+      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
    }
 
    /*
@@ -1406,62 +1651,67 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
    /*
     * Get/interpolate texture colors.
     */
-   /* XXX temporarily force this path: */
-   if (1 /*min_filter == mag_filter*/) {
-      /* same filter for minification or magnification */
-      LLVMValueRef colors0[4], colors1[4];
-
-      if (min_filter == PIPE_TEX_FILTER_NEAREST) {
-         lp_build_sample_image_nearest(bld,
-                                       width0_vec, height0_vec, depth0_vec,
-                                       row_stride0_vec, img_stride0_vec,
-                                       data_ptr0, s, t, r, colors0);
-
-         if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-            /* sample the second mipmap level, and interp */
-            lp_build_sample_image_nearest(bld,
-                                          width1_vec, height1_vec, depth1_vec,
-                                          row_stride1_vec, img_stride1_vec,
-                                          data_ptr1, s, t, r, colors1);
-         }
-      }
-      else {
-         assert(min_filter == PIPE_TEX_FILTER_LINEAR);
+   if (min_filter == mag_filter) {
+      /* no need to distinguish between minification and magnification */
+      lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
+                             width0_vec, width1_vec,
+                             height0_vec, height1_vec,
+                             depth0_vec, depth1_vec,
+                             row_stride0_vec, row_stride1_vec,
+                             img_stride0_vec, img_stride1_vec,
+                             data_ptr0, data_ptr1,
+                             colors_out);
+   }
+   else {
+      /* Emit conditional to choose min image filter or mag image filter
+       * depending on the lod being >= 0 or < 0, respectively.
+       */
+      struct lp_build_flow_context *flow_ctx;
+      struct lp_build_if_state if_ctx;
+      LLVMValueRef minify;
 
-         lp_build_sample_image_linear(bld,
-                                      width0_vec, height0_vec, depth0_vec,
-                                      row_stride0_vec, img_stride0_vec,
-                                      data_ptr0, s, t, r, colors0);
+      flow_ctx = lp_build_flow_create(bld->builder);
+      lp_build_flow_scope_begin(flow_ctx);
 
+      lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
+      lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
+      lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
+      lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
 
-         if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-            /* sample the second mipmap level, and interp */
-            lp_build_sample_image_linear(bld,
-                                         width1_vec, height1_vec, depth1_vec,
-                                         row_stride1_vec, img_stride1_vec,
-                                         data_ptr1, s, t, r, colors1);
-         }
-      }
+      /* minify = lod >= 0.0 (unordered compare: also true if lod is NaN) */
+      minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
+                             lod, float_bld->zero, "");
 
-      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-         /* interpolate samples from the two mipmap levels */
-         for (chan = 0; chan < 4; chan++) {
-            colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
-                                             colors0[chan], colors1[chan]);
-         }
+      lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
+      {
+         /* Use the minification filter */
+         lp_build_sample_mipmap(bld, min_filter, mip_filter,
+                                s, t, r, lod_fpart,
+                                width0_vec, width1_vec,
+                                height0_vec, height1_vec,
+                                depth0_vec, depth1_vec,
+                                row_stride0_vec, row_stride1_vec,
+                                img_stride0_vec, img_stride1_vec,
+                                data_ptr0, data_ptr1,
+                                colors_out);
       }
-      else {
-         /* use first/only level's colors */
-         for (chan = 0; chan < 4; chan++) {
-            colors_out[chan] = colors0[chan];
-         }
+      lp_build_else(&if_ctx);
+      {
+         /* Use the magnification filter */
+         lp_build_sample_mipmap(bld, mag_filter, mip_filter,
+                                s, t, r, lod_fpart,
+                                width0_vec, width1_vec,
+                                height0_vec, height1_vec,
+                                depth0_vec, depth1_vec,
+                                row_stride0_vec, row_stride1_vec,
+                                img_stride0_vec, img_stride1_vec,
+                                data_ptr0, data_ptr1,
+                                colors_out);
       }
-   }
-   else {
-      /* emit conditional to choose min image filter or mag image filter
-       * depending on the lod being >0 or <= 0, respectively.
-       */
-      abort();
+      lp_build_endif(&if_ctx);
+
+      lp_build_flow_scope_end(flow_ctx);
+      lp_build_flow_destroy(flow_ctx);
    }
 }
 
@@ -1473,7 +1723,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
                           LLVMValueRef packed,
                           LLVMValueRef *rgba)
 {
-   LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+   LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
    unsigned chan;
 
    /* Decode the input vector components */
@@ -1485,7 +1735,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
       input = packed;
 
       if(start)
-         input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+         input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
 
       if(stop < 32)
          input = LLVMBuildAnd(builder, input, mask, "");
@@ -1547,17 +1797,17 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
 
    /* subtract 0.5 (add -128) */
-   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+   i32_c128 = lp_build_const_int_vec(i32.type, -128);
    s = LLVMBuildAdd(builder, s, i32_c128, "");
    t = LLVMBuildAdd(builder, t, i32_c128, "");
 
    /* compute floor (shift right 8) */
-   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+   i32_c8 = lp_build_const_int_vec(i32.type, 8);
    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
 
    /* compute fractional part (AND with 0xff) */
-   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+   i32_c255 = lp_build_const_int_vec(i32.type, 255);
    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
 
@@ -1724,7 +1974,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
    }
 
    assert(res);
-   res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
+   res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
 
    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
    for(chan = 0; chan < 3; ++chan)
@@ -1754,17 +2004,12 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    LLVMValueRef width, width_vec;
    LLVMValueRef height, height_vec;
    LLVMValueRef depth, depth_vec;
-   LLVMValueRef stride_array;
+   LLVMValueRef row_stride_array, img_stride_array;
    LLVMValueRef data_array;
    LLVMValueRef s;
    LLVMValueRef t;
    LLVMValueRef r;
 
-   (void) lp_build_lod_selector;   /* temporary to silence warning */
-   (void) lp_build_nearest_mip_level;
-   (void) lp_build_linear_mip_levels;
-   (void) lp_build_minify;
-
    /* Setup our build context */
    memset(&bld, 0, sizeof bld);
    bld.builder = builder;
@@ -1790,7 +2035,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    width = dynamic_state->width(dynamic_state, builder, unit);
    height = dynamic_state->height(dynamic_state, builder, unit);
    depth = dynamic_state->depth(dynamic_state, builder, unit);
-   stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+   row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+   img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
    /* Note that data_array is an array[level] of pointers to texture images */
 
@@ -1802,7 +2048,7 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
 
-   if (lp_format_is_rgba8(bld.format_desc) &&
+   if (util_format_is_rgba8_variant(bld.format_desc) &&
        static_state->target == PIPE_TEXTURE_2D &&
        static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
        static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
@@ -1811,13 +2057,14 @@ lp_build_sample_soa(LLVMBuilderRef builder,
        is_simple_wrap_mode(static_state->wrap_t)) {
       /* special case */
       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
-                                    stride_array, data_array, texel);
+                                    row_stride_array, data_array, texel);
    }
    else {
       lp_build_sample_general(&bld, unit, s, t, r,
                               width, height, depth,
                               width_vec, height_vec, depth_vec,
-                              stride_array, NULL, data_array,
+                              row_stride_array, img_stride_array,
+                              data_array,
                               texel);
    }