From 10e40ad11d5ed7f2d286837f92f8b63547f3db0b Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 28 Aug 2013 16:26:43 +0200
Subject: [PATCH] gallivm: refactor num_lods handling

This is just preparation for per-pixel (or per-quad in case of multiple quads)
min/mag filter, since some assumptions about the number of miplevels being
equal to the number of lods no longer hold true.
This change does not change behavior yet (though theoretically, when forcing
the per-element path, it might be slower with different min/mag filters: the
code now respects this setting even when there are no mip maps, so some lod
calcs will be done per-element even though ultimately the same filter is
still used for all pixels).

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c | 126 ++++++++--------
 src/gallium/auxiliary/gallivm/lp_bld_sample.h |  13 +-
 .../auxiliary/gallivm/lp_bld_sample_aos.c     |  20 +--
 .../auxiliary/gallivm/lp_bld_sample_soa.c     | 141 +++++++++++-------
 4 files changed, 169 insertions(+), 131 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 89d72494be0..e1cfd78e885 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -217,7 +217,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
    struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *levelf_bld = &bld->levelf_bld;
+   struct lp_build_context *rho_bld = &bld->lodf_bld;
    const unsigned dims = bld->dims;
    LLVMValueRef ddx_ddy[2];
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -231,7 +231,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
    LLVMValueRef first_level, first_level_vec;
    unsigned length = coord_bld->type.length;
    unsigned num_quads = length / 4;
-   boolean rho_per_quad = levelf_bld->type.length != length;
+   boolean rho_per_quad = rho_bld->type.length != length;
    unsigned i;
    LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    LLVMValueRef rho_xvec, rho_yvec;
@@ -259,18 +259,18 @@ lp_build_rho(struct lp_build_sample_context *bld,
        */
       if (rho_per_quad) {
          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         levelf_bld->type, cube_rho, 0);
+                                         rho_bld->type, cube_rho, 0);
       }
       else {
          rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
       }
       if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(rho_bld, rho);
       }
       /* Could optimize this for single quad just skip the broadcast */
       cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
-                                            levelf_bld->type, float_size, index0);
-      rho = lp_build_mul(levelf_bld, cubesize, rho);
+                                            rho_bld->type, float_size, index0);
+      rho = lp_build_mul(rho_bld, cubesize, rho);
    }
    else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
       LLVMValueRef ddmax[3], ddx[3], ddy[3];
@@ -311,9 +311,9 @@ lp_build_rho(struct lp_build_sample_context *bld,
              * otherwise would also need different code to per-pixel lod case.
              */
             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            levelf_bld->type, rho, 0);
+                                            rho_bld->type, rho, 0);
          }
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(rho_bld, rho);
 
       }
       else {
@@ -329,7 +329,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
              * rho_vec contains per-pixel rho, convert to scalar per quad.
              */
             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            levelf_bld->type, rho, 0);
+                                            rho_bld->type, rho, 0);
          }
       }
    }
@@ -404,7 +404,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
 
          if (rho_per_quad) {
             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            levelf_bld->type, rho, 0);
+                                            rho_bld->type, rho, 0);
          }
          else {
             /*
@@ -416,7 +416,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
              */
             rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
          }
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(rho_bld, rho);
       }
       else {
          ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
@@ -497,7 +497,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
             }
             if (rho_per_quad) {
                rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                               levelf_bld->type, rho, 0);
+                                               rho_bld->type, rho, 0);
             }
             else {
                rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
@@ -528,7 +528,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
                }
             }
             if (!rho_per_quad) {
-               rho = lp_build_broadcast_scalar(levelf_bld, rho);
+               rho = lp_build_broadcast_scalar(rho_bld, rho);
             }
          }
       }
@@ -675,8 +675,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
  * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
  * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
  *
- * The resulting lod is scalar per quad, so only the first value per quad
- * passed in from lod_bias, explicit_lod is used.
+ * The resulting lod can be scalar per quad or be per element.
  */
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
@@ -696,12 +695,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
-   struct lp_build_context *levelf_bld = &bld->levelf_bld;
+   struct lp_build_context *lodf_bld = &bld->lodf_bld;
    LLVMValueRef lod;
 
-   *out_lod_ipart = bld->leveli_bld.zero;
-   *out_lod_positive = bld->leveli_bld.zero;
-   *out_lod_fpart = levelf_bld->zero;
+   *out_lod_ipart = bld->lodi_bld.zero;
+   *out_lod_positive = bld->lodi_bld.zero;
+   *out_lod_fpart = lodf_bld->zero;
 
    /*
     * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
@@ -729,13 +728,13 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          bld->dynamic_state->min_lod(bld->dynamic_state,
                                      bld->gallivm, sampler_unit);
 
-      lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+      lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
    }
    else {
       if (explicit_lod) {
          if (bld->num_lods != bld->coord_type.length)
             lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                                            levelf_bld->type, explicit_lod, 0);
+                                            lodf_bld->type, explicit_lod, 0);
          else
             lod = explicit_lod;
       }
@@ -764,33 +763,33 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                 * Don't actually need both all the time, ipart is needed
                 * for nearest mipfilter, pos_or_zero if min != mag.
                 */
-               *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
-               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
-                                                rho, levelf_bld->one);
+               *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
+               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+                                                rho, lodf_bld->one);
                return;
             }
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-               lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
+               lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
                                       out_lod_ipart, out_lod_fpart);
-               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
-                                                rho, levelf_bld->one);
+               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+                                                rho, lodf_bld->one);
                return;
             }
          }
 
          if (0) {
-            lod = lp_build_log2(levelf_bld, rho);
+            lod = lp_build_log2(lodf_bld, rho);
          }
          else {
-            lod = lp_build_fast_log2(levelf_bld, rho);
+            lod = lp_build_fast_log2(lodf_bld, rho);
          }
 
          /* add shader lod bias */
          if (lod_bias) {
             if (bld->num_lods != bld->coord_type.length)
                lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                                                    levelf_bld->type, lod_bias, 0);
+                                                    lodf_bld->type, lod_bias, 0);
             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
          }
       }
@@ -800,7 +799,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          LLVMValueRef sampler_lod_bias =
             bld->dynamic_state->lod_bias(bld->dynamic_state,
                                          bld->gallivm, sampler_unit);
-         sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
+         sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
                                                       sampler_lod_bias);
          lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
       }
@@ -810,36 +809,36 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          LLVMValueRef max_lod =
             bld->dynamic_state->max_lod(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
-         max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
+         max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
 
-         lod = lp_build_min(levelf_bld, lod, max_lod);
+         lod = lp_build_min(lodf_bld, lod, max_lod);
       }
       if (bld->static_sampler_state->apply_min_lod) {
          LLVMValueRef min_lod =
             bld->dynamic_state->min_lod(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
-         min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+         min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
 
-         lod = lp_build_max(levelf_bld, lod, min_lod);
+         lod = lp_build_max(lodf_bld, lod, min_lod);
       }
    }
 
-   *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
-                                    lod, levelf_bld->zero);
+   *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+                                    lod, lodf_bld->zero);
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-         lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
+         lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
                                 out_lod_ipart, out_lod_fpart);
       }
       else {
-         lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart);
+         lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
       }
 
       lp_build_name(*out_lod_fpart, "lod_fpart");
    }
    else {
-      *out_lod_ipart = lp_build_iround(levelf_bld, lod);
+      *out_lod_ipart = lp_build_iround(lodf_bld, lod);
    }
 
    lp_build_name(*out_lod_ipart, "lod_ipart");
@@ -880,14 +879,14 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
       out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
       out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
       out = lp_build_or(leveli_bld, out, out1);
-      if (bld->num_lods == bld->coord_bld.type.length) {
+      if (bld->num_mips == bld->coord_bld.type.length) {
          *out_of_bounds = out;
       }
-      else if (bld->num_lods == 1) {
+      else if (bld->num_mips == 1) {
          *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
       }
       else {
-         assert(bld->num_lods == bld->coord_bld.type.length / 4);
+         assert(bld->num_mips == bld->coord_bld.type.length / 4);
          *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                 leveli_bld->type,
                                                                 bld->int_coord_bld.type,
@@ -904,8 +903,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 
 
 /**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two (per-quad)
- * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
+ * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
+ * part accordingly.
  * Later, we'll sample from those two mipmap levels and interpolate between them.
  */
 void
@@ -923,6 +923,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
    LLVMValueRef clamp_min;
    LLVMValueRef clamp_max;
 
+   assert(bld->num_lods == bld->num_mips);
+
    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                  bld->gallivm, texture_unit);
    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
@@ -1013,17 +1015,17 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
    LLVMValueRef indexes[2], offsets, offset1;
 
    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       indexes[1] = level;
       offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
       offset1 = LLVMBuildLoad(builder, offset1, "");
       offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
    }
-   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
       unsigned i;
 
       offsets = bld->int_coord_bld.undef;
-      for (i = 0; i < bld->num_lods; i++) {
+      for (i = 0; i < bld->num_mips; i++) {
          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
@@ -1036,10 +1038,10 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
    else {
       unsigned i;
 
-      assert (bld->num_lods == bld->coord_bld.type.length);
+      assert (bld->num_mips == bld->coord_bld.type.length);
 
       offsets = bld->int_coord_bld.undef;
-      for (i = 0; i < bld->num_lods; i++) {
+      for (i = 0; i < bld->num_mips; i++) {
          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
          offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
@@ -1089,18 +1091,18 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
    LLVMBuilderRef builder = bld->gallivm->builder;
    LLVMValueRef indexes[2], stride, stride1;
    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       indexes[1] = level;
       stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
       stride1 = LLVMBuildLoad(builder, stride1, "");
       stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
    }
-   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
       LLVMValueRef stride1;
       unsigned i;
 
       stride = bld->int_coord_bld.undef;
-      for (i = 0; i < bld->num_lods; i++) {
+      for (i = 0; i < bld->num_mips; i++) {
          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
@@ -1114,7 +1116,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
       LLVMValueRef stride1;
       unsigned i;
 
-      assert (bld->num_lods == bld->coord_bld.type.length);
+      assert (bld->num_mips == bld->coord_bld.type.length);
 
       stride = bld->int_coord_bld.undef;
       for (i = 0; i < bld->coord_bld.type.length; i++) {
@@ -1147,7 +1149,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
    /*
     * Compute width, height, depth at mipmap level 'ilevel'
     */
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
       *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
    }
@@ -1157,7 +1159,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
       unsigned num_quads = bld->coord_bld.type.length / 4;
       unsigned i;
 
-      if (bld->num_lods == num_quads) {
+      if (bld->num_mips == num_quads) {
          /*
           * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
           * intel "forgot" the variable shift count instruction until avx2.
@@ -1216,7 +1218,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
          */
-         assert(bld->num_lods == bld->coord_bld.type.length);
+         assert(bld->num_mips == bld->coord_bld.type.length);
          if (bld->dims == 1) {
             assert(bld->int_size_in_bld.type.length == 1);
             int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
@@ -1226,7 +1228,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
          }
          else {
             LLVMValueRef ilevel1;
-            for (i = 0; i < bld->num_lods; i++) {
+            for (i = 0; i < bld->num_mips; i++) {
                LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
                ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
                                                     bld->int_size_in_bld.type, ilevel, indexi);
@@ -1235,7 +1237,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
             }
             *out_size = lp_build_concat(bld->gallivm, tmp,
                                         bld->int_size_in_bld.type,
-                                        bld->num_lods);
+                                        bld->num_mips);
          }
       }
    }
@@ -1278,7 +1280,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
    struct lp_type size_type = size_bld->type;
 
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       *out_width = lp_build_extract_broadcast(bld->gallivm,
                                               size_type,
                                               coord_type,
@@ -1305,7 +1307,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
       if (dims == 1) {
          *out_width = size;
       }
-      else if (bld->num_lods == num_quads) {
+      else if (bld->num_mips == num_quads) {
          *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
          if (dims >= 2) {
             *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
@@ -1315,7 +1317,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
          }
       }
       else {
-         assert(bld->num_lods == bld->coord_type.length);
+         assert(bld->num_mips == bld->coord_type.length);
          *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                 coord_type, size, 0);
          if (dims >= 2) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index a7ebe7e9ed8..e6b9f30d7bb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -233,7 +233,10 @@ struct lp_build_sample_context
    /** SIMD vector width */
    unsigned vector_width;
 
-   /**Â number of lod values (valid are 1, length/4, length) */
+   /** number of mipmaps (valid are 1, length/4, length) */
+   unsigned num_mips;
+
+   /** number of lod values (valid are 1, length/4, length) */
    unsigned num_lods;
 
    /** regular scalar float type */
@@ -283,6 +286,14 @@ struct lp_build_sample_context
    struct lp_type leveli_type;
    struct lp_build_context leveli_bld;
 
+   /** Float lod type */
+   struct lp_type lodf_type;
+   struct lp_build_context lodf_bld;
+
+   /** Int lod type */
+   struct lp_type lodi_type;
+   struct lp_build_context lodi_bld;
+
    /* Common dynamic state values */
    LLVMValueRef row_stride_array;
    LLVMValueRef img_stride_array;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 7431388812d..c35b628270e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1373,7 +1373,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    lp_build_mipmap_level_sizes(bld, ilevel0,
                                &size0,
                                &row_stride0_vec, &img_stride0_vec);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
    }
    else {
@@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
-                                                     bld->levelf_bld.type, 256.0);
-      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
+                                                     bld->lodf_bld.type, 256.0);
+      LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
       struct lp_build_if_state if_ctx;
       LLVMValueRef need_lerp;
       unsigned num_quads = bld->coord_bld.type.length / 4;
@@ -1435,7 +1435,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
       /* need_lerp = lod_fpart > 0 */
       if (bld->num_lods == 1) {
          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
-                                   lod_fpart, bld->leveli_bld.zero,
+                                   lod_fpart, bld->lodi_bld.zero,
                                    "need_lerp");
       }
       else {
@@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           * lod_fpart values have same sign.
           * We can however then skip the greater than comparison.
           */
-         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
-                                  bld->leveli_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
+         lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
+                                  bld->lodi_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
       }
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1465,7 +1465,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          lp_build_mipmap_level_sizes(bld, ilevel1,
                                      &size1,
                                      &row_stride1_vec, &img_stride1_vec);
-         if (bld->num_lods == 1) {
+         if (bld->num_mips == 1) {
             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
          }
          else {
@@ -1524,7 +1524,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          }
          else {
             unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
-            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
+            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
 
             /* Take the LSB of lod_fpart */
@@ -1613,7 +1613,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
        * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
        */
-      if (bld->leveli_bld.type.length > 1)
+      if (bld->num_lods > 1)
          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
                                                 lp_build_const_int32(bld->gallivm, 0), "");
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 8ad3b9f246a..c686d82de57 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1087,7 +1087,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    lp_build_mipmap_level_sizes(bld, ilevel0,
                                &size0,
                                &row_stride0_vec, &img_stride0_vec);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
    }
    else {
@@ -1123,7 +1123,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
       /* need_lerp = lod_fpart > 0 */
       if (bld->num_lods == 1) {
          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
-                                   lod_fpart, bld->levelf_bld.zero,
+                                   lod_fpart, bld->lodf_bld.zero,
                                    "need_lerp");
       }
       else {
@@ -1138,12 +1138,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           * negative values which would screw up filtering if not all
           * lod_fpart values have same sign.
           */
-         lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
-                                  bld->levelf_bld.zero);
-         need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
+         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
+                                  bld->lodf_bld.zero);
+         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
                                       PIPE_FUNC_GREATER,
-                                      lod_fpart, bld->levelf_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
+                                      lod_fpart, bld->lodf_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
       }
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1152,7 +1152,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          lp_build_mipmap_level_sizes(bld, ilevel1,
                                      &size1,
                                      &row_stride1_vec, &img_stride1_vec);
-         if (bld->num_lods == 1) {
+         if (bld->num_mips == 1) {
             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
          }
          else {
@@ -1178,7 +1178,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
          if (bld->num_lods != bld->coord_type.length)
             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
-                                                              bld->levelf_bld.type,
+                                                              bld->lodf_bld.type,
                                                               bld->texel_bld.type,
                                                               lod_fpart);
 
@@ -1312,8 +1312,14 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                             mip_filter,
                             &lod_ipart, lod_fpart, lod_pos_or_zero);
    } else {
-      lod_ipart = bld->leveli_bld.zero;
-      *lod_pos_or_zero = bld->leveli_bld.zero;
+      lod_ipart = bld->lodi_bld.zero;
+      *lod_pos_or_zero = bld->lodi_bld.zero;
+   }
+
+   if (bld->num_lods != bld->num_mips) {
+      /* only makes sense if there's just a single mip level */
+      assert(bld->num_mips == 1);
+      lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
    }
 
    /*
@@ -1641,7 +1647,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
        * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
        */
-      if (bld->leveli_bld.type.length > 1)
+      if (bld->num_lods > 1)
          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
                                                 lp_build_const_int32(bld->gallivm, 0), "");
 
@@ -1692,7 +1698,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                      const LLVMValueRef *offsets,
                      LLVMValueRef *colors_out)
 {
-   struct lp_build_context *perquadi_bld = &bld->leveli_bld;
+   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    unsigned dims = bld->dims, chan;
    unsigned target = bld->static_texture_state->target;
@@ -1706,7 +1712,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
    out_of_bounds = int_coord_bld->zero;
 
    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
-      if (bld->num_lods != int_coord_bld->type.length) {
+      if (bld->num_mips != int_coord_bld->type.length) {
          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
                                             perquadi_bld->type, explicit_lod, 0);
       }
@@ -1717,7 +1723,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
    }
    else {
-      assert(bld->num_lods == 1);
+      assert(bld->num_mips == 1);
       if (bld->static_texture_state->target != PIPE_BUFFER) {
          ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
                                                   bld->gallivm, texture_unit);
@@ -1856,7 +1862,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    unsigned target = static_texture_state->target;
    unsigned dims = texture_dims(target);
    unsigned num_quads = type.length / 4;
-   unsigned mip_filter, i;
+   unsigned mip_filter, min_img_filter, mag_img_filter, i;
    struct lp_build_sample_context bld;
    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
@@ -1919,6 +1925,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
    }
 
+   min_img_filter = static_sampler_state->min_img_filter;
+   mag_img_filter = static_sampler_state->mag_img_filter;
+
+
    /*
     * This is all a bit complicated different paths are chosen for performance
     * reasons.
@@ -1936,38 +1946,51 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    /*
     * There are other situations where at least the multiple int lods could be
     * avoided like min and max lod being equal.
-    * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still
-    * have length 4. Because lod_selector is always using per quad calcs in this
-    * case, but minification etc. don't need to bother. This is very brittle though
-    * e.g. num_lods might be 1 but still have multiple positive_lod values!
     */
+   bld.num_mips = bld.num_lods = 1;
    if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
        (explicit_lod || lod_bias ||
-        (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
-       ((is_fetch && target != PIPE_BUFFER) ||
-        (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
-      bld.num_lods = type.length;
+        (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+      if ((is_fetch && target != PIPE_BUFFER) ||
+          (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+         bld.num_mips = type.length;
+         bld.num_lods = type.length;
+      }
+      else if (!is_fetch && min_img_filter != mag_img_filter) {
+         bld.num_mips = 1;
+         bld.num_lods = type.length;
+      }
+   }
    /* TODO: for true scalar_lod should only use 1 lod value */
-   else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
+   else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+      bld.num_mips = num_quads;
       bld.num_lods = num_quads;
    }
-   else {
-      bld.num_lods = 1;
+   else if (!is_fetch && min_img_filter != mag_img_filter) {
+      bld.num_mips = 1;
+      bld.num_lods = num_quads;
    }
 
-   bld.levelf_type = type;
+
+   bld.lodf_type = type;
    /* we want native vector size to be able to use our intrinsics */
    if (bld.num_lods != type.length) {
-      bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+      /* TODO: this currently always has to be per-quad or per-element */
+      bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+   }
+   bld.lodi_type = lp_int_type(bld.lodf_type);
+   bld.levelf_type = bld.lodf_type;
+   if (bld.num_mips == 1) {
+      bld.levelf_type.length = 1;
    }
    bld.leveli_type = lp_int_type(bld.levelf_type);
    bld.float_size_type = bld.float_size_in_type;
    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
     * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
-   if (bld.num_lods > 1) {
-      bld.float_size_type.length = bld.num_lods == type.length ?
-                                      bld.num_lods * bld.float_size_in_type.length :
+   if (bld.num_mips > 1) {
+      bld.float_size_type.length = bld.num_mips == type.length ?
+                                      bld.num_mips * bld.float_size_in_type.length :
                                       type.length;
    }
    bld.int_size_type = lp_int_type(bld.float_size_type);
@@ -1984,6 +2007,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
+   lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
+   lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
 
    /* Get the dynamic state */
    tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
@@ -2071,16 +2096,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
        * (It should be faster if we'd support avx2)
        */
       if (num_quads == 1 || !use_aos) {
-
-         if (num_quads > 1) {
-            if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-               LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
-               /*
-                * This parameter is the same for all quads could probably simplify.
-                */
-               ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
-            }
-         }
          if (use_aos) {
             /* do sampling/filtering with fixed pt arithmetic */
             lp_build_sample_aos(&bld, sampler_index,
@@ -2134,30 +2149,37 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
          bld4.texel_type = bld.texel_type;
          bld4.texel_type.length = 4;
-         bld4.levelf_type = type4;
-         /* we want native vector size to be able to use our intrinsics */
-         bld4.levelf_type.length = 1;
-         bld4.leveli_type = lp_int_type(bld4.levelf_type);
 
+         bld4.num_mips = bld4.num_lods = 1;
          if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
-               (explicit_lod || lod_bias ||
-                (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
-               ((is_fetch && target != PIPE_BUFFER) ||
-                (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
-            bld4.num_lods = type4.length;
-         else
-            bld4.num_lods = 1;
+             (explicit_lod || lod_bias ||
+              (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+            if ((is_fetch && target != PIPE_BUFFER) ||
+                (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+               bld4.num_mips = type4.length;
+               bld4.num_lods = type4.length;
+            }
+            else if (!is_fetch && min_img_filter != mag_img_filter) {
+               bld4.num_mips = 1;
+               bld4.num_lods = type4.length;
+            }
+         }
 
-         bld4.levelf_type = type4;
          /* we want native vector size to be able to use our intrinsics */
+         bld4.lodf_type = type4;
          if (bld4.num_lods != type4.length) {
+            bld4.lodf_type.length = 1;
+         }
+         bld4.lodi_type = lp_int_type(bld4.lodf_type);
+         bld4.levelf_type = type4;
+         if (bld4.num_mips != type4.length) {
             bld4.levelf_type.length = 1;
          }
          bld4.leveli_type = lp_int_type(bld4.levelf_type);
          bld4.float_size_type = bld4.float_size_in_type;
-         if (bld4.num_lods > 1) {
-            bld4.float_size_type.length = bld4.num_lods == type4.length ?
-                                            bld4.num_lods * bld4.float_size_in_type.length :
+         if (bld4.num_mips > 1) {
+            bld4.float_size_type.length = bld4.num_mips == type4.length ?
+                                            bld4.num_mips * bld4.float_size_in_type.length :
                                             type4.length;
          }
          bld4.int_size_type = lp_int_type(bld4.float_size_type);
@@ -2174,6 +2196,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
+         lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
+         lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
 
          for (i = 0; i < num_quads; i++) {
             LLVMValueRef s4, t4, r4;
@@ -2196,7 +2220,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                }
             }
             lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
-            ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
+            ilevel04 = bld.num_mips == 1 ? ilevel0 :
+                          lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
-- 
2.30.2