struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *coord_bld = &bld->coord_bld;
- struct lp_build_context *levelf_bld = &bld->levelf_bld;
+ struct lp_build_context *rho_bld = &bld->lodf_bld;
const unsigned dims = bld->dims;
LLVMValueRef ddx_ddy[2];
LLVMBuilderRef builder = bld->gallivm->builder;
LLVMValueRef first_level, first_level_vec;
unsigned length = coord_bld->type.length;
unsigned num_quads = length / 4;
- boolean rho_per_quad = levelf_bld->type.length != length;
+ boolean rho_per_quad = rho_bld->type.length != length;
unsigned i;
LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
LLVMValueRef rho_xvec, rho_yvec;
*/
if (rho_per_quad) {
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, cube_rho, 0);
+ rho_bld->type, cube_rho, 0);
}
else {
rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
}
if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
- rho = lp_build_sqrt(levelf_bld, rho);
+ rho = lp_build_sqrt(rho_bld, rho);
}
/* Could optimize this for single quad just skip the broadcast */
cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
- levelf_bld->type, float_size, index0);
- rho = lp_build_mul(levelf_bld, cubesize, rho);
+ rho_bld->type, float_size, index0);
+ rho = lp_build_mul(rho_bld, cubesize, rho);
}
else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
LLVMValueRef ddmax[3], ddx[3], ddy[3];
* otherwise would also need different code to per-pixel lod case.
*/
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
- rho = lp_build_sqrt(levelf_bld, rho);
+ rho = lp_build_sqrt(rho_bld, rho);
}
else {
* rho_vec contains per-pixel rho, convert to scalar per quad.
*/
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
}
}
if (rho_per_quad) {
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
else {
/*
*/
rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
}
- rho = lp_build_sqrt(levelf_bld, rho);
+ rho = lp_build_sqrt(rho_bld, rho);
}
else {
ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
}
if (rho_per_quad) {
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
else {
rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
}
}
if (!rho_per_quad) {
- rho = lp_build_broadcast_scalar(levelf_bld, rho);
+ rho = lp_build_broadcast_scalar(rho_bld, rho);
}
}
}
* \param out_lod_fpart float part of lod (never larger than 1 but may be negative)
* \param out_lod_positive (mask) if lod is positive (i.e. texture is minified)
*
- * The resulting lod is scalar per quad, so only the first value per quad
- * passed in from lod_bias, explicit_lod is used.
+ * The resulting lod can be scalar per quad or per element.
*/
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
{
LLVMBuilderRef builder = bld->gallivm->builder;
- struct lp_build_context *levelf_bld = &bld->levelf_bld;
+ struct lp_build_context *lodf_bld = &bld->lodf_bld;
LLVMValueRef lod;
- *out_lod_ipart = bld->leveli_bld.zero;
- *out_lod_positive = bld->leveli_bld.zero;
- *out_lod_fpart = levelf_bld->zero;
+ *out_lod_ipart = bld->lodi_bld.zero;
+ *out_lod_positive = bld->lodi_bld.zero;
+ *out_lod_fpart = lodf_bld->zero;
/*
* For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
bld->dynamic_state->min_lod(bld->dynamic_state,
bld->gallivm, sampler_unit);
- lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+ lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
}
else {
if (explicit_lod) {
if (bld->num_lods != bld->coord_type.length)
lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- levelf_bld->type, explicit_lod, 0);
+ lodf_bld->type, explicit_lod, 0);
else
lod = explicit_lod;
}
* Don't actually need both all the time, ipart is needed
* for nearest mipfilter, pos_or_zero if min != mag.
*/
- *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
- *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
- rho, levelf_bld->one);
+ *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
+ *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+ rho, lodf_bld->one);
return;
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
- lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
+ lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
out_lod_ipart, out_lod_fpart);
- *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
- rho, levelf_bld->one);
+ *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+ rho, lodf_bld->one);
return;
}
}
if (0) {
- lod = lp_build_log2(levelf_bld, rho);
+ lod = lp_build_log2(lodf_bld, rho);
}
else {
- lod = lp_build_fast_log2(levelf_bld, rho);
+ lod = lp_build_fast_log2(lodf_bld, rho);
}
/* add shader lod bias */
if (lod_bias) {
if (bld->num_lods != bld->coord_type.length)
lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- levelf_bld->type, lod_bias, 0);
+ lodf_bld->type, lod_bias, 0);
lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
}
}
LLVMValueRef sampler_lod_bias =
bld->dynamic_state->lod_bias(bld->dynamic_state,
bld->gallivm, sampler_unit);
- sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
+ sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
sampler_lod_bias);
lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
}
LLVMValueRef max_lod =
bld->dynamic_state->max_lod(bld->dynamic_state,
bld->gallivm, sampler_unit);
- max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
+ max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
- lod = lp_build_min(levelf_bld, lod, max_lod);
+ lod = lp_build_min(lodf_bld, lod, max_lod);
}
if (bld->static_sampler_state->apply_min_lod) {
LLVMValueRef min_lod =
bld->dynamic_state->min_lod(bld->dynamic_state,
bld->gallivm, sampler_unit);
- min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+ min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
- lod = lp_build_max(levelf_bld, lod, min_lod);
+ lod = lp_build_max(lodf_bld, lod, min_lod);
}
}
- *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
- lod, levelf_bld->zero);
+ *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+ lod, lodf_bld->zero);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
- lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
+ lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
out_lod_ipart, out_lod_fpart);
}
else {
- lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart);
+ lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
}
lp_build_name(*out_lod_fpart, "lod_fpart");
}
else {
- *out_lod_ipart = lp_build_iround(levelf_bld, lod);
+ *out_lod_ipart = lp_build_iround(lodf_bld, lod);
}
lp_build_name(*out_lod_ipart, "lod_ipart");
out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
out = lp_build_or(leveli_bld, out, out1);
- if (bld->num_lods == bld->coord_bld.type.length) {
+ if (bld->num_mips == bld->coord_bld.type.length) {
*out_of_bounds = out;
}
- else if (bld->num_lods == 1) {
+ else if (bld->num_mips == 1) {
*out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
}
else {
- assert(bld->num_lods == bld->coord_bld.type.length / 4);
+ assert(bld->num_mips == bld->coord_bld.type.length / 4);
*out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
leveli_bld->type,
bld->int_coord_bld.type,
/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two (per-quad)
- * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
+ * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
+ * part accordingly.
* Later, we'll sample from those two mipmap levels and interpolate between them.
*/
void
LLVMValueRef clamp_min;
LLVMValueRef clamp_max;
+ assert(bld->num_lods == bld->num_mips);
+
first_level = bld->dynamic_state->first_level(bld->dynamic_state,
bld->gallivm, texture_unit);
last_level = bld->dynamic_state->last_level(bld->dynamic_state,
LLVMValueRef indexes[2], offsets, offset1;
indexes[0] = lp_build_const_int32(bld->gallivm, 0);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
indexes[1] = level;
offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
offset1 = LLVMBuildLoad(builder, offset1, "");
offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
}
- else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ else if (bld->num_mips == bld->coord_bld.type.length / 4) {
unsigned i;
offsets = bld->int_coord_bld.undef;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
else {
unsigned i;
- assert (bld->num_lods == bld->coord_bld.type.length);
+ assert (bld->num_mips == bld->coord_bld.type.length);
offsets = bld->int_coord_bld.undef;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
LLVMBuilderRef builder = bld->gallivm->builder;
LLVMValueRef indexes[2], stride, stride1;
indexes[0] = lp_build_const_int32(bld->gallivm, 0);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
indexes[1] = level;
stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
stride1 = LLVMBuildLoad(builder, stride1, "");
stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
}
- else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ else if (bld->num_mips == bld->coord_bld.type.length / 4) {
LLVMValueRef stride1;
unsigned i;
stride = bld->int_coord_bld.undef;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
LLVMValueRef stride1;
unsigned i;
- assert (bld->num_lods == bld->coord_bld.type.length);
+ assert (bld->num_mips == bld->coord_bld.type.length);
stride = bld->int_coord_bld.undef;
for (i = 0; i < bld->coord_bld.type.length; i++) {
/*
* Compute width, height, depth at mipmap level 'ilevel'
*/
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
*out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
}
unsigned num_quads = bld->coord_bld.type.length / 4;
unsigned i;
- if (bld->num_lods == num_quads) {
+ if (bld->num_mips == num_quads) {
/*
* XXX: this should be #ifndef SANE_INSTRUCTION_SET.
* intel "forgot" the variable shift count instruction until avx2.
* For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
* For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
*/
- assert(bld->num_lods == bld->coord_bld.type.length);
+ assert(bld->num_mips == bld->coord_bld.type.length);
if (bld->dims == 1) {
assert(bld->int_size_in_bld.type.length == 1);
int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
}
else {
LLVMValueRef ilevel1;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
bld->int_size_in_bld.type, ilevel, indexi);
}
*out_size = lp_build_concat(bld->gallivm, tmp,
bld->int_size_in_bld.type,
- bld->num_lods);
+ bld->num_mips);
}
}
}
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
struct lp_type size_type = size_bld->type;
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
*out_width = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
if (dims == 1) {
*out_width = size;
}
- else if (bld->num_lods == num_quads) {
+ else if (bld->num_mips == num_quads) {
*out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
if (dims >= 2) {
*out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
}
}
else {
- assert(bld->num_lods == bld->coord_type.length);
+ assert(bld->num_mips == bld->coord_type.length);
*out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
coord_type, size, 0);
if (dims >= 2) {
lp_build_mipmap_level_sizes(bld, ilevel0,
&size0,
&row_stride0_vec, &img_stride0_vec);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
}
else {
/* need_lerp = lod_fpart > 0 */
if (bld->num_lods == 1) {
need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
- lod_fpart, bld->levelf_bld.zero,
+ lod_fpart, bld->lodf_bld.zero,
"need_lerp");
}
else {
* negative values which would screw up filtering if not all
* lod_fpart values have same sign.
*/
- lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
- bld->levelf_bld.zero);
- need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
+ lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
+ bld->lodf_bld.zero);
+ need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
PIPE_FUNC_GREATER,
- lod_fpart, bld->levelf_bld.zero);
- need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
+ lod_fpart, bld->lodf_bld.zero);
+ need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
}
lp_build_if(&if_ctx, bld->gallivm, need_lerp);
lp_build_mipmap_level_sizes(bld, ilevel1,
&size1,
&row_stride1_vec, &img_stride1_vec);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
}
else {
if (bld->num_lods != bld->coord_type.length)
lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
- bld->levelf_bld.type,
+ bld->lodf_bld.type,
bld->texel_bld.type,
lod_fpart);
mip_filter,
&lod_ipart, lod_fpart, lod_pos_or_zero);
} else {
- lod_ipart = bld->leveli_bld.zero;
- *lod_pos_or_zero = bld->leveli_bld.zero;
+ lod_ipart = bld->lodi_bld.zero;
+ *lod_pos_or_zero = bld->lodi_bld.zero;
+ }
+
+ if (bld->num_lods != bld->num_mips) {
+ /* only makes sense if there's just a single mip level */
+ assert(bld->num_mips == 1);
+ lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
}
/*
* some max probably could hack up the weights in the linear
* path with selects to work for nearest.
*/
- if (bld->leveli_bld.type.length > 1)
+ if (bld->num_lods > 1)
lod_positive = LLVMBuildExtractElement(builder, lod_positive,
lp_build_const_int32(bld->gallivm, 0), "");
const LLVMValueRef *offsets,
LLVMValueRef *colors_out)
{
- struct lp_build_context *perquadi_bld = &bld->leveli_bld;
+ struct lp_build_context *perquadi_bld = &bld->lodi_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
unsigned dims = bld->dims, chan;
unsigned target = bld->static_texture_state->target;
out_of_bounds = int_coord_bld->zero;
if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
- if (bld->num_lods != int_coord_bld->type.length) {
+ if (bld->num_mips != int_coord_bld->type.length) {
ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
perquadi_bld->type, explicit_lod, 0);
}
out_of_bound_ret_zero ? &out_of_bounds : NULL);
}
else {
- assert(bld->num_lods == 1);
+ assert(bld->num_mips == 1);
if (bld->static_texture_state->target != PIPE_BUFFER) {
ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
bld->gallivm, texture_unit);
unsigned target = static_texture_state->target;
unsigned dims = texture_dims(target);
unsigned num_quads = type.length / 4;
- unsigned mip_filter, i;
+ unsigned mip_filter, min_img_filter, mag_img_filter, i;
struct lp_build_sample_context bld;
struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
}
+ min_img_filter = static_sampler_state->min_img_filter;
+ mag_img_filter = static_sampler_state->mag_img_filter;
+
+
/*
* This is all a bit complicated different paths are chosen for performance
* reasons.
/*
* There are other situations where at least the multiple int lods could be
* avoided like min and max lod being equal.
- * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still
- * have length 4. Because lod_selector is always using per quad calcs in this
- * case, but minification etc. don't need to bother. This is very brittle though
- * e.g. num_lods might be 1 but still have multiple positive_lod values!
*/
+ bld.num_mips = bld.num_lods = 1;
if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
(explicit_lod || lod_bias ||
- (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
- ((is_fetch && target != PIPE_BUFFER) ||
- (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
- bld.num_lods = type.length;
+ (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+ if ((is_fetch && target != PIPE_BUFFER) ||
+ (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld.num_mips = type.length;
+ bld.num_lods = type.length;
+ }
+ else if (!is_fetch && min_img_filter != mag_img_filter) {
+ bld.num_mips = 1;
+ bld.num_lods = type.length;
+ }
+ }
/* TODO: for true scalar_lod should only use 1 lod value */
- else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
+ else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
(!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld.num_mips = num_quads;
bld.num_lods = num_quads;
}
- else {
- bld.num_lods = 1;
+ else if (!is_fetch && min_img_filter != mag_img_filter) {
+ bld.num_mips = 1;
+ bld.num_lods = num_quads;
}
- bld.levelf_type = type;
+
+ bld.lodf_type = type;
/* we want native vector size to be able to use our intrinsics */
if (bld.num_lods != type.length) {
- bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+ /* TODO: this currently always has to be per-quad or per-element */
+ bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+ }
+ bld.lodi_type = lp_int_type(bld.lodf_type);
+ bld.levelf_type = bld.lodf_type;
+ if (bld.num_mips == 1) {
+ bld.levelf_type.length = 1;
}
bld.leveli_type = lp_int_type(bld.levelf_type);
bld.float_size_type = bld.float_size_in_type;
/* Note: size vectors may not be native. They contain minified w/h/d/_ values,
* with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
- if (bld.num_lods > 1) {
- bld.float_size_type.length = bld.num_lods == type.length ?
- bld.num_lods * bld.float_size_in_type.length :
+ if (bld.num_mips > 1) {
+ bld.float_size_type.length = bld.num_mips == type.length ?
+ bld.num_mips * bld.float_size_in_type.length :
type.length;
}
bld.int_size_type = lp_int_type(bld.float_size_type);
lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
+ lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
+ lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
/* Get the dynamic state */
tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
* (It should be faster if we'd support avx2)
*/
if (num_quads == 1 || !use_aos) {
-
- if (num_quads > 1) {
- if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
- LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
- /*
- * This parameter is the same for all quads could probably simplify.
- */
- ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
- }
- }
if (use_aos) {
/* do sampling/filtering with fixed pt arithmetic */
lp_build_sample_aos(&bld, sampler_index,
bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
bld4.texel_type = bld.texel_type;
bld4.texel_type.length = 4;
- bld4.levelf_type = type4;
- /* we want native vector size to be able to use our intrinsics */
- bld4.levelf_type.length = 1;
- bld4.leveli_type = lp_int_type(bld4.levelf_type);
+ bld4.num_mips = bld4.num_lods = 1;
if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
- (explicit_lod || lod_bias ||
- (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
- ((is_fetch && target != PIPE_BUFFER) ||
- (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
- bld4.num_lods = type4.length;
- else
- bld4.num_lods = 1;
+ (explicit_lod || lod_bias ||
+ (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+ if ((is_fetch && target != PIPE_BUFFER) ||
+ (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld4.num_mips = type4.length;
+ bld4.num_lods = type4.length;
+ }
+ else if (!is_fetch && min_img_filter != mag_img_filter) {
+ bld4.num_mips = 1;
+ bld4.num_lods = type4.length;
+ }
+ }
- bld4.levelf_type = type4;
/* we want native vector size to be able to use our intrinsics */
+ bld4.lodf_type = type4;
if (bld4.num_lods != type4.length) {
+ bld4.lodf_type.length = 1;
+ }
+ bld4.lodi_type = lp_int_type(bld4.lodf_type);
+ bld4.levelf_type = type4;
+ if (bld4.num_mips != type4.length) {
bld4.levelf_type.length = 1;
}
bld4.leveli_type = lp_int_type(bld4.levelf_type);
bld4.float_size_type = bld4.float_size_in_type;
- if (bld4.num_lods > 1) {
- bld4.float_size_type.length = bld4.num_lods == type4.length ?
- bld4.num_lods * bld4.float_size_in_type.length :
+ if (bld4.num_mips > 1) {
+ bld4.float_size_type.length = bld4.num_mips == type4.length ?
+ bld4.num_mips * bld4.float_size_in_type.length :
type4.length;
}
bld4.int_size_type = lp_int_type(bld4.float_size_type);
lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
+ lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
+ lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
for (i = 0; i < num_quads; i++) {
LLVMValueRef s4, t4, r4;
}
}
lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
- ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
+ ilevel04 = bld.num_mips == 1 ? ilevel0 :
+ lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);