gallivm: fix a maybe-uninitialized warning

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

index 1727105e4f41a7feecf5d511a3ae78c77fcc95bc..cb4660e424d709fd6c8ecead01c73c48b79e363b 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -42,7 +42,7 @@
  #include "util/u_math.h"
  #include "util/u_format.h"
  #include "util/u_cpu_detect.h"
-#include "util/u_format_rgb9e5.h"
+#include "util/format_rgb9e5.h"
  #include "lp_bld_debug.h"
  #include "lp_bld_type.h"
  #include "lp_bld_const.h"
@@ -60,6 +60,7 @@
  #include "lp_bld_struct.h"
  #include "lp_bld_quad.h"
  #include "lp_bld_pack.h"
+#include "lp_bld_intr.h"
  
  
  /**
@@ -158,7 +159,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
  
     lp_build_fetch_rgba_soa(bld->gallivm,
                             bld->format_desc,
-                           bld->texel_type,
+                           bld->texel_type, TRUE,
                             data_ptr, offset,
                             i, j,
                             bld->cache,
@@ -228,11 +229,16 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
     LLVMValueRef fract, flr, isOdd;
  
     lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
+   /* kill off NaNs */
+   /* XXX: not safe without arch rounding, fract can be anything. */
+   fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero,
+                            GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
  
     /* isOdd = flr & 1 */
     isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
  
     /* make coord positive or negative depending on isOdd */
+   /* XXX slight overkill: masking out the sign bit is unnecessary */
     coord = lp_build_set_sign(coord_bld, fract, isOdd);
  
     /* convert isOdd to float */
@@ -272,10 +278,15 @@ lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
      * we avoided the 0.5/length division before the repeat wrap,
      * now need to fix up edge cases with selects
      */
+   /*
+    * Note we do a float (unordered) compare so we can eliminate NaNs.
+    * (Otherwise would need fract_safe above).
+    */
+   mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
+                           PIPE_FUNC_LESS, coord_f, coord_bld->zero);
+
     /* convert to int, compute lerp weight */
     lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
-   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
-                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
     *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
  }
  
@@ -375,7 +386,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
           }
  
           /* clamp to length max */
-         coord = lp_build_min(coord_bld, coord, length_f);
+         coord = lp_build_min_ext(coord_bld, coord, length_f,
+                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
           /* subtract 0.5 */
           coord = lp_build_sub(coord_bld, coord, half);
           /* clamp to [0, length - 0.5] */
@@ -398,7 +410,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
           coord = lp_build_add(coord_bld, coord, offset);
        }
        /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
-      /* can skip clamp (though might not work for very large coord values */
+      /* can skip clamp (though might not work for very large coord values) */
        coord = lp_build_sub(coord_bld, coord, half);
        /* convert to int, compute lerp weight */
        lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
@@ -465,7 +477,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
           coord = lp_build_abs(coord_bld, coord);
  
           /* clamp to length max */
-         coord = lp_build_min(coord_bld, coord, length_f);
+         coord = lp_build_min_ext(coord_bld, coord, length_f,
+                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
           /* subtract 0.5 */
           coord = lp_build_sub(coord_bld, coord, half);
           /* clamp to [0, length - 0.5] */
@@ -628,9 +641,15 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
  
        /* itrunc == ifloor here */
        icoord = lp_build_itrunc(coord_bld, coord);
-
-      /* clamp to [0, length - 1] */
-      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
+      /*
+       * Use unsigned min due to possible undef values (NaNs, overflow)
+       */
+      {
+         struct lp_build_context abs_coord_bld = *int_coord_bld;
+         abs_coord_bld.type.sign = FALSE;
+         /* clamp to [0, length - 1] */
+         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
+      }
        break;
  
     case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
@@ -1360,7 +1379,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
           if (is_gather) {
              /* more hacks for swizzling, should be X, ONE or ZERO... */
              unsigned chan_swiz = bld->static_texture_state->swizzle_r;
-            if (chan_swiz <= PIPE_SWIZZLE_ALPHA) {
+            if (chan_swiz <= PIPE_SWIZZLE_W) {
                 colors0[0] = lp_build_select(texel_bld, cmpval10,
                                              texel_bld->one, texel_bld->zero);
                 colors0[1] = lp_build_select(texel_bld, cmpval11,
@@ -1370,7 +1389,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                 colors0[3] = lp_build_select(texel_bld, cmpval00,
                                              texel_bld->one, texel_bld->zero);
              }
-            else if (chan_swiz == PIPE_SWIZZLE_ZERO) {
+            else if (chan_swiz == PIPE_SWIZZLE_0) {
                 colors0[0] = colors0[1] = colors0[2] = colors0[3] =
                              texel_bld->zero;
              }
@@ -1838,7 +1857,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
        const struct util_format_description *format_desc = bld->format_desc;
        unsigned chan_type;
        /* not entirely sure we couldn't end up with non-valid swizzle here */
-      chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
+      chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ?
                       format_desc->channel[format_desc->swizzle[0]].type :
                       UTIL_FORMAT_TYPE_FLOAT;
        if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
@@ -1957,7 +1976,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld,
        else {
           chan = util_format_get_first_non_void_channel(format_desc->format);
        }
-      if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
+      if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
           unsigned chan_type = format_desc->channel[chan].type;
           unsigned chan_norm = format_desc->channel[chan].normalized;
           unsigned chan_pure = format_desc->channel[chan].pure_integer;
@@ -2387,7 +2406,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
  
     lp_build_fetch_rgba_soa(bld->gallivm,
                             bld->format_desc,
-                           bld->texel_type,
+                           bld->texel_type, TRUE,
                             bld->base_ptr, offset,
                             i, j,
                             bld->cache,
@@ -2842,12 +2861,13 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
        }
  
        /*
-       * we only try 8-wide sampling with soa as it appears to
-       * be a loss with aos with AVX (but it should work, except
-       * for conformance if min_filter != mag_filter if num_lods > 1).
-       * (It should be faster if we'd support avx2)
+       * we only try 8-wide sampling with soa or if we have AVX2
+       * (as it appears to be a loss with just AVX)
         */
-      if (num_quads == 1 || !use_aos) {
+      if (num_quads == 1 || !use_aos ||
+          (util_cpu_caps.has_avx2 &&
+           (bld.num_lods == 1 ||
+            derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
           if (use_aos) {
              /* do sampling/filtering with fixed pt arithmetic */
              lp_build_sample_aos(&bld, sampler_index,
@@ -3297,12 +3317,13 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
  
        for (i = 0; i < num_param; ++i) {
           if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
-            LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
+
+            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
           }
        }
  
        LLVMSetFunctionCallConv(function, LLVMFastCallConv);
-      LLVMSetLinkage(function, LLVMPrivateLinkage);
+      LLVMSetLinkage(function, LLVMInternalLinkage);
  
        lp_build_sample_gen_func(gallivm,
                                 static_texture_state,
@@ -3441,7 +3462,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
                          struct lp_sampler_dynamic_state *dynamic_state,
                          const struct lp_sampler_size_query_params *params)
  {
-   LLVMValueRef lod, level, size;
+   LLVMValueRef lod, level = 0, size;
     LLVMValueRef first_level = NULL;
     int dims, i;
     boolean has_array;