gallivm: fix out-of-bounds behavior for fetch/ld
author: Roland Scheidegger <sroland@vmware.com>
Tue, 6 Aug 2013 18:50:47 +0000 (20:50 +0200)
committer: Roland Scheidegger <sroland@vmware.com>
Thu, 8 Aug 2013 16:55:57 +0000 (18:55 +0200)
For d3d10 and ARB_robust_buffer_access_behavior, we are required to return
0 for out-of-bounds coordinates (for this we can simply enable the code that
was already there but disabled). Additionally, we also need to return 0 for
an out-of-bounds mip level and an out-of-bounds layer. This changes the logic
so that, instead of clamping the level/layer, an out-of-bounds mask is computed
in this case (the actual clamping can then be omitted, just like with
coordinates, since we set the fetch offset to zero if that happens anyway).

Reviewed-by: Zack Rusin <zackr@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_sample.c
src/gallium/auxiliary/gallivm/lp_bld_sample.h
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

index 573a2d00bbf7d15e28a9f6a5b65170d6f8ce2df7..a0b1d1444de88244267af28d797d6377ce0a153e 100644 (file)
@@ -47,6 +47,7 @@
 #include "lp_bld_logic.h"
 #include "lp_bld_pack.h"
 #include "lp_bld_quad.h"
+#include "lp_bld_bitarit.h"
 
 
 /*
@@ -777,17 +778,19 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
 
 /**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
+ * to actual mip level.
  * Note: this is all scalar per quad code.
  * \param lod_ipart  int texture level of detail
- * \param level_out  returns integer 
+ * \param level_out  returns integer
+ * \param out_of_bounds returns per coord out_of_bounds mask if provided
  */
 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                            unsigned texture_unit,
                            LLVMValueRef lod_ipart,
-                           LLVMValueRef *level_out)
+                           LLVMValueRef *level_out,
+                           LLVMValueRef *out_of_bounds)
 {
    struct lp_build_context *leveli_bld = &bld->leveli_bld;
    LLVMValueRef first_level, last_level, level;
@@ -801,8 +804,31 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 
    level = lp_build_add(leveli_bld, lod_ipart, first_level);
 
-   /* clamp level to legal range of levels */
-   *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
+   if (out_of_bounds) {
+      LLVMValueRef out, out1;
+      out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
+      out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
+      out = lp_build_or(leveli_bld, out, out1);
+      if (bld->num_lods == bld->coord_bld.type.length) {
+         *out_of_bounds = out;
+      }
+      else if (bld->num_lods == 1) {
+         *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
+      }
+      else {
+         assert(bld->num_lods == bld->coord_bld.type.length / 4);
+         *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
+                                                                leveli_bld->type,
+                                                                bld->int_coord_bld.type,
+                                                                out);
+      }
+      *level_out = level;
+   }
+   else {
+      /* clamp level to legal range of levels */
+      *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
+
+   }
 }
 
 
index a3ecc05877c6f8fda0e7bb5af15bf6a213dc9a06..f9a2b3fe5f598855792f4c0e29a2ce6be6c9ec2c 100644 (file)
@@ -382,7 +382,8 @@ void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                            unsigned texture_unit,
                            LLVMValueRef lod,
-                           LLVMValueRef *level_out);
+                           LLVMValueRef *level_out,
+                           LLVMValueRef *out_of_bounds);
 
 void
 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
index fa9edb5489c48939954821955d0a38a91fdcd449..122ec4f271a053643e522766f3cacfd58c879838 100644 (file)
@@ -1055,22 +1055,36 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
 
 /**
- * Clamp layer coord to valid values.
+ * Build (per-coord) layer value.
+ * Either clamp layer to valid values or fill in optional out_of_bounds
+ * value and just return value unclamped.
  */
 static LLVMValueRef
 lp_build_layer_coord(struct lp_build_sample_context *bld,
                      unsigned texture_unit,
-                     LLVMValueRef layer)
+                     LLVMValueRef layer,
+                     LLVMValueRef *out_of_bounds)
 {
-   LLVMValueRef maxlayer;
+   LLVMValueRef num_layers;
+   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 
-   maxlayer = bld->dynamic_state->depth(bld->dynamic_state,
-                                        bld->gallivm, texture_unit);
-   maxlayer = lp_build_sub(&bld->int_bld, maxlayer, bld->int_bld.one);
-   maxlayer = lp_build_broadcast_scalar(&bld->int_coord_bld, maxlayer);
-   return lp_build_clamp(&bld->int_coord_bld, layer,
-                         bld->int_coord_bld.zero, maxlayer);
+   num_layers = bld->dynamic_state->depth(bld->dynamic_state,
+                                          bld->gallivm, texture_unit);
 
+   if (out_of_bounds) {
+      LLVMValueRef out1, out;
+      num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
+      out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
+      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
+      *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
+      return layer;
+   }
+   else {
+      LLVMValueRef maxlayer;
+      maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
+      maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
+      return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
+   }
 }
 
 
@@ -1123,11 +1137,11 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
    }
    else if (target == PIPE_TEXTURE_1D_ARRAY) {
       *r = lp_build_iround(&bld->coord_bld, *t);
-      *r = lp_build_layer_coord(bld, texture_index, *r);
+      *r = lp_build_layer_coord(bld, texture_index, *r, NULL);
    }
    else if (target == PIPE_TEXTURE_2D_ARRAY) {
       *r = lp_build_iround(&bld->coord_bld, *r);
-      *r = lp_build_layer_coord(bld, texture_index, *r);
+      *r = lp_build_layer_coord(bld, texture_index, *r, NULL);
    }
 
    /*
@@ -1162,7 +1176,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
           * bad x86 code to be emitted.
           */
          assert(*lod_ipart);
-         lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0);
+         lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
       }
       else {
          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
@@ -1173,7 +1187,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
       break;
    case PIPE_TEX_MIPFILTER_NEAREST:
       assert(*lod_ipart);
-      lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0);
+      lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
       break;
    case PIPE_TEX_MIPFILTER_LINEAR:
       assert(*lod_ipart);
@@ -1300,12 +1314,15 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    unsigned dims = bld->dims, chan;
    unsigned target = bld->static_texture_state->target;
+   boolean out_of_bound_ret_zero = TRUE;
    LLVMValueRef size, ilevel;
    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
    LLVMValueRef width, height, depth, i, j;
    LLVMValueRef offset, out_of_bounds, out1;
 
+   out_of_bounds = int_coord_bld->zero;
+
    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
       if (bld->num_lods != int_coord_bld->type.length) {
          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
@@ -1314,11 +1331,18 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
       else {
          ilevel = explicit_lod;
       }
-      lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel);
+      lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
+                                 out_of_bound_ret_zero ? &out_of_bounds : NULL);
    }
    else {
-      bld->num_lods = 1;
-      ilevel = lp_build_const_int32(bld->gallivm, 0);
+      assert(bld->num_lods == 1);
+      if (bld->static_texture_state->target != PIPE_BUFFER) {
+         ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
+                                                  bld->gallivm, texture_unit);
+      }
+      else {
+         ilevel = lp_build_const_int32(bld->gallivm, 0);
+      }
    }
    lp_build_mipmap_level_sizes(bld, ilevel,
                                &size,
@@ -1329,19 +1353,27 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
    if (target == PIPE_TEXTURE_1D_ARRAY ||
        target == PIPE_TEXTURE_2D_ARRAY) {
       if (target == PIPE_TEXTURE_1D_ARRAY) {
-         z = lp_build_layer_coord(bld, texture_unit, y);
+         z = y;
+      }
+      if (out_of_bound_ret_zero) {
+         z = lp_build_layer_coord(bld, texture_unit, z, &out1);
+         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
       }
       else {
-         z = lp_build_layer_coord(bld, texture_unit, z);
+         z = lp_build_layer_coord(bld, texture_unit, z, NULL);
       }
    }
 
    /* This is a lot like border sampling */
    if (offsets[0]) {
-      /* XXX coords are really unsigned, offsets are signed */
+      /*
+       * coords are really unsigned, offsets are signed, but I don't think
+       * exceeding 31 bits is possible
+       */
       x = lp_build_add(int_coord_bld, x, offsets[0]);
    }
-   out_of_bounds = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
+   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
+   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
 
@@ -1384,11 +1416,10 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                            i, j,
                            colors_out);
 
-   if (0) {
+   if (out_of_bound_ret_zero) {
       /*
-       * Not needed except for ARB_robust_buffer_access_behavior.
+       * Only needed for ARB_robust_buffer_access_behavior and d3d10.
        * Could use min/max above instead of out-of-bounds comparisons
-       * (in fact cast to unsigned and min only is sufficient)
        * if we don't care about the result returned for out-of-bounds.
        */
       for (chan = 0; chan < 4; chan++) {