gallivm/sample: add num samples query for txqs (v2)
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
index 018cca8f9dfa48757ac5f4f815c63762f0cf87c3..5b623eda26bd19408353c678c1751b3a70b9f8d6 100644 (file)
@@ -40,7 +40,7 @@
 #include "util/u_dump.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_cpu_detect.h"
 #include "util/format_rgb9e5.h"
 #include "lp_bld_debug.h"
@@ -61,6 +61,7 @@
 #include "lp_bld_quad.h"
 #include "lp_bld_pack.h"
 #include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
 
 
 /**
@@ -194,9 +195,17 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       for (chan = 0; chan < 4; chan++) {
          unsigned chan_s;
          /* reverse-map channel... */
-         for (chan_s = 0; chan_s < 4; chan_s++) {
-            if (chan_s == format_desc->swizzle[chan]) {
+         if (util_format_has_stencil(format_desc)) {
+            if (chan == 0)
+               chan_s = 0;
+            else
                break;
+         }
+         else {
+            for (chan_s = 0; chan_s < 4; chan_s++) {
+               if (chan_s == format_desc->swizzle[chan]) {
+                  break;
+               }
             }
          }
          if (chan_s <= 3) {
@@ -1032,11 +1041,27 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    boolean seamless_cube_filter, accurate_cube_corners;
    unsigned chan_swiz = bld->static_texture_state->swizzle_r;
 
+   if (is_gather) {
+      switch (bld->gather_comp) {
+      case 0: chan_swiz = bld->static_texture_state->swizzle_r; break;
+      case 1: chan_swiz = bld->static_texture_state->swizzle_g; break;
+      case 2: chan_swiz = bld->static_texture_state->swizzle_b; break;
+      case 3: chan_swiz = bld->static_texture_state->swizzle_a; break;
+      default:
+        break;
+      }
+   }
+
    seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
                            bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
                           bld->static_sampler_state->seamless_cube_map;
 
-   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter;
+   /*
+    * Disable accurate cube corners for integer textures, which should only
+    * get here in the gather path.
+    */
+   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
+     !util_format_is_pure_integer(bld->static_texture_state->format);
 
    lp_build_extract_image_sizes(bld,
                                 &bld->int_size_bld,
@@ -2340,13 +2365,16 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld,
          max_clamp = vec4_bld.one;
       }
       else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
-               format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
+               format_desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
+               format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
          switch (format_desc->format) {
          case PIPE_FORMAT_RGTC1_UNORM:
          case PIPE_FORMAT_RGTC2_UNORM:
          case PIPE_FORMAT_LATC1_UNORM:
          case PIPE_FORMAT_LATC2_UNORM:
          case PIPE_FORMAT_ETC1_RGB8:
+         case PIPE_FORMAT_BPTC_RGBA_UNORM:
+         case PIPE_FORMAT_BPTC_SRGBA:
             min_clamp = vec4_bld.zero;
             max_clamp = vec4_bld.one;
             break;
@@ -2357,6 +2385,12 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld,
             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
             max_clamp = vec4_bld.one;
             break;
+         case PIPE_FORMAT_BPTC_RGB_FLOAT:
+            /* not sure if we should clamp to max half float? */
+            break;
+         case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+            min_clamp = vec4_bld.zero;
+            break;
          default:
             assert(0);
             break;
@@ -2582,6 +2616,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
 static void
 lp_build_fetch_texel(struct lp_build_sample_context *bld,
                      unsigned texture_unit,
+                     LLVMValueRef ms_index,
                      const LLVMValueRef *coords,
                      LLVMValueRef explicit_lod,
                      const LLVMValueRef *offsets,
@@ -2681,6 +2716,18 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                             lp_build_get_mip_offsets(bld, ilevel));
    }
 
+   if (bld->fetch_ms) {
+      LLVMValueRef num_samples;
+      num_samples = bld->dynamic_state->num_samples(bld->dynamic_state, bld->gallivm,
+                                                    bld->context_ptr, texture_unit);
+      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero);
+      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, lp_build_broadcast_scalar(int_coord_bld, num_samples));
+      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+      offset = lp_build_add(int_coord_bld, offset,
+                            lp_build_mul(int_coord_bld, bld->sample_stride, ms_index));
+   }
+
    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
 
    lp_build_fetch_rgba_soa(bld->gallivm,
@@ -2747,6 +2794,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
                          const LLVMValueRef *offsets,
                          const struct lp_derivatives *derivs, /* optional */
                          LLVMValueRef lod, /* optional */
+                         LLVMValueRef ms_index, /* optional */
                          LLVMValueRef texel_out[4])
 {
    unsigned target = static_texture_state->target;
@@ -2763,7 +2811,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
    enum lp_sampler_op_type op_type;
    LLVMValueRef lod_bias = NULL;
    LLVMValueRef explicit_lod = NULL;
-   boolean op_is_tex, op_is_lodq, op_is_gather;
+   boolean op_is_tex, op_is_lodq, op_is_gather, fetch_ms;
 
    if (0) {
       enum pipe_format fmt = static_texture_state->format;
@@ -2776,6 +2824,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
                     LP_SAMPLER_LOD_CONTROL_SHIFT;
    op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
                  LP_SAMPLER_OP_TYPE_SHIFT;
+   fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS);
 
    op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
    op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
@@ -2972,7 +3021,9 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
       bld.num_lods = num_quads;
    }
 
-
+   bld.fetch_ms = fetch_ms;
+   if (op_is_gather)
+      bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
    bld.lodf_type = type;
    /* we want native vector size to be able to use our intrinsics */
    if (bld.num_lods != type.length) {
@@ -3021,6 +3072,10 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
                                           context_ptr, texture_index);
    bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
                                                 context_ptr, texture_index);
+
+   if (fetch_ms)
+      bld.sample_stride = lp_build_broadcast_scalar(&bld.int_coord_bld, dynamic_state->sample_stride(dynamic_state, gallivm,
+                                                                                                     context_ptr, texture_index));
    /* Note that mip_offsets is an array[level] of offsets to texture images */
 
    if (dynamic_state->cache_ptr && thread_data_ptr) {
@@ -3093,7 +3148,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
    }
 
    else if (op_type == LP_SAMPLER_OP_FETCH) {
-      lp_build_fetch_texel(&bld, texture_index, newcoords,
+      lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
                            lod, offsets,
                            texel_out);
    }
@@ -3112,6 +3167,11 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
       use_aos &= bld.num_lods <= num_quads ||
                  derived_sampler_state.min_img_filter ==
                     derived_sampler_state.mag_img_filter;
+
+      if(gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
+         use_aos = 0;
+      }
+
       if (dims > 1) {
          use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
          if (dims > 2) {
@@ -3414,6 +3474,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
    LLVMValueRef coords[5];
    LLVMValueRef offsets[3] = { NULL };
    LLVMValueRef lod = NULL;
+   LLVMValueRef ms_index = NULL;
    LLVMValueRef context_ptr;
    LLVMValueRef thread_data_ptr = NULL;
    LLVMValueRef texel_out[4];
@@ -3456,6 +3517,9 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
    if (sample_key & LP_SAMPLER_SHADOW) {
       coords[4] = LLVMGetParam(function, num_param++);
    }
+   if (sample_key & LP_SAMPLER_FETCH_MS) {
+      ms_index = LLVMGetParam(function, num_param++);
+   }
    if (sample_key & LP_SAMPLER_OFFSETS) {
       for (i = 0; i < num_offsets; i++) {
          offsets[i] = LLVMGetParam(function, num_param++);
@@ -3498,6 +3562,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
                             offsets,
                             deriv_ptr,
                             lod,
+                            ms_index,
                             texel_out);
 
    LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
@@ -3549,10 +3614,6 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
       const struct util_format_description *format_desc;
       format_desc = util_format_description(static_texture_state->format);
       if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
-         /*
-          * This is not 100% correct, if we have cache but the
-          * util_format_s3tc_prefer is true the cache won't get used
-          * regardless (could hook up the block decode there...) */
          need_cache = TRUE;
       }
    }
@@ -3564,8 +3625,8 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
     * Additionally lod_property has to be included too.
     */
 
-   util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
-                 texture_index, sampler_index, sample_key);
+   snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
+            texture_index, sampler_index, sample_key);
 
    function = LLVMGetNamedFunction(module, func_name);
 
@@ -3595,6 +3656,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
       if (sample_key & LP_SAMPLER_SHADOW) {
          arg_types[num_param++] = LLVMTypeOf(coords[0]);
       }
+      if (sample_key & LP_SAMPLER_FETCH_MS) {
+         arg_types[num_param++] = LLVMTypeOf(params->ms_index);
+      }
       if (sample_key & LP_SAMPLER_OFFSETS) {
          for (i = 0; i < num_offsets; i++) {
             arg_types[num_param++] = LLVMTypeOf(offsets[0]);
@@ -3656,6 +3720,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
    if (sample_key & LP_SAMPLER_SHADOW) {
       args[num_args++] = coords[4];
    }
+   if (sample_key & LP_SAMPLER_FETCH_MS) {
+      args[num_args++] = params->ms_index;
+   }
    if (sample_key & LP_SAMPLER_OFFSETS) {
       for (i = 0; i < num_offsets; i++) {
          args[num_args++] = offsets[i];
@@ -3756,6 +3823,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
                                params->offsets,
                                params->derivs,
                                params->lod,
+                               params->ms_index,
                                params->texel);
    }
 }
@@ -3834,6 +3902,12 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
 
    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
 
+   if (params->samples_only) {
+      params->sizes_out[0] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
+                                                dynamic_state->num_samples(dynamic_state, gallivm,
+                                                                           context_ptr, texture_unit));
+      return;
+   }
    if (params->explicit_lod) {
       /* FIXME: this needs to honor per-element lod */
       lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
@@ -3946,3 +4020,210 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
                                         num_levels);
    }
 }
+
+static void
+lp_build_do_atomic_soa(struct gallivm_state *gallivm,
+                       const struct util_format_description *format_desc,
+                       struct lp_type type,
+                       LLVMValueRef exec_mask,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offset,
+                       LLVMValueRef out_of_bounds,
+                       unsigned img_op,
+                       LLVMAtomicRMWBinOp op,
+                       const LLVMValueRef rgba_in[4],
+                       const LLVMValueRef rgba2_in[4],
+                       LLVMValueRef atomic_result[4])
+{
+   enum pipe_format format = format_desc->format;
+
+   if (format != PIPE_FORMAT_R32_UINT && format != PIPE_FORMAT_R32_SINT && format != PIPE_FORMAT_R32_FLOAT) {
+      atomic_result[0] = lp_build_zero(gallivm, type);
+      return;
+   }
+
+   LLVMValueRef atom_res = lp_build_alloca(gallivm,
+                                           LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), type.length), "");
+
+   offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
+   struct lp_build_loop_state loop_state;
+   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+   struct lp_build_if_state ifthen;
+   LLVMValueRef cond;
+   LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];
+
+   LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask");
+   assert(exec_mask);
+
+   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
+   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+   lp_build_if(&ifthen, gallivm, cond);
+
+   LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
+   LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
+   cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
+   data = LLVMBuildBitCast(gallivm->builder, data, LLVMInt32TypeInContext(gallivm->context), "");
+
+   if (img_op == LP_IMG_ATOMIC_CAS) {
+      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, packed2, loop_state.counter, "");
+      LLVMValueRef cas_src = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, LLVMInt32TypeInContext(gallivm->context), "");
+      data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
+                                    cas_src,
+                                    LLVMAtomicOrderingSequentiallyConsistent,
+                                    LLVMAtomicOrderingSequentiallyConsistent,
+                                    false);
+      data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
+   } else {
+      data = LLVMBuildAtomicRMW(gallivm->builder, op,
+                                cast_base_ptr, data,
+                                LLVMAtomicOrderingSequentiallyConsistent,
+                                false);
+   }
+
+   LLVMValueRef temp_res = LLVMBuildLoad(gallivm->builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data, loop_state.counter, "");
+   LLVMBuildStore(gallivm->builder, temp_res, atom_res);
+
+   lp_build_endif(&ifthen);
+   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
+                          NULL, LLVMIntUGE);
+   atomic_result[0] = LLVMBuildLoad(gallivm->builder, atom_res, "");
+}
+
+void
+lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
+                    struct lp_sampler_dynamic_state *dynamic_state,
+                    struct gallivm_state *gallivm,
+                    const struct lp_img_params *params)
+{
+   unsigned target = params->target;
+   unsigned dims = texture_dims(target);
+   /** regular scalar int type */
+   struct lp_type int_type, int_coord_type;
+   struct lp_build_context int_bld, int_coord_bld;
+   const struct util_format_description *format_desc = util_format_description(static_texture_state->format);
+   LLVMValueRef x = params->coords[0], y = params->coords[1], z = params->coords[2];
+   LLVMValueRef ms_index = params->ms_index;
+   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
+   int_type = lp_type_int(32);
+   int_coord_type = lp_int_type(params->type);
+   lp_build_context_init(&int_bld, gallivm, int_type);
+   lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);
+
+   LLVMValueRef offset, i, j;
+
+   LLVMValueRef row_stride = dynamic_state->row_stride(dynamic_state, gallivm,
+                                                       params->context_ptr, params->image_index);
+   LLVMValueRef img_stride = dynamic_state->img_stride(dynamic_state, gallivm,
+                                                       params->context_ptr, params->image_index);
+   LLVMValueRef base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
+                                                   params->context_ptr, params->image_index);
+   LLVMValueRef width = dynamic_state->width(dynamic_state, gallivm,
+                                                params->context_ptr, params->image_index);
+   LLVMValueRef height = dynamic_state->height(dynamic_state, gallivm,
+                                               params->context_ptr, params->image_index);
+   LLVMValueRef depth = dynamic_state->depth(dynamic_state, gallivm,
+                                              params->context_ptr, params->image_index);
+   LLVMValueRef num_samples = NULL, sample_stride = NULL;
+   if (ms_index) {
+      num_samples = dynamic_state->num_samples(dynamic_state, gallivm,
+                                               params->context_ptr, params->image_index);
+      sample_stride = dynamic_state->sample_stride(dynamic_state, gallivm,
+                                                   params->context_ptr, params->image_index);
+   }
+
+   boolean layer_coord = has_layer_coord(target);
+
+   width = lp_build_broadcast_scalar(&int_coord_bld, width);
+   if (dims >= 2) {
+      height = lp_build_broadcast_scalar(&int_coord_bld, height);
+      row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
+   }
+   if (dims >= 3 || layer_coord) {
+      depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
+      img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
+   }
+
+   LLVMValueRef out_of_bounds = int_coord_bld.zero;
+   LLVMValueRef out1;
+   out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
+   out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+
+   if (dims >= 2) {
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+   }
+   if (dims >= 3) {
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+   }
+   lp_build_sample_offset(&int_coord_bld,
+                          format_desc,
+                          x, y, z, row_stride_vec, img_stride_vec,
+                          &offset, &i, &j);
+
+   if (ms_index) {
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, lp_build_broadcast_scalar(&int_coord_bld, num_samples));
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+
+      offset = lp_build_add(&int_coord_bld, offset,
+                            lp_build_mul(&int_coord_bld, lp_build_broadcast_scalar(&int_coord_bld, sample_stride),
+                                         ms_index));
+   }
+   if (params->img_op == LP_IMG_LOAD) {
+      struct lp_type texel_type = params->type;
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+          format_desc->channel[0].pure_integer) {
+         if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+            texel_type = lp_type_int_vec(params->type.width, params->type.width * params->type.length);
+         } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+            texel_type = lp_type_uint_vec(params->type.width, params->type.width * params->type.length);
+         }
+      }
+
+      if (static_texture_state->format == PIPE_FORMAT_NONE) {
+         /*
+          * If there's nothing bound, format is NONE, and we must return
+          * all zero as mandated by d3d10 in this case.
+          */
+         unsigned chan;
+         LLVMValueRef zero = lp_build_zero(gallivm, params->type);
+         for (chan = 0; chan < 4; chan++) {
+            params->outdata[chan] = zero;
+         }
+         return;
+      }
+
+      offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
+      struct lp_build_context texel_bld;
+      lp_build_context_init(&texel_bld, gallivm, texel_type);
+      lp_build_fetch_rgba_soa(gallivm,
+                              format_desc,
+                              texel_type, TRUE,
+                              base_ptr, offset,
+                              i, j,
+                              NULL,
+                              params->outdata);
+
+      for (unsigned chan = 0; chan < 4; chan++) {
+         params->outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
+                                                 texel_bld.zero, params->outdata[chan]);
+      }
+   } else if (params->img_op == LP_IMG_STORE) {
+      if (static_texture_state->format == PIPE_FORMAT_NONE)
+         return;
+      lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+                              params->indata);
+   } else {
+      if (static_texture_state->format == PIPE_FORMAT_NONE) {
+         /*
+         * For atomic operation just return 0 in the unbound case to avoid a crash.
+          */
+         LLVMValueRef zero = lp_build_zero(gallivm, params->type);
+         params->outdata[0] = zero;
+         return;
+      }
+      lp_build_do_atomic_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+                             params->img_op, params->op, params->indata, params->indata2, params->outdata);
+   }
+}