gallivm: add image load/store/atomic support
author: Dave Airlie <airlied@redhat.com>
Fri, 19 Jul 2019 09:06:48 +0000 (19:06 +1000)
committer: Dave Airlie <airlied@redhat.com>
Tue, 27 Aug 2019 02:29:58 +0000 (12:29 +1000)
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_format.h
src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
src/gallium/auxiliary/gallivm/lp_bld_sample.c
src/gallium/auxiliary/gallivm/lp_bld_sample.h
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index ade8825e1fbcd531797e5a229a623946a42587e8..cd1eaec9e5fff09a27fcf52bb3aa857109e891d6 100644 (file)
@@ -151,6 +151,16 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                         LLVMValueRef cache,
                         LLVMValueRef rgba_out[4]);
 
+/**
+ * Emit code that packs SoA rgba values into the given pixel format and
+ * stores them to memory, one vector element at a time.
+ *
+ * \param format_desc    description of the destination pixel format
+ * \param type           type of the incoming rgba_in vectors
+ * \param exec_mask      per-element execution mask; must not be NULL
+ * \param base_ptr       base pointer the per-element offsets are applied to
+ * \param offset         vector of per-element offsets
+ * \param out_of_bounds  per-element mask; elements with it set are not stored
+ * \param rgba_in        the four channel vectors to store
+ */
+void
+lp_build_store_rgba_soa(struct gallivm_state *gallivm,
+                        const struct util_format_description *format_desc,
+                        struct lp_type type,
+                        LLVMValueRef exec_mask,
+                        LLVMValueRef base_ptr,
+                        LLVMValueRef offset,
+                        LLVMValueRef out_of_bounds,
+                        const LLVMValueRef rgba_in[4]);
+
 /*
  * YUV
  */
index 22c19b10dbd57a821233d5097a4e8e24cd1cea48..a6ec91c4a3f3863ccb04b20f38eee6685192224a 100644 (file)
@@ -42,7 +42,9 @@
 #include "lp_bld_format.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_pack.h"
-
+#include "lp_bld_flow.h"
+#include "lp_bld_printf.h"
+#include "lp_bld_intr.h"
 
 static void
 convert_to_soa(struct gallivm_state *gallivm,
@@ -858,3 +860,230 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       convert_to_soa(gallivm, aos_fetch, rgba_out, type);
    }
 }
+
+/**
+ * Shift an already converted channel into position and merge it into the
+ * packed vector being assembled.
+ *
+ * \param start   bit position of the channel inside the packed element
+ * \param chan    integer vector holding the converted channel bits
+ * \param output  in/out packed vector; if *output is NULL the channel
+ *                becomes the initial value, otherwise it is OR'ed in
+ */
+static void
+lp_build_merge_soa_chan(struct lp_build_context *bld,
+                        unsigned start,
+                        LLVMValueRef chan,
+                        LLVMValueRef *output)
+{
+   struct gallivm_state *gallivm = bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+
+   if (start)
+      chan = LLVMBuildShl(builder, chan,
+                          lp_build_const_int_vec(gallivm, bld->type, start), "");
+   if (!*output)
+      *output = chan;
+   else
+      *output = LLVMBuildOr(builder, *output, chan, "");
+}
+
+/**
+ * Convert one SoA channel to its in-memory bit representation and insert
+ * it into a packed integer vector.
+ *
+ * \param bld        build context describing the type of \p rgba
+ * \param blockbits  size of the packed block in bits (not used here)
+ * \param chan_desc  type, size and shift of the destination channel
+ * \param output     in/out packed vector; *output == NULL on entry means
+ *                   nothing has been packed yet
+ * \param rgba       the channel values to convert
+ */
+static void
+lp_build_insert_soa_chan(struct lp_build_context *bld,
+                         unsigned blockbits,
+                         struct util_format_channel_description chan_desc,
+                         LLVMValueRef *output,
+                         LLVMValueRef rgba)
+{
+   struct gallivm_state *gallivm = bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_type type = bld->type;
+   const unsigned width = chan_desc.size;
+   const unsigned start = chan_desc.shift;
+   /* 64-bit arithmetic: shifting a 32-bit long/int by 31 or 32 is undefined. */
+   const uint32_t chan_mask = (uint32_t)((1ULL << width) - 1);
+   LLVMValueRef chan;
+
+   (void)blockbits;
+
+   switch (chan_desc.type) {
+   case UTIL_FORMAT_TYPE_UNSIGNED:
+      if (chan_desc.pure_integer) {
+         chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+         /*
+          * Keep only the channel's own bits so that out-of-range values
+          * cannot leak into neighbouring channels (values are truncated,
+          * not saturated).
+          */
+         if (width < type.width)
+            chan = LLVMBuildAnd(builder, chan,
+                                lp_build_const_int_vec(gallivm, type, chan_mask), "");
+      } else if (type.floating) {
+         if (chan_desc.normalized)
+            chan = lp_build_clamped_float_to_unsigned_norm(gallivm, type, width, rgba);
+         else
+            /* The destination of fptosi must be an integer vector type. */
+            chan = LLVMBuildFPToSI(builder, rgba, bld->int_vec_type, "");
+      } else {
+         /* Integer input for a non pure-integer channel is not handled. */
+         assert(0);
+         chan = LLVMGetUndef(bld->int_vec_type);
+      }
+      lp_build_merge_soa_chan(bld, start, chan, output);
+      break;
+   case UTIL_FORMAT_TYPE_SIGNED:
+      if (chan_desc.pure_integer) {
+         chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+         /*
+          * Negative values are sign-extended to the full element width;
+          * mask them down to the channel width before merging, otherwise
+          * the sign bits overwrite the channels stored above this one.
+          */
+         if (width < type.width)
+            chan = LLVMBuildAnd(builder, chan,
+                                lp_build_const_int_vec(gallivm, type, chan_mask), "");
+      } else if (type.floating) {
+         if (chan_desc.normalized) {
+            char intrin[32];
+            const double scale = (double)((1ULL << (width - 1)) - 1);
+            LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+            /* clamp to [-1, 1], scale to the signed range and round */
+            rgba = lp_build_clamp(bld, rgba, lp_build_negate(bld, bld->one), bld->one);
+            rgba = LLVMBuildFMul(builder, rgba, scale_val, "");
+            lp_format_intrinsic(intrin, sizeof intrin, "llvm.rint", bld->vec_type);
+            rgba = lp_build_intrinsic_unary(builder, intrin, bld->vec_type, rgba);
+         }
+         chan = LLVMBuildFPToSI(builder, rgba, bld->int_vec_type, "");
+         chan = LLVMBuildAnd(builder, chan,
+                             lp_build_const_int_vec(gallivm, type, chan_mask), "");
+      } else {
+         /* Integer input for a non pure-integer channel is not handled. */
+         assert(0);
+         chan = LLVMGetUndef(bld->int_vec_type);
+      }
+      lp_build_merge_soa_chan(bld, start, chan, output);
+      break;
+   case UTIL_FORMAT_TYPE_FLOAT:
+      if (type.floating) {
+         if (width == 16) {
+            chan = lp_build_float_to_half(gallivm, rgba);
+            chan = LLVMBuildZExt(builder, chan, bld->int_vec_type, "");
+            lp_build_merge_soa_chan(bld, start, chan, output);
+         } else {
+            /* a full 32-bit float occupies the whole element: plain bitcast */
+            assert(start == 0);
+            assert(start + width == 32);
+            assert(type.width == 32);
+            *output = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+         }
+      } else
+         assert(0);
+      break;
+   default:
+      assert(0);
+      *output = bld->undef;
+   }
+}
+
+/**
+ * Pack the channels of a plain, single-pixel-block format into one integer
+ * vector by inserting every channel described by format_desc in turn.
+ *
+ * Only 32-bit wide element types are handled, and the whole pixel has to
+ * fit into one element. *packed is merged into by
+ * lp_build_insert_soa_chan(), which treats a NULL *packed as empty.
+ */
+static void
+lp_build_pack_rgba_soa(struct gallivm_state *gallivm,
+                       const struct util_format_description *format_desc,
+                       struct lp_type type,
+                       const LLVMValueRef rgba_in[4],
+                       LLVMValueRef *packed)
+{
+   struct lp_build_context pack_bld;
+   const unsigned block_bits = format_desc->block.bits;
+
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(block_bits <= type.width);
+   /* FIXME: Support more output types */
+   assert(type.width == 32);
+
+   lp_build_context_init(&pack_bld, gallivm, type);
+
+   for (unsigned c = 0; c < format_desc->nr_channels; c++) {
+      lp_build_insert_soa_chan(&pack_bld, block_bits,
+                               format_desc->channel[c],
+                               packed,
+                               rgba_in[c]);
+   }
+}
+
+/**
+ * Emit code that packs SoA rgba values into the given pixel format and
+ * stores them to memory.
+ *
+ * The pixel is first packed into one (or, for pixels wider than the element
+ * type, two or four) integer vectors. Each vector is then written with a
+ * scalar store per element, guarded so that only elements which are active
+ * in \p exec_mask and not flagged in \p out_of_bounds touch memory.
+ *
+ * \param format_desc    description of the destination pixel format
+ * \param type           type of the rgba_in vectors (32-bit elements)
+ * \param exec_mask      per-element execution mask; must not be NULL
+ * \param base_ptr       base pointer the per-element offsets are applied to
+ * \param offset         vector of per-element offsets
+ * \param out_of_bounds  per-element mask of coordinates outside the image
+ * \param rgba_in        the four channel vectors to store
+ */
+void
+lp_build_store_rgba_soa(struct gallivm_state *gallivm,
+                        const struct util_format_description *format_desc,
+                        struct lp_type type,
+                        LLVMValueRef exec_mask,
+                        LLVMValueRef base_ptr,
+                        LLVMValueRef offset,
+                        LLVMValueRef out_of_bounds,
+                        const LLVMValueRef rgba_in[4])
+{
+   enum pipe_format format = format_desc->format;
+   LLVMBuilderRef builder = gallivm->builder;
+   /*
+    * An empty initializer "{}" is not valid C before C23. NULL entries
+    * mean that no channel has been packed into that vector yet.
+    */
+   LLVMValueRef packed[4] = { NULL };
+   unsigned num_stores;
+
+   assert(exec_mask);
+
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+       format_desc->block.width == 1 &&
+       format_desc->block.height == 1 &&
+       format_desc->block.bits <= type.width &&
+       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
+        format_desc->channel[0].size == 32 ||
+        format_desc->channel[0].size == 16))
+   {
+      /* The whole pixel fits into a single element. */
+      lp_build_pack_rgba_soa(gallivm, format_desc, type, rgba_in, &packed[0]);
+
+      num_stores = 1;
+   } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) &&
+       format_desc->block.width == 1 &&
+       format_desc->block.height == 1 &&
+       format_desc->block.bits > type.width &&
+       ((format_desc->block.bits <= type.width * type.length &&
+         format_desc->channel[0].size <= type.width) ||
+        (format_desc->channel[0].size == 64 &&
+         format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
+         type.floating)))
+   {
+      /*
+       * Similar to above, but the packed pixel is larger than what fits
+       * into one element of the source type. Each channel is inserted into
+       * the packed vector selected by its bit position (shift / element
+       * width), at the remaining shift inside that element, and every
+       * packed vector is written out separately below.
+       */
+      struct lp_build_context bld;
+
+      lp_build_context_init(&bld, gallivm, type);
+      assert(type.width == 32);
+      assert(format_desc->block.bits > type.width);
+
+      unsigned store_width = util_next_power_of_two(format_desc->block.bits);
+      num_stores = store_width / type.width;
+      for (unsigned i = 0; i < format_desc->nr_channels; i++) {
+         struct util_format_channel_description chan_desc = format_desc->channel[i];
+         unsigned blockbits = type.width;
+         unsigned vec_nr;
+
+         vec_nr = chan_desc.shift / type.width;
+         chan_desc.shift %= type.width;
+
+         lp_build_insert_soa_chan(&bld, blockbits,
+                                  chan_desc,
+                                  &packed[vec_nr],
+                                  rgba_in[i]);
+      }
+
+      assert(num_stores == 4 || num_stores == 2);
+      /* we can transpose and store at the same time */
+   } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      packed[0] = lp_build_float_to_r11g11b10(gallivm, rgba_in);
+      num_stores = 1;
+   } else {
+      /*
+       * Unsupported format. Emit nothing instead of continuing with an
+       * uninitialized num_stores when assertions are compiled out.
+       */
+      assert(0);
+      return;
+   }
+
+   LLVMTypeRef int32_ptr_type = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
+   LLVMTypeRef int16_ptr_type = LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0);
+   LLVMTypeRef int8_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
+
+   /* An element is stored when it is active and inside the image. */
+   LLVMValueRef should_store_mask = LLVMBuildAnd(builder, exec_mask, LLVMBuildNot(builder, out_of_bounds, ""), "store_mask");
+   should_store_mask = LLVMBuildICmp(builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
+   for (unsigned i = 0; i < num_stores; i++) {
+      struct lp_build_loop_state loop_state;
+
+      /* No channel landed in this vector: there is nothing to write. */
+      if (!packed[i])
+         continue;
+
+      /* consecutive packed vectors are 4 bytes apart in memory */
+      LLVMValueRef store_offset = LLVMBuildAdd(builder, offset, lp_build_const_int_vec(gallivm, type, i * 4), "");
+      store_offset = LLVMBuildGEP(builder, base_ptr, &store_offset, 1, "");
+
+      /* Loop over the vector elements, storing one scalar per iteration. */
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      struct lp_build_if_state ifthen;
+      LLVMValueRef cond = LLVMBuildExtractElement(builder, should_store_mask, loop_state.counter, "");
+      lp_build_if(&ifthen, gallivm, cond);
+
+      LLVMValueRef data = LLVMBuildExtractElement(builder, packed[i], loop_state.counter, "");
+      LLVMValueRef this_offset = LLVMBuildExtractElement(builder, store_offset, loop_state.counter, "");
+
+      /* Narrow the store to the pixel size for 8 and 16 bit formats. */
+      if (format_desc->block.bits == 8) {
+         this_offset = LLVMBuildBitCast(builder, this_offset, int8_ptr_type, "");
+         data = LLVMBuildTrunc(builder, data, LLVMInt8TypeInContext(gallivm->context), "");
+      } else if (format_desc->block.bits == 16) {
+         this_offset = LLVMBuildBitCast(builder, this_offset, int16_ptr_type, "");
+         data = LLVMBuildTrunc(builder, data, LLVMInt16TypeInContext(gallivm->context), "");
+      } else
+         this_offset = LLVMBuildBitCast(builder, this_offset, int32_ptr_type, "");
+      LLVMBuildStore(builder, data, this_offset);
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
+                             NULL, LLVMIntUGE);
+   }
+}
index 81cb50607116ecdc8afc4445b6a258c54f0f516e..e49ae810a5e34748d1a241b263e0e451e2c72984 100644 (file)
@@ -125,6 +125,41 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state,
     */
 }
 
+/**
+ * Initialize an lp_static_texture_state object from a gallium image view
+ * (this contains the parts which are considered static).
+ *
+ * The state is zeroed first; a NULL view or a view without a resource
+ * leaves it that way.
+ */
+void
+lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
+                                      const struct pipe_image_view *view)
+{
+   memset(state, 0, sizeof *state);
+
+   if (view == NULL || view->resource == NULL)
+      return;
+
+   const struct pipe_resource *res = view->resource;
+
+   state->format            = view->format;
+   state->target            = res->target;
+
+   /* identity swizzle */
+   state->swizzle_r         = PIPE_SWIZZLE_X;
+   state->swizzle_g         = PIPE_SWIZZLE_Y;
+   state->swizzle_b         = PIPE_SWIZZLE_Z;
+   state->swizzle_a         = PIPE_SWIZZLE_W;
+
+   state->pot_width         = util_is_power_of_two_or_zero(res->width0);
+   state->pot_height        = util_is_power_of_two_or_zero(res->height0);
+   state->pot_depth         = util_is_power_of_two_or_zero(res->depth0);
+   state->level_zero_only   = 0;
+
+   /*
+    * the layer / element / level parameters are all either dynamic
+    * state or handled transparently wrt execution.
+    */
+}
 
 /**
  * Initialize lp_sampler_static_sampler_state object with the gallium sampler
index c00997b898308db9722048b2690ee21f784aa7b2..8509179d08fc97c49ae5e32463912176268356b8 100644 (file)
@@ -49,6 +49,7 @@ extern "C" {
 struct pipe_resource;
 struct pipe_sampler_view;
 struct pipe_sampler_state;
+struct pipe_image_view;
 struct util_format_description;
 struct lp_type;
 struct lp_build_context;
@@ -122,6 +123,27 @@ struct lp_sampler_size_query_params
    LLVMValueRef explicit_lod;
    LLVMValueRef *sizes_out;
 };
+
+/* Image operation selectors, stored in lp_img_params::img_op. */
+#define LP_IMG_LOAD 0
+#define LP_IMG_STORE 1
+#define LP_IMG_ATOMIC 2
+#define LP_IMG_ATOMIC_CAS 3
+
+/**
+ * Parameters describing a single image load, store or atomic operation,
+ * consumed by lp_build_img_op_soa().
+ */
+struct lp_img_params
+{
+   struct lp_type type;             /* type of the data vectors */
+   unsigned image_index;            /* image unit passed to the dynamic state getters */
+   unsigned img_op;                 /* one of the LP_IMG_* values */
+   unsigned target;                 /* texture target, used to derive the dimensionality */
+   LLVMAtomicRMWBinOp op;           /* read-modify-write operation for LP_IMG_ATOMIC */
+   LLVMValueRef exec_mask;          /* execution mask, used by stores and atomics */
+   LLVMValueRef context_ptr;        /* passed to the dynamic state getters */
+   LLVMValueRef thread_data_ptr;    /* filled in by the callers; not read by lp_build_img_op_soa() */
+   const LLVMValueRef *coords;      /* coords[0..2] are read as x, y and z/layer */
+   LLVMValueRef indata[4];          /* store data; atomic operand (comparison value for CAS) */
+   LLVMValueRef indata2[4];         /* replacement value for LP_IMG_ATOMIC_CAS */
+   LLVMValueRef *outdata;           /* load results; atomics write element 0 only */
+};
 /**
  * Texture static state.
  *
@@ -489,6 +511,9 @@ void
 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
                                 const struct pipe_sampler_view *view);
 
+/**
+ * Fill in the static texture state from an image view. A NULL view or a
+ * view without a resource leaves the state zeroed.
+ */
+void
+lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
+                                      const struct pipe_image_view *view);
 
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
@@ -639,6 +664,12 @@ lp_build_minify(struct lp_build_context *bld,
                 LLVMValueRef level,
                 boolean lod_scalar);
 
+/**
+ * Build code for the image operation (load, store, atomic or atomic
+ * compare-and-swap) selected by params->img_op.
+ */
+void
+lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
+                      struct lp_sampler_dynamic_state *dynamic_state,
+                      struct gallivm_state *gallivm,
+                      const struct lp_img_params *params);
+
 #ifdef __cplusplus
 }
 #endif
index d5bd9161119f11aa6c5033fc9ce751e4109d45f2..adb6adf143a98dce798789d4765ad3e0760df2d9 100644 (file)
@@ -61,6 +61,7 @@
 #include "lp_bld_quad.h"
 #include "lp_bld_pack.h"
 #include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
 
 
 /**
@@ -3947,3 +3948,185 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
                                         num_levels);
    }
 }
+
+/**
+ * Emit an atomic read-modify-write or compare-and-swap on image memory.
+ *
+ * Only the 32-bit single channel formats (R32_UINT, R32_SINT, R32_FLOAT)
+ * are supported. The operation is performed one vector element at a time,
+ * and only for elements that are active in \p exec_mask and not flagged in
+ * \p out_of_bounds.
+ *
+ * \param img_op         LP_IMG_ATOMIC or LP_IMG_ATOMIC_CAS
+ * \param op             read-modify-write operation for LP_IMG_ATOMIC
+ * \param rgba_in        element 0 is the operand (comparison value for CAS)
+ * \param rgba2_in       element 0 is the replacement value for CAS
+ * \param atomic_result  element 0 receives a vector of 32-bit integers with
+ *                       the values returned by the atomic instructions;
+ *                       all zero for unsupported formats
+ */
+static void
+lp_build_do_atomic_soa(struct gallivm_state *gallivm,
+                       const struct util_format_description *format_desc,
+                       struct lp_type type,
+                       LLVMValueRef exec_mask,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offset,
+                       LLVMValueRef out_of_bounds,
+                       unsigned img_op,
+                       LLVMAtomicRMWBinOp op,
+                       const LLVMValueRef rgba_in[4],
+                       const LLVMValueRef rgba2_in[4],
+                       LLVMValueRef atomic_result[4])
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
+   LLVMTypeRef res_vec_type = LLVMVectorType(int32_type, type.length);
+   enum pipe_format format = format_desc->format;
+
+   assert(exec_mask);
+
+   if (format != PIPE_FORMAT_R32_UINT && format != PIPE_FORMAT_R32_SINT && format != PIPE_FORMAT_R32_FLOAT) {
+      /*
+       * Unsupported format: no memory access is emitted, but still hand
+       * back a defined value of the usual result type so that the caller
+       * never consumes an unset output.
+       */
+      atomic_result[0] = LLVMConstNull(res_vec_type);
+      return;
+   }
+
+   LLVMValueRef atom_res = lp_build_alloca(gallivm, res_vec_type, "");
+   LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];
+
+   /* vector of per-element pointers */
+   offset = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
+
+   /* Loop invariant: elements that are active and inside the image. */
+   LLVMValueRef should_store_mask = LLVMBuildAnd(builder, exec_mask, LLVMBuildNot(builder, out_of_bounds, ""), "store_mask");
+   should_store_mask = LLVMBuildICmp(builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
+
+   struct lp_build_loop_state loop_state;
+   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+   struct lp_build_if_state ifthen;
+   LLVMValueRef cond = LLVMBuildExtractElement(builder, should_store_mask, loop_state.counter, "");
+   lp_build_if(&ifthen, gallivm, cond);
+
+   LLVMValueRef data = LLVMBuildExtractElement(builder, packed, loop_state.counter, "");
+   LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(builder, offset, loop_state.counter, "");
+   cast_base_ptr = LLVMBuildBitCast(builder, cast_base_ptr, LLVMPointerType(int32_type, 0), "");
+   data = LLVMBuildBitCast(builder, data, int32_type, "");
+
+   if (img_op == LP_IMG_ATOMIC_CAS) {
+      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(builder, packed2, loop_state.counter, "");
+      LLVMValueRef cas_src = LLVMBuildBitCast(builder, cas_src_ptr, int32_type, "");
+      /* data is the comparison value, cas_src the replacement */
+      data = LLVMBuildAtomicCmpXchg(builder, cast_base_ptr, data,
+                                    cas_src,
+                                    LLVMAtomicOrderingSequentiallyConsistent,
+                                    LLVMAtomicOrderingSequentiallyConsistent,
+                                    false);
+      /* cmpxchg returns { old value, success }; keep the old value */
+      data = LLVMBuildExtractValue(builder, data, 0, "");
+   } else {
+      data = LLVMBuildAtomicRMW(builder, op,
+                                cast_base_ptr, data,
+                                LLVMAtomicOrderingSequentiallyConsistent,
+                                false);
+   }
+
+   /* Insert this element's result into the result vector. */
+   LLVMValueRef temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, data, loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+
+   lp_build_endif(&ifthen);
+   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
+                          NULL, LLVMIntUGE);
+   atomic_result[0] = LLVMBuildLoad(builder, atom_res, "");
+}
+
+/**
+ * Build code for an image load, store or atomic operation in SoA form.
+ *
+ * The image layout (strides, base pointer, size) is obtained from
+ * \p dynamic_state, the format from \p static_texture_state. Coordinates
+ * outside the image (including the layer of array and cube targets) are
+ * flagged as out of bounds: loads return zero for them, stores and atomics
+ * skip them.
+ *
+ * If no format is bound (PIPE_FORMAT_NONE) loads return all zero and
+ * stores/atomics emit nothing.
+ *
+ * \param params  operation description; see struct lp_img_params
+ */
+void
+lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
+                    struct lp_sampler_dynamic_state *dynamic_state,
+                    struct gallivm_state *gallivm,
+                    const struct lp_img_params *params)
+{
+   unsigned target = params->target;
+   unsigned dims = texture_dims(target);
+   /** regular scalar int type */
+   struct lp_type int_type, int_coord_type;
+   struct lp_build_context int_bld, int_coord_bld;
+   const struct util_format_description *format_desc = util_format_description(static_texture_state->format);
+   LLVMValueRef x = params->coords[0], y = params->coords[1], z = params->coords[2];
+   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
+   int_type = lp_type_int(32);
+   int_coord_type = lp_int_type(params->type);
+   lp_build_context_init(&int_bld, gallivm, int_type);
+   lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);
+
+   LLVMValueRef offset, i, j;
+
+   LLVMValueRef row_stride = dynamic_state->row_stride(dynamic_state, gallivm,
+                                                       params->context_ptr, params->image_index);
+   LLVMValueRef img_stride = dynamic_state->img_stride(dynamic_state, gallivm,
+                                                       params->context_ptr, params->image_index);
+   LLVMValueRef base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
+                                                   params->context_ptr, params->image_index);
+   LLVMValueRef width = dynamic_state->width(dynamic_state, gallivm,
+                                             params->context_ptr, params->image_index);
+   LLVMValueRef height = dynamic_state->height(dynamic_state, gallivm,
+                                               params->context_ptr, params->image_index);
+   LLVMValueRef depth = dynamic_state->depth(dynamic_state, gallivm,
+                                             params->context_ptr, params->image_index);
+   boolean layer_coord = has_layer_coord(target);
+
+   /* Broadcast the scalar sizes and strides the target actually uses. */
+   width = lp_build_broadcast_scalar(&int_coord_bld, width);
+   if (dims >= 2) {
+      height = lp_build_broadcast_scalar(&int_coord_bld, height);
+      row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
+   }
+   if (dims >= 3 || layer_coord) {
+      depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
+      img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
+   }
+
+   /* Accumulate a per-element mask of coordinates outside the image. */
+   LLVMValueRef out_of_bounds = int_coord_bld.zero;
+   LLVMValueRef out1;
+   out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
+   out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+
+   if (dims >= 2) {
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+   }
+   /*
+    * The third coordinate also has to be checked when it is a layer
+    * index: it is scaled by img_stride below, so an unchecked layer would
+    * address memory outside the image.
+    * NOTE(review): relies on the dynamic depth being the layer count for
+    * layered targets - confirm against the driver's image setup.
+    */
+   if (dims >= 3 || layer_coord) {
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+   }
+   lp_build_sample_offset(&int_coord_bld,
+                          format_desc,
+                          x, y, z, row_stride_vec, img_stride_vec,
+                          &offset, &i, &j);
+
+   if (params->img_op == LP_IMG_LOAD) {
+      /* Pure integer formats are fetched as (u)int vectors. */
+      struct lp_type texel_type = params->type;
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+          format_desc->channel[0].pure_integer) {
+         if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+            texel_type = lp_type_int_vec(params->type.width, params->type.width * params->type.length);
+         } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+            texel_type = lp_type_uint_vec(params->type.width, params->type.width * params->type.length);
+         }
+      }
+
+      if (static_texture_state->format == PIPE_FORMAT_NONE) {
+         /*
+          * If there's nothing bound, format is NONE, and we must return
+          * all zero as mandated by d3d10 in this case.
+          */
+         unsigned chan;
+         LLVMValueRef zero = lp_build_zero(gallivm, params->type);
+         for (chan = 0; chan < 4; chan++) {
+            params->outdata[chan] = zero;
+         }
+         return;
+      }
+
+      /* Redirect out-of-bounds elements to offset 0 so the fetch is safe. */
+      offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
+      struct lp_build_context texel_bld;
+      lp_build_context_init(&texel_bld, gallivm, texel_type);
+      lp_build_fetch_rgba_soa(gallivm,
+                              format_desc,
+                              texel_type, TRUE,
+                              base_ptr, offset,
+                              i, j,
+                              NULL,
+                              params->outdata);
+
+      /* ... and replace whatever was fetched for them with zero. */
+      for (unsigned chan = 0; chan < 4; chan++) {
+         params->outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
+                                                 texel_bld.zero, params->outdata[chan]);
+      }
+   } else if (params->img_op == LP_IMG_STORE) {
+      if (static_texture_state->format == PIPE_FORMAT_NONE)
+         return;
+      lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+                              params->indata);
+   } else {
+      if (static_texture_state->format == PIPE_FORMAT_NONE)
+         return;
+      lp_build_do_atomic_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+                             params->img_op, params->op, params->indata, params->indata2, params->outdata);
+   }
+}
index 7871dce9103a9abbd40f71994f6a7dd9406b7856..672d3a503ca29f589a290681bc3f344a8213779c 100644 (file)
@@ -309,6 +309,8 @@ analyse_instruction(struct analysis_context *ctx,
          continue;
       } else if (dst->File == TGSI_FILE_BUFFER) {
          continue;
+      } else if (dst->File == TGSI_FILE_IMAGE) {
+         continue;
       } else {
          assert(0);
          continue;
index ab78e6bb50c0fe38c36afa6d72d29c6e0927c54e..d6625c422765cd9d811e988e32ffd27d0c127f38 100644 (file)
@@ -3392,6 +3392,79 @@ lod_emit(
                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
 }
 
+/**
+ * Translate a TGSI texture target into the number of coordinate components
+ * to fetch and the component that holds the array layer.
+ *
+ * \param target       TGSI_TEXTURE_* value
+ * \param dims         receives the number of coordinate components; 0 for
+ *                     an unhandled target
+ * \param layer_coord  receives the component index of the layer, or 0 if
+ *                     the target has no separate layer component
+ */
+static void target_to_dims_layer(unsigned target,
+                                 unsigned *dims,
+                                 unsigned *layer_coord)
+{
+   /*
+    * Give both outputs a defined value up front: callers loop on *dims, so
+    * it must not stay uninitialized for an unhandled target when
+    * assertions are compiled out.
+    */
+   *dims = 0;
+   *layer_coord = 0;
+   switch (target) {
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_BUFFER:
+      *dims = 1;
+      break;
+   case TGSI_TEXTURE_1D_ARRAY:
+      *layer_coord = 1;
+      *dims = 1;
+      break;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+      *dims = 2;
+      break;
+   case TGSI_TEXTURE_2D_ARRAY:
+      *layer_coord = 2;
+      *dims = 2;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+   case TGSI_TEXTURE_CUBE_ARRAY:
+      *dims = 3;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+}
+
+/**
+ * Emit an image load: gather the coordinates from source operand 1,
+ * describe the operation in an lp_img_params and hand it to the image
+ * code generator. Results are written to emit_data->output.
+ */
+static void
+img_load_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   const unsigned tgsi_target = emit_data->inst->Memory.Texture;
+   LLVMValueRef undef_coord = LLVMGetUndef(bld->bld_base.base.int_vec_type);
+   LLVMValueRef coords[5];
+   struct lp_img_params params;
+   unsigned dims;
+   unsigned layer_coord;
+
+   target_to_dims_layer(tgsi_target, &dims, &layer_coord);
+
+   /* The first "dims" components are real coordinates, the rest is undef. */
+   for (unsigned c = 0; c < 5; c++) {
+      if (c < dims)
+         coords[c] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, c);
+      else
+         coords[c] = undef_coord;
+   }
+   /* The layer, if any, always goes into the third slot. */
+   if (layer_coord)
+      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
+
+   memset(&params, 0, sizeof(params));
+
+   params.img_op = LP_IMG_LOAD;
+   params.image_index = emit_data->inst->Src[0].Register.Index;
+   params.target = tgsi_to_pipe_tex_target(tgsi_target);
+   params.type = bld->bld_base.base.type;
+   params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
+   params.coords = coords;
+   params.outdata = emit_data->output;
+
+   bld->image->emit_op(bld->image,
+                       bld->bld_base.base.gallivm,
+                       &params);
+}
+
 static void
 load_emit(
    const struct lp_build_tgsi_action * action,
@@ -3403,10 +3476,12 @@ load_emit(
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
    unsigned buf = bufreg->Register.Index;
-   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
 
-   if (0) {
+   if (bufreg->Register.File == TGSI_FILE_IMAGE)
+      img_load_emit(action, bld_base, emit_data);
+   else if (0) {
       /* for indirect support with ARB_gpu_shader5 */
    } else {
       LLVMValueRef index;
@@ -3461,6 +3536,48 @@ load_emit(
    }
 }
 
+/**
+ * Emit an image store: the coordinates come from source operand 0, the
+ * four data channels from source operand 1, and the image unit from the
+ * destination register. The current execution mask is passed along with
+ * the operation.
+ */
+static void
+img_store_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   const unsigned tgsi_target = emit_data->inst->Memory.Texture;
+   LLVMValueRef undef_coord = LLVMGetUndef(bld->bld_base.base.int_vec_type);
+   LLVMValueRef coords[5];
+   struct lp_img_params params;
+   unsigned dims;
+   unsigned layer_coord;
+
+   target_to_dims_layer(tgsi_target, &dims, &layer_coord);
+
+   /* The first "dims" components are real coordinates, the rest is undef. */
+   for (unsigned c = 0; c < 5; c++) {
+      if (c < dims)
+         coords[c] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, c);
+      else
+         coords[c] = undef_coord;
+   }
+   /* The layer, if any, always goes into the third slot. */
+   if (layer_coord)
+      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
+
+   memset(&params, 0, sizeof(params));
+
+   params.type = bld->bld_base.base.type;
+   params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
+   params.coords = coords;
+   params.outdata = NULL;
+   params.exec_mask = mask_vec(bld_base);
+   params.target = tgsi_to_pipe_tex_target(tgsi_target);
+   params.image_index = emit_data->inst->Dst[0].Register.Index;
+   params.img_op = LP_IMG_STORE;
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      params.indata[chan] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan);
+   }
+
+   bld->image->emit_op(bld->image,
+                       bld->bld_base.base.gallivm,
+                       &params);
+}
+
 static void
 store_emit(
    const struct lp_build_tgsi_action * action,
@@ -3473,9 +3590,11 @@ store_emit(
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
    unsigned buf = bufreg->Register.Index;
-   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
 
-   if (0) {
+   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
+      img_store_emit(action, bld_base, emit_data);
+   } else if (0) {
 
    } else {
       LLVMValueRef index;  /* index into the const buffer */
@@ -3539,11 +3658,74 @@ resq_emit(
    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
 
    unsigned buf = bufreg->Register.Index;
-   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
+
+   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
+      unsigned target = emit_data->inst->Memory.Texture;
+      struct lp_sampler_size_query_params params = { 0 };
+      params.int_type = bld->bld_base.int_bld.type;
+      params.texture_unit = buf;
+      params.target = tgsi_to_pipe_tex_target(target);
+      params.context_ptr = bld->context_ptr;
+      params.sizes_out = emit_data->output;
+
+      bld->image->emit_size_query(bld->image,
+                                  bld->bld_base.base.gallivm,
+                                  &params);
+   } else {
+      LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
 
-   LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
+      emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
+   }
+}
+
+static void
+img_atomic_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data,
+   LLVMAtomicRMWBinOp op)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct lp_img_params params;
+   LLVMValueRef coords[5];
+   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
+   unsigned dims;
+   unsigned layer_coord;
+   unsigned target = emit_data->inst->Memory.Texture;
+
+   target_to_dims_layer(target, &dims, &layer_coord);
 
-   emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
+   for (unsigned i = 0; i < dims; i++) {
+      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
+   }
+   for (unsigned i = dims; i < 5; i++) {
+      coords[i] = coord_undef;
+   }
+   if (layer_coord)
+      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
+   memset(&params, 0, sizeof(params));
+
+   params.type = bld->bld_base.base.type;
+   params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
+   params.exec_mask = mask_vec(bld_base);
+   params.image_index = emit_data->inst->Src[0].Register.Index;
+   params.coords = coords;
+   params.target = tgsi_to_pipe_tex_target(target);
+   params.op = op;
+   params.outdata = emit_data->output;
+   params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
+
+   for (unsigned i = 0; i < 4; i++)
+      params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
+   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+      for (unsigned i = 0; i < 4; i++)
+         params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
+   }
+   bld->image->emit_op(bld->image,
+                       bld->bld_base.base.gallivm,
+                       &params);
 }
 
 static void
@@ -3558,7 +3740,7 @@ atomic_emit(
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
 
-   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
    unsigned buf = bufreg->Register.Index;
 
    LLVMAtomicRMWBinOp op;
@@ -3597,7 +3779,9 @@ atomic_emit(
       return;
    }
 
-   if (0) {
+   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
+      img_atomic_emit(action, bld_base, emit_data, op);
+   } else if (0) {
    } else {
       LLVMValueRef index;  /* index into the const buffer */
       LLVMValueRef scalar, scalar_ptr;