gallivm,llvmpipe: handle TXF (texelFetch) instruction, including offsets
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample.c
index 0727fd2b91af532d53daf78069a6ce2e7e54ed5a..ea7dd95b78be2d209b8985fcbd9a3dc5fb682ce4 100644 (file)
@@ -186,8 +186,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
              const struct lp_derivatives *derivs)
 {
    struct gallivm_state *gallivm = bld->gallivm;
-   struct lp_build_context *int_size_bld = &bld->int_size_bld;
-   struct lp_build_context *float_size_bld = &bld->float_size_bld;
+   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
+   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
    struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
@@ -316,7 +316,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
          }
       }
       rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                      perquadf_bld->type, rho);
+                                      perquadf_bld->type, rho, 0);
    }
    else {
       if (dims <= 1) {
@@ -517,7 +517,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
    else {
       if (explicit_lod) {
          lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                                         perquadf_bld->type, explicit_lod);
+                                         perquadf_bld->type, explicit_lod, 0);
       }
       else {
          LLVMValueRef rho;
@@ -562,7 +562,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          /* add shader lod bias */
          if (lod_bias) {
             lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                  perquadf_bld->type, lod_bias);
+                  perquadf_bld->type, lod_bias, 0);
             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
          }
       }
@@ -725,7 +725,6 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 
 /**
  * Return pointer to a single mipmap level.
- * \param data_array  array of pointers to mipmap levels
  * \param level  integer mipmap level
  */
 LLVMValueRef
@@ -743,6 +742,55 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
    return data_ptr;
 }
 
+/**
+ * Return (per-pixel) offsets to mip levels.
+ * \param level  integer mipmap level
+ */
+LLVMValueRef
+lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
+                         LLVMValueRef level)
+{
+   LLVMBuilderRef builder = bld->gallivm->builder;
+   LLVMValueRef indexes[2], offsets, offset1;
+
+   indexes[0] = lp_build_const_int32(bld->gallivm, 0);
+   if (bld->num_lods == 1) {
+      indexes[1] = level;
+      offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
+      offset1 = LLVMBuildLoad(builder, offset1, "");
+      offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
+   }
+   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+      unsigned i;
+
+      offsets = bld->int_coord_bld.undef;
+      for (i = 0; i < bld->num_lods; i++) {
+         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
+         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+         offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
+         offset1 = LLVMBuildLoad(builder, offset1, "");
+         offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
+      }
+      offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0);
+   }
+   else {
+      unsigned i;
+
+      assert (bld->num_lods == bld->coord_bld.type.length);
+
+      offsets = bld->int_coord_bld.undef;
+      for (i = 0; i < bld->num_lods; i++) {
+         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+         offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
+         offset1 = LLVMBuildLoad(builder, offset1, "");
+         offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
+      }
+   }
+   return offsets;
+}
+
 
 /**
  * Codegen equivalent for u_minify().
@@ -780,12 +828,44 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
                               LLVMValueRef stride_array, LLVMValueRef level)
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
-   LLVMValueRef indexes[2], stride;
+   LLVMValueRef indexes[2], stride, stride1;
    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
-   indexes[1] = level;
-   stride = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
-   stride = LLVMBuildLoad(builder, stride, "");
-   stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
+   if (bld->num_lods == 1) {
+      indexes[1] = level;
+      stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+      stride1 = LLVMBuildLoad(builder, stride1, "");
+      stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
+   }
+   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+      LLVMValueRef stride1;
+      unsigned i;
+
+      stride = bld->int_coord_bld.undef;
+      for (i = 0; i < bld->num_lods; i++) {
+         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, i);
+         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+         stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+         stride1 = LLVMBuildLoad(builder, stride1, "");
+         stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
+      }
+      stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0);
+   }
+   else {
+      LLVMValueRef stride1;
+      unsigned i;
+
+      assert (bld->num_lods == bld->coord_bld.type.length);
+
+      stride = bld->int_coord_bld.undef;
+      for (i = 0; i < bld->coord_bld.type.length; i++) {
+         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+         stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+         stride1 = LLVMBuildLoad(builder, stride1, "");
+         stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
+      }
+   }
    return stride;
 }
 
@@ -805,12 +885,102 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
    const unsigned dims = bld->dims;
    LLVMValueRef ilevel_vec;
 
-   ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
-
    /*
     * Compute width, height, depth at mipmap level 'ilevel'
     */
-   *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+   if (bld->num_lods == 1) {
+      ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
+      *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+   }
+   else {
+      LLVMValueRef int_size_vec;
+      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+      unsigned num_quads = bld->coord_bld.type.length / 4;
+      unsigned i;
+
+      if (bld->num_lods == num_quads) {
+         /*
+          * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
+          * intel "forgot" the variable shift count instruction until avx2.
+          * A harmless 8x32 shift gets translated into 32 instructions
+          * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
+          * unable to recognize if there are really just 2 different shift
+          * count values. So do the shift 4-wide before expansion.
+          */
+         struct lp_build_context bld4;
+         struct lp_type type4;
+
+         type4 = bld->int_coord_bld.type;
+         type4.length = 4;
+
+         lp_build_context_init(&bld4, bld->gallivm, type4);
+
+         if (bld->dims == 1) {
+            assert(bld->int_size_in_bld.type.length == 1);
+            int_size_vec = lp_build_broadcast_scalar(&bld4,
+                                                     bld->int_size);
+         }
+         else {
+            assert(bld->int_size_in_bld.type.length == 4);
+            int_size_vec = bld->int_size;
+         }
+
+         for (i = 0; i < num_quads; i++) {
+            LLVMValueRef ileveli;
+            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+
+            ileveli = lp_build_extract_broadcast(bld->gallivm,
+                                                 bld->perquadi_bld.type,
+                                                 bld4.type,
+                                                 ilevel,
+                                                 indexi);
+            tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli);
+         }
+         /*
+          * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
+          * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
+          */
+         *out_size = lp_build_concat(bld->gallivm,
+                                     tmp,
+                                     bld4.type,
+                                     num_quads);
+      }
+      else {
+        /* FIXME: this is terrible and results in _huge_ vector
+         * (for the dims > 1 case).
+         * Should refactor this (together with extract_image_sizes) and do
+         * something more useful. Could for instance if we have width,height
+         * with 4-wide vector pack all elements into a 8xi16 vector
+         * (on which we can still do useful math) instead of using a 16xi32
+         * vector.
+         * FIXME: some callers can't handle this yet.
+         * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
+         * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
+         */
+         assert(bld->num_lods == bld->coord_bld.type.length);
+         if (bld->dims == 1) {
+            assert(bld->int_size_bld.type.length == 1);
+            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
+                                                     bld->int_size);
+            /* vector shift with variable shift count alert... */
+            *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel);
+         }
+         else {
+            LLVMValueRef ilevel1;
+            for (i = 0; i < bld->num_lods; i++) {
+               LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+               ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
+                                                    bld->int_size_in_bld.type, ilevel, indexi);
+               tmp[i] = bld->int_size;
+               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
+            }
+            int_size_vec = lp_build_concat(bld->gallivm,
+                                           tmp,
+                                           bld->int_size_in_bld.type,
+                                           bld->num_lods);
+         }
+      }
+   }
 
    if (dims >= 2) {
       *row_stride_vec = lp_build_get_level_stride_vec(bld,
@@ -836,7 +1006,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
  */
 void
 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
-                             struct lp_type size_type,
+                             struct lp_build_context *size_bld,
                              struct lp_type coord_type,
                              LLVMValueRef size,
                              LLVMValueRef *out_width,
@@ -845,24 +1015,56 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
 {
    const unsigned dims = bld->dims;
    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
+   struct lp_type size_type = size_bld->type;
+
+   if (bld->num_lods == 1) {
+      *out_width = lp_build_extract_broadcast(bld->gallivm,
+                                              size_type,
+                                              coord_type,
+                                              size,
+                                              LLVMConstInt(i32t, 0, 0));
+      if (dims >= 2) {
+         *out_height = lp_build_extract_broadcast(bld->gallivm,
+                                                  size_type,
+                                                  coord_type,
+                                                  size,
+                                                  LLVMConstInt(i32t, 1, 0));
+         if (dims == 3) {
+            *out_depth = lp_build_extract_broadcast(bld->gallivm,
+                                                    size_type,
+                                                    coord_type,
+                                                    size,
+                                                    LLVMConstInt(i32t, 2, 0));
+         }
+      }
+   }
+   else {
+      unsigned num_quads = bld->coord_bld.type.length / 4;
 
-   *out_width = lp_build_extract_broadcast(bld->gallivm,
-                                           size_type,
-                                           coord_type,
-                                           size,
-                                           LLVMConstInt(i32t, 0, 0));
-   if (dims >= 2) {
-      *out_height = lp_build_extract_broadcast(bld->gallivm,
-                                               size_type,
-                                               coord_type,
-                                               size,
-                                               LLVMConstInt(i32t, 1, 0));
-      if (dims == 3) {
-         *out_depth = lp_build_extract_broadcast(bld->gallivm,
-                                                 size_type,
-                                                 coord_type,
-                                                 size,
-                                                 LLVMConstInt(i32t, 2, 0));
+      if (dims == 1) {
+         *out_width = size;
+      }
+      else if (bld->num_lods == num_quads) {
+         *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0);
+         if (dims >= 2) {
+            *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1);
+            if (dims == 3) {
+               *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2);
+            }
+         }
+      }
+      else {
+         assert(bld->num_lods == bld->coord_type.length);
+         *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+                                                coord_type, size, 0);
+         if (dims >= 2) {
+            *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+                                                   coord_type, size, 1);
+            if (dims == 3) {
+               *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+                                                      coord_type, size, 2);
+            }
+         }
       }
    }
 }
@@ -886,7 +1088,7 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
    LLVMValueRef depth;
 
    lp_build_extract_image_sizes(bld,
-                                bld->float_size_type,
+                                &bld->float_size_bld,
                                 bld->coord_type,
                                 flt_size,
                                 &width,