gallivm: use the new mip per quad handling in texture fetch path

author Roland Scheidegger <sroland@vmware.com>

Tue, 27 Nov 2012 02:30:55 +0000 (03:30 +0100)

committer Roland Scheidegger <sroland@vmware.com>

Tue, 27 Nov 2012 02:30:55 +0000 (03:30 +0100)
author Roland Scheidegger <sroland@vmware.com>
Tue, 27 Nov 2012 02:30:55 +0000 (03:30 +0100)
committer Roland Scheidegger <sroland@vmware.com>
Tue, 27 Nov 2012 02:30:55 +0000 (03:30 +0100)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c

index ea7dd95b78be2d209b8985fcbd9a3dc5fb682ce4..37490e47c85c065bdecfc61746d2b311c8ef0ade 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -265,7 +265,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
  
     first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                   bld->gallivm, unit);
-   first_level_vec = lp_build_broadcast_scalar(&bld->int_size_bld, first_level);
+   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
     int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
     float_size = lp_build_int_to_float(float_size_bld, int_size);
  
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c

index 236b68bb0cef98e191b504a7f817c15406c3c507..1f3a98a689607d1c05fdc41df425079ec19c0985 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -516,6 +516,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                                LLVMValueRef row_stride_vec,
                                LLVMValueRef img_stride_vec,
                                LLVMValueRef data_ptr,
+                              LLVMValueRef mipoffsets,
                                LLVMValueRef s,
                                LLVMValueRef t,
                                LLVMValueRef r,
@@ -625,6 +626,9 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
           offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
        }
     }
+   if (mipoffsets) {
+      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
+   }
  
     lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                         x_subcoord, y_subcoord,
@@ -645,6 +649,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                       LLVMValueRef row_stride_vec,
                                       LLVMValueRef img_stride_vec,
                                       LLVMValueRef data_ptr,
+                                     LLVMValueRef mipoffsets,
                                       LLVMValueRef s,
                                       LLVMValueRef t,
                                       LLVMValueRef r,
@@ -711,6 +716,9 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                            row_stride_vec, img_stride_vec,
                            &offset,
                            &x_subcoord, &y_subcoord);
+   if (mipoffsets) {
+      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
+   }
  
     lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                         x_subcoord, y_subcoord,
@@ -966,6 +974,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                               LLVMValueRef row_stride_vec,
                               LLVMValueRef img_stride_vec,
                               LLVMValueRef data_ptr,
+                             LLVMValueRef mipoffsets,
                               LLVMValueRef s,
                               LLVMValueRef t,
                               LLVMValueRef r,
@@ -1073,6 +1082,11 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                     bld->static_state->wrap_s,
                                     &x_offset0, &x_offset1,
                                     &x_subcoord[0], &x_subcoord[1]);
+   if (mipoffsets) {
+      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
+      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
+   }
+
     for (z = 0; z < 2; z++) {
        for (y = 0; y < 2; y++) {
           offset[z][y][0] = x_offset0;
@@ -1149,6 +1163,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                      LLVMValueRef row_stride_vec,
                                      LLVMValueRef img_stride_vec,
                                      LLVMValueRef data_ptr,
+                                    LLVMValueRef mipoffsets,
                                      LLVMValueRef s,
                                      LLVMValueRef t,
                                      LLVMValueRef r,
@@ -1238,6 +1253,11 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    bld->format_desc->block.width,
                                    x_icoord1, x_stride,
                                    &x_offset1, &x_subcoord[1]);
+   if (mipoffsets) {
+      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
+      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
+   }
+
     for (z = 0; z < 2; z++) {
        for (y = 0; y < 2; y++) {
           offset[z][y][0] = x_offset0;
@@ -1330,6 +1350,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
     LLVMValueRef img_stride1_vec = NULL;
     LLVMValueRef data_ptr0;
     LLVMValueRef data_ptr1;
+   LLVMValueRef mipoff0 = NULL;
+   LLVMValueRef mipoff1 = NULL;
     LLVMValueRef colors0_lo, colors0_hi;
     LLVMValueRef colors1_lo, colors1_hi;
  
@@ -1337,13 +1359,21 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
     lp_build_mipmap_level_sizes(bld, ilevel0,
                                 &size0,
                                 &row_stride0_vec, &img_stride0_vec);
-   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+   if (bld->num_lods == 1) {
+      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+   }
+   else {
+      /* This path should work for num_lods 1 too but slightly less efficient */
+      data_ptr0 = bld->base_ptr;
+      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
+   }
+
     if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
        if (img_filter == PIPE_TEX_FILTER_NEAREST) {
           lp_build_sample_image_nearest_afloat(bld,
                                                size0,
                                                row_stride0_vec, img_stride0_vec,
-                                              data_ptr0, s, t, r,
+                                              data_ptr0, mipoff0, s, t, r,
                                                &colors0_lo, &colors0_hi);
        }
        else {
@@ -1351,7 +1381,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           lp_build_sample_image_linear_afloat(bld,
                                               size0,
                                               row_stride0_vec, img_stride0_vec,
-                                             data_ptr0, s, t, r,
+                                             data_ptr0, mipoff0, s, t, r,
                                               &colors0_lo, &colors0_hi);
        }
     }
@@ -1360,7 +1390,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           lp_build_sample_image_nearest(bld,
                                         size0,
                                         row_stride0_vec, img_stride0_vec,
-                                       data_ptr0, s, t, r,
+                                       data_ptr0, mipoff0, s, t, r,
                                         &colors0_lo, &colors0_hi);
        }
        else {
@@ -1368,7 +1398,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           lp_build_sample_image_linear(bld,
                                        size0,
                                        row_stride0_vec, img_stride0_vec,
-                                      data_ptr0, s, t, r,
+                                      data_ptr0, mipoff0, s, t, r,
                                        &colors0_lo, &colors0_hi);
        }
     }
@@ -1422,21 +1452,30 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           lp_build_mipmap_level_sizes(bld, ilevel1,
                                       &size1,
                                       &row_stride1_vec, &img_stride1_vec);
-         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+         lp_build_mipmap_level_sizes(bld, ilevel1,
+                                     &size1,
+                                     &row_stride1_vec, &img_stride1_vec);
+         if (bld->num_lods == 1) {
+            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+         }
+         else {
+            data_ptr1 = bld->base_ptr;
+            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
+         }
  
           if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
              if (img_filter == PIPE_TEX_FILTER_NEAREST) {
                 lp_build_sample_image_nearest_afloat(bld,
                                                      size1,
                                                      row_stride1_vec, img_stride1_vec,
-                                                    data_ptr1, s, t, r,
+                                                    data_ptr1, mipoff1, s, t, r,
                                                      &colors1_lo, &colors1_hi);
              }
              else {
                 lp_build_sample_image_linear_afloat(bld,
                                                     size1,
                                                     row_stride1_vec, img_stride1_vec,
-                                                   data_ptr1, s, t, r,
+                                                   data_ptr1, mipoff1, s, t, r,
                                                     &colors1_lo, &colors1_hi);
              }
           }
@@ -1445,14 +1484,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                 lp_build_sample_image_nearest(bld,
                                               size1,
                                               row_stride1_vec, img_stride1_vec,
-                                             data_ptr1, s, t, r,
+                                             data_ptr1, mipoff1, s, t, r,
                                               &colors1_lo, &colors1_hi);
              }
              else {
                 lp_build_sample_image_linear(bld,
                                              size1,
                                              row_stride1_vec, img_stride1_vec,
-                                            data_ptr1, s, t, r,
+                                            data_ptr1, mipoff1, s, t, r,
                                              &colors1_lo, &colors1_hi);
              }
           }
@@ -1580,6 +1619,19 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
        struct lp_build_if_state if_ctx;
        LLVMValueRef minify;
  
+      /*
+       * XXX this should take all lods into account; if some are min and
+       * some mag, we could probably hack up the coords/weights in the linear
+       * path with selects to work for nearest.
+       * If that's just two quads sitting next to each other it seems
+       * quite ok to do the same filtering method on both though, at
+       * least unless we have explicit lod (and who uses different
+       * min/mag filter with that?)
+       */
+      if (bld->num_lods > 1)
+         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
+                                              lp_build_const_int32(bld->gallivm, 0), "");
+
        /* minify = lod >= 0.0 */
        minify = LLVMBuildICmp(builder, LLVMIntSGE,
                               lod_ipart, int_bld->zero, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

index daa49506ca5db26f3bb17d2db2d8027aefa55788..97a23df4a875d798923e2814eac999c7d49d4c24 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -82,6 +82,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                            LLVMValueRef y_stride,
                            LLVMValueRef z_stride,
                            LLVMValueRef data_ptr,
+                          LLVMValueRef mipoffsets,
                            LLVMValueRef texel_out[4])
  {
     const struct lp_sampler_static_state *static_state = bld->static_state;
@@ -139,6 +140,9 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                            bld->format_desc,
                            x, y, z, y_stride, z_stride,
                            &offset, &i, &j);
+   if (mipoffsets) {
+      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
+   }
  
     if (use_border) {
        /* If we can sample the border color, it means that texcoords may
@@ -594,6 +598,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                                LLVMValueRef row_stride_vec,
                                LLVMValueRef img_stride_vec,
                                LLVMValueRef data_ptr,
+                              LLVMValueRef mipoffsets,
                                LLVMValueRef s,
                                LLVMValueRef t,
                                LLVMValueRef r,
@@ -661,7 +666,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                               width_vec, height_vec, depth_vec,
                               x, y, z,
                               row_stride_vec, img_stride_vec,
-                             data_ptr, colors_out);
+                             data_ptr, mipoffsets, colors_out);
  }
  
  
@@ -676,6 +681,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                               LLVMValueRef row_stride_vec,
                               LLVMValueRef img_stride_vec,
                               LLVMValueRef data_ptr,
+                             LLVMValueRef mipoffsets,
                               LLVMValueRef s,
                               LLVMValueRef t,
                               LLVMValueRef r,
@@ -756,12 +762,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                               width_vec, height_vec, depth_vec,
                               x0, y0, z0,
                               row_stride_vec, img_stride_vec,
-                             data_ptr, neighbors[0][0]);
+                             data_ptr, mipoffsets, neighbors[0][0]);
     lp_build_sample_texel_soa(bld, unit,
                               width_vec, height_vec, depth_vec,
                               x1, y0, z0,
                               row_stride_vec, img_stride_vec,
-                             data_ptr, neighbors[0][1]);
+                             data_ptr, mipoffsets, neighbors[0][1]);
  
     if (dims == 1) {
        /* Interpolate two samples from 1D image to produce one color */
@@ -780,12 +786,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                  width_vec, height_vec, depth_vec,
                                  x0, y1, z0,
                                  row_stride_vec, img_stride_vec,
-                                data_ptr, neighbors[1][0]);
+                                data_ptr, mipoffsets, neighbors[1][0]);
        lp_build_sample_texel_soa(bld, unit,
                                  width_vec, height_vec, depth_vec,
                                  x1, y1, z0,
                                  row_stride_vec, img_stride_vec,
-                                data_ptr, neighbors[1][1]);
+                                data_ptr, mipoffsets, neighbors[1][1]);
  
        /* Bilinear interpolate the four samples from the 2D image / 3D slice */
        for (chan = 0; chan < 4; chan++) {
@@ -806,22 +812,22 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                     width_vec, height_vec, depth_vec,
                                     x0, y0, z1,
                                     row_stride_vec, img_stride_vec,
-                                   data_ptr, neighbors1[0][0]);
+                                   data_ptr, mipoffsets, neighbors1[0][0]);
           lp_build_sample_texel_soa(bld, unit,
                                     width_vec, height_vec, depth_vec,
                                     x1, y0, z1,
                                     row_stride_vec, img_stride_vec,
-                                   data_ptr, neighbors1[0][1]);
+                                   data_ptr, mipoffsets, neighbors1[0][1]);
           lp_build_sample_texel_soa(bld, unit,
                                     width_vec, height_vec, depth_vec,
                                     x0, y1, z1,
                                     row_stride_vec, img_stride_vec,
-                                   data_ptr, neighbors1[1][0]);
+                                   data_ptr, mipoffsets, neighbors1[1][0]);
           lp_build_sample_texel_soa(bld, unit,
                                     width_vec, height_vec, depth_vec,
                                     x1, y1, z1,
                                     row_stride_vec, img_stride_vec,
-                                   data_ptr, neighbors1[1][1]);
+                                   data_ptr, mipoffsets, neighbors1[1][1]);
  
           /* Bilinear interpolate the four samples from the second Z slice */
           for (chan = 0; chan < 4; chan++) {
@@ -878,6 +884,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
     LLVMValueRef img_stride1_vec = NULL;
     LLVMValueRef data_ptr0 = NULL;
     LLVMValueRef data_ptr1 = NULL;
+   LLVMValueRef mipoff0 = NULL;
+   LLVMValueRef mipoff1 = NULL;
     LLVMValueRef colors0[4], colors1[4];
     unsigned chan;
  
@@ -885,12 +893,19 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
     lp_build_mipmap_level_sizes(bld, ilevel0,
                                 &size0,
                                 &row_stride0_vec, &img_stride0_vec);
-   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+   if (bld->num_lods == 1) {
+      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+   }
+   else {
+      /* This path should work for num_lods 1 too but slightly less efficient */
+      data_ptr0 = bld->base_ptr;
+      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
+   }
     if (img_filter == PIPE_TEX_FILTER_NEAREST) {
        lp_build_sample_image_nearest(bld, unit,
                                      size0,
                                      row_stride0_vec, img_stride0_vec,
-                                    data_ptr0, s, t, r,
+                                    data_ptr0, mipoff0, s, t, r,
                                      colors0);
     }
     else {
@@ -898,7 +913,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
        lp_build_sample_image_linear(bld, unit,
                                     size0,
                                     row_stride0_vec, img_stride0_vec,
-                                   data_ptr0, s, t, r,
+                                   data_ptr0, mipoff0, s, t, r,
                                     colors0);
     }
  
@@ -943,19 +958,25 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           lp_build_mipmap_level_sizes(bld, ilevel1,
                                       &size1,
                                       &row_stride1_vec, &img_stride1_vec);
-         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+         if (bld->num_lods == 1) {
+            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+         }
+         else {
+            data_ptr1 = bld->base_ptr;
+            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
+         }
           if (img_filter == PIPE_TEX_FILTER_NEAREST) {
              lp_build_sample_image_nearest(bld, unit,
                                            size1,
                                            row_stride1_vec, img_stride1_vec,
-                                          data_ptr1, s, t, r,
+                                          data_ptr1, mipoff1, s, t, r,
                                            colors1);
           }
           else {
              lp_build_sample_image_linear(bld, unit,
                                           size1,
                                           row_stride1_vec, img_stride1_vec,
-                                         data_ptr1, s, t, r,
+                                         data_ptr1, mipoff1, s, t, r,
                                           colors1);
           }
  
@@ -1125,6 +1146,16 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
        struct lp_build_if_state if_ctx;
        LLVMValueRef minify;
  
+      /*
+       * XXX this should take all lods into account; if some are min and
+       * some mag, we could probably hack up the coords/weights in the linear
+       * path with selects to work for nearest.
+       * If that's just two quads sitting next to each other it seems
+       * quite ok to do the same filtering method on both though, at
+       * least unless we have explicit lod (and who uses different
+       * min/mag filter with that?)
+       */
+
        /* minify = lod >= 0.0 */
        minify = LLVMBuildICmp(builder, LLVMIntSGE,
                               lod_ipart, int_bld->zero, "");
@@ -1184,16 +1215,6 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
  
     /* XXX just like ordinary sampling, we don't handle per-pixel lod (yet). */
     if (explicit_lod && bld->static_state->target != PIPE_BUFFER) {
-      /* could also avoid this if there are no mipmaps */
-      /* XXX temporary hack until ordinary sampling handles per-quad lod the same */
-      bld->num_lods = bld->coord_type.length / 4;
-      bld->float_size_type = bld->float_size_in_type;
-      bld->float_size_type.length = bld->num_lods > 1 ? bld->coord_type.length :
-                                      bld->float_size_in_type.length;
-      bld->int_size_type = lp_int_type(bld->float_size_type);
-      lp_build_context_init(&bld->int_size_bld, bld->gallivm, bld->int_size_type);
-      lp_build_context_init(&bld->float_size_bld, bld->gallivm, bld->float_size_type);
-
        ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
                                           perquadi_bld->type, explicit_lod, 0);
        lp_build_nearest_mip_level(bld, unit, ilevel, &ilevel);
@@ -1275,6 +1296,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
   * Do shadow test/comparison.
   * \param p  the texcoord Z (aka R, aka P) component
   * \param texel  the texel to compare against (use the X channel)
+ * Ideally this should really be done per-sample.
   */
  static void
  lp_build_sample_compare(struct lp_build_sample_context *bld,
@@ -1358,6 +1380,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                      LLVMValueRef texel_out[4])
  {
     unsigned dims = texture_dims(static_state->target);
+   unsigned num_quads = type.length / 4;
+   unsigned mip_filter = static_state->min_mip_filter;
     struct lp_build_sample_context bld;
     LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
     LLVMBuilderRef builder = gallivm->builder;
@@ -1396,7 +1420,18 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
     bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
     bld.perquadi_type = lp_int_type(bld.perquadf_type);
  
-   bld.num_lods = 1;
+   /*
+    * There are other situations where at least the multiple int lods could be
+    * avoided like min and max lod being equal.
+    */
+   if ((is_fetch && explicit_lod && bld.static_state->target != PIPE_BUFFER) ||
+       (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+      bld.num_lods = num_quads;
+   }
+   else {
+      bld.num_lods = 1;
+   }
+
     bld.float_size_type = bld.float_size_in_type;
     bld.float_size_type.length = bld.num_lods > 1 ? type.length :
                                     bld.float_size_in_type.length;
@@ -1456,8 +1491,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
     else {
        LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
        LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
-      unsigned num_quads = type.length / 4;
-      const unsigned mip_filter = bld.static_state->min_mip_filter;
        boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
                          lp_is_simple_wrap_mode(static_state->wrap_s) &&
                          lp_is_simple_wrap_mode(static_state->wrap_t);
@@ -1494,16 +1527,21 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
  
        /*
         * we only try 8-wide sampling with soa as it appears to
-       * be a loss with aos with AVX.
+       * be a loss with aos with AVX (but it should work).
+       * (It should be faster if we supported avx2.)
         */
-      if (num_quads == 1 || (mip_filter == PIPE_TEX_MIPFILTER_NONE &&
-                             !use_aos)) {
+      if (num_quads == 1 || !use_aos) {
  
           if (num_quads > 1) {
-            LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
-            /* These parameters are the same for all quads */
-            lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
-            ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
+            if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+               LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
+               /*
+                * These parameters are the same for all quads,
+                * could probably simplify.
+                */
+               lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
+               ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
+            }
           }
           if (use_aos) {
              /* do sampling/filtering with fixed pt arithmetic */
@@ -1523,170 +1561,99 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
           }
        }
        else {
-         struct lp_build_if_state if_ctx;
-         LLVMValueRef notsame_levels, notsame;
-         LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
-         LLVMValueRef texels[4];
-         LLVMValueRef texelout[4];
           unsigned j;
-
-         texels[0] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texr");
-         texels[1] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texg");
-         texels[2] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texb");
-         texels[3] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texa");
-
-         /* only build the if if we MAY split, otherwise always split */
-         if (!use_aos) {
-            notsame = lp_build_extract_broadcast(gallivm,
-                                                 bld.perquadi_bld.type,
-                                                 bld.perquadi_bld.type,
-                                                 ilevel0, index0);
-            notsame = lp_build_sub(&bld.perquadi_bld, ilevel0, notsame);
-            notsame_levels = lp_build_any_true_range(&bld.perquadi_bld, num_quads,
-                                                     notsame);
+         struct lp_build_sample_context bld4;
+         struct lp_type type4 = type;
+         unsigned i;
+         LLVMValueRef texelout4[4];
+         LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
+
+         type4.length = 4;
+
+         /* Setup our build context */
+         memset(&bld4, 0, sizeof bld4);
+         bld4.gallivm = bld.gallivm;
+         bld4.static_state = bld.static_state;
+         bld4.dynamic_state = bld.dynamic_state;
+         bld4.format_desc = bld.format_desc;
+         bld4.dims = bld.dims;
+         bld4.row_stride_array = bld.row_stride_array;
+         bld4.img_stride_array = bld.img_stride_array;
+         bld4.base_ptr = bld.base_ptr;
+         bld4.mip_offsets = bld.mip_offsets;
+         bld4.int_size = bld.int_size;
+
+         bld4.vector_width = lp_type_width(type4);
+
+         bld4.float_type = lp_type_float(32);
+         bld4.int_type = lp_type_int(32);
+         bld4.coord_type = type4;
+         bld4.int_coord_type = lp_int_type(type4);
+         bld4.float_size_in_type = lp_type_float(32);
+         bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
+         bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
+         bld4.texel_type = type4;
+         bld4.perquadf_type = type4;
+         /* we want native vector size to be able to use our intrinsics */
+         bld4.perquadf_type.length = 1;
+         bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
+
+         bld4.num_lods = 1;
+         bld4.int_size_type = bld4.int_size_in_type;
+         bld4.float_size_type = bld4.float_size_in_type;
+
+         lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
+         lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
+         lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
+         lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
+         lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
+         lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
+         lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
+         lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
+         lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
+         lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
+         lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type);
+         lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type);
+
+         for (i = 0; i < num_quads; i++) {
+            LLVMValueRef s4, t4, r4;
+            LLVMValueRef lod_iparts, lod_fparts = NULL;
+            LLVMValueRef ilevel0s, ilevel1s = NULL;
+            LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+
+            s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
+            t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
+            r4 = lp_build_extract_range(gallivm, r, 4*i, 4);
+            lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, "");
+            ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, "");
              if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-               notsame = lp_build_extract_broadcast(gallivm,
-                                                    bld.perquadi_bld.type,
-                                                    bld.perquadi_bld.type,
-                                                    ilevel1, index0);
-               notsame = lp_build_sub(&bld.perquadi_bld, ilevel1, notsame);
-               notsame = lp_build_any_true_range(&bld.perquadi_bld, num_quads, notsame);
-               notsame_levels = LLVMBuildOr(builder, notsame_levels, notsame, "");
-            }
-            lp_build_if(&if_ctx, gallivm, notsame_levels);
-         }
-
-         {
-            struct lp_build_sample_context bld4;
-            struct lp_type type4 = type;
-            unsigned i;
-            LLVMValueRef texelout4[4];
-            LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
-
-            type4.length = 4;
-
-            /* Setup our build context */
-            memset(&bld4, 0, sizeof bld4);
-            bld4.gallivm = bld.gallivm;
-            bld4.static_state = bld.static_state;
-            bld4.dynamic_state = bld.dynamic_state;
-            bld4.format_desc = bld.format_desc;
-            bld4.dims = bld.dims;
-            bld4.row_stride_array = bld.row_stride_array;
-            bld4.img_stride_array = bld.img_stride_array;
-            bld4.base_ptr = bld.base_ptr;
-            bld4.mip_offsets = bld.mip_offsets;
-            bld4.int_size = bld.int_size;
-
-            bld4.vector_width = lp_type_width(type4);
-
-            bld4.float_type = lp_type_float(32);
-            bld4.int_type = lp_type_int(32);
-            bld4.coord_type = type4;
-            bld4.int_coord_type = lp_int_type(type4);
-            bld4.float_size_in_type = lp_type_float(32);
-            bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
-            bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
-            bld4.float_size_type = bld4.float_size_in_type;
-            bld4.int_size_type =  bld4.int_size_in_type;
-            bld4.texel_type = type4;
-            bld4.perquadf_type = type4;
-            /* we want native vector size to be able to use our intrinsics */
-            bld4.perquadf_type.length = 1;
-            bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
-            bld4.num_lods = 1;
-
-            lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
-            lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
-            lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
-            lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
-            lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
-            lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
-            lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
-            lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
-            lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
-            lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
-            lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type);
-            lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type);
-
-            for (i = 0; i < num_quads; i++) {
-               LLVMValueRef s4, t4, r4;
-               LLVMValueRef lod_iparts, lod_fparts = NULL;
-               LLVMValueRef ilevel0s, ilevel1s = NULL;
-               LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-
-               s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
-               t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
-               r4 = lp_build_extract_range(gallivm, r, 4*i, 4);
-               lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, "");
-               ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, "");
-               if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-                  ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, "");
-                  lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, "");
-               }
-
-               if (use_aos) {
-                  /* do sampling/filtering with fixed pt arithmetic */
-                  lp_build_sample_aos(&bld4, unit,
-                                      s4, t4, r4,
-                                      lod_iparts, lod_fparts,
-                                      ilevel0s, ilevel1s,
-                                      texelout4);
-               }
-
-               else {
-                  lp_build_sample_general(&bld4, unit,
-                                          s4, t4, r4,
-                                          lod_iparts, lod_fparts,
-                                          ilevel0s, ilevel1s,
-                                          texelout4);
-               }
-               for (j = 0; j < 4; j++) {
-                  texelouttmp[j][i] = texelout4[j];
-               }
-            }
-            for (j = 0; j < 4; j++) {
-               texelout[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
-               LLVMBuildStore(builder, texelout[j], texels[j]);
-            }
-         }
-         if (!use_aos) {
-            LLVMValueRef ilevel0s, lod_iparts, ilevel1s = NULL;
-
-            lp_build_else(&if_ctx);
-
-            /* These parameters are the same for all quads */
-            lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
-            ilevel0s = LLVMBuildExtractElement(builder, ilevel0, index0, "");
-            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-               ilevel1s = LLVMBuildExtractElement(builder, ilevel1, index0, "");
+               ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, "");
+               lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, "");
              }
  
              if (use_aos) {
                 /* do sampling/filtering with fixed pt arithmetic */
-               lp_build_sample_aos(&bld, unit,
-                                   s, t, r,
-                                   lod_iparts, lod_fpart,
+               lp_build_sample_aos(&bld4, unit,
+                                   s4, t4, r4,
+                                   lod_iparts, lod_fparts,
                                     ilevel0s, ilevel1s,
-                                   texelout);
+                                   texelout4);
              }
  
              else {
-               lp_build_sample_general(&bld, unit,
-                                       s, t, r,
-                                       lod_iparts, lod_fpart,
+               lp_build_sample_general(&bld4, unit,
+                                       s4, t4, r4,
+                                       lod_iparts, lod_fparts,
                                         ilevel0s, ilevel1s,
-                                       texelout);
+                                       texelout4);
              }
              for (j = 0; j < 4; j++) {
-               LLVMBuildStore(builder, texelout[j], texels[j]);
+               texelouttmp[j][i] = texelout4[j];
              }
-
-            lp_build_endif(&if_ctx);
           }
  
           for (j = 0; j < 4; j++) {
-            texel_out[j] = LLVMBuildLoad(builder, texels[j], "");
+            texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
           }
        }
     }
author	Roland Scheidegger <sroland@vmware.com>
	Tue, 27 Nov 2012 02:30:55 +0000 (03:30 +0100)
committer	Roland Scheidegger <sroland@vmware.com>
	Tue, 27 Nov 2012 02:30:55 +0000 (03:30 +0100)
src/gallium/auxiliary/gallivm/lp_bld_sample.c		patch | blob | history
src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c		patch | blob | history
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c		patch | blob | history