X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_bld_sample_soa.c;h=47b68b71e25ff5a6aae4255df93dd153ab2d0cf6;hb=7d6c8f980d1e23ad6f557d650e89c715861a3b0c;hp=8ca1be6f1be80e826a136b59d5cb2c101e5cfa2b;hpb=c9f7a23ef05adfd2ebae56ee9f1b19897a589831;p=mesa.git

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 8ca1be6f1be..47b68b71e25 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -27,7 +27,7 @@
 
 /**
  * @file
- * Texture sampling.
+ * Texture sampling -- SoA.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
@@ -35,54 +35,23 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "util/u_debug.h"
+#include "util/u_debug_dump.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_format.h"
+#include "util/u_cpu_detect.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
+#include "lp_bld_conv.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
 #include "lp_bld_format.h"
 #include "lp_bld_sample.h"
 
 
-void
-lp_sampler_static_state(struct lp_sampler_static_state *state,
-                        const struct pipe_texture *texture,
-                        const struct pipe_sampler_state *sampler)
-{
-   memset(state, 0, sizeof *state);
-
-   if(!texture)
-      return;
-
-   if(!sampler)
-      return;
-
-   state->format            = texture->format;
-   state->target            = texture->target;
-   state->pot_width         = util_is_pot(texture->width[0]);
-   state->pot_height        = util_is_pot(texture->height[0]);
-   state->pot_depth         = util_is_pot(texture->depth[0]);
-
-   state->wrap_s            = sampler->wrap_s;
-   state->wrap_t            = sampler->wrap_t;
-   state->wrap_r            = sampler->wrap_r;
-   state->min_img_filter    = sampler->min_img_filter;
-   state->min_mip_filter    = sampler->min_mip_filter;
-   state->mag_img_filter    = sampler->mag_img_filter;
-   if(sampler->compare_mode) {
-      state->compare_mode      = sampler->compare_mode;
-      state->compare_func      = sampler->compare_func;
-   }
-   state->normalized_coords = sampler->normalized_coords;
-   state->prefilter         = sampler->prefilter;
-}
-
-
-
 /**
  * Keep all information for sampling code generation in a single place.
  */
@@ -111,66 +80,61 @@ struct lp_build_sample_context
 
 
 static void
-lp_build_sample_texel(struct lp_build_sample_context *bld,
-                      LLVMValueRef x,
-                      LLVMValueRef y,
-                      LLVMValueRef y_stride,
-                      LLVMValueRef data_ptr,
-                      LLVMValueRef *texel)
+lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+                          LLVMValueRef x,
+                          LLVMValueRef y,
+                          LLVMValueRef y_stride,
+                          LLVMValueRef data_ptr,
+                          LLVMValueRef *texel)
 {
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef x_stride;
    LLVMValueRef offset;
+   LLVMValueRef packed;
+
+   offset = lp_build_sample_offset(&bld->int_coord_bld,
+                                   bld->format_desc,
+                                   x, y, y_stride,
+                                   data_ptr);
+
+   assert(bld->format_desc->block.width == 1);
+   assert(bld->format_desc->block.height == 1);
+   assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+   packed = lp_build_gather(bld->builder,
+                            bld->texel_type.length,
+                            bld->format_desc->block.bits,
+                            bld->texel_type.width,
+                            data_ptr, offset);
+
+   lp_build_unpack_rgba_soa(bld->builder,
+                            bld->format_desc,
+                            bld->texel_type,
+                            packed, texel);
+}
 
-   x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
-
-   if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
-      LLVMValueRef x_lo, x_hi;
-      LLVMValueRef y_lo, y_hi;
-      LLVMValueRef x_stride_lo, x_stride_hi;
-      LLVMValueRef y_stride_lo, y_stride_hi;
-      LLVMValueRef x_offset_lo, x_offset_hi;
-      LLVMValueRef y_offset_lo, y_offset_hi;
-      LLVMValueRef offset_lo, offset_hi;
-
-      x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
-      y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
-
-      x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
-      y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
-
-      x_stride_lo = x_stride;
-      y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
-
-      x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
-      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
-
-      x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
-      y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
-      offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
-
-      x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
-      y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
-      offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
 
-      offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
-   }
-   else {
-      LLVMValueRef x_offset;
-      LLVMValueRef y_offset;
+static LLVMValueRef
+lp_build_sample_packed(struct lp_build_sample_context *bld,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr)
+{
+   LLVMValueRef offset;
 
-      x_offset = lp_build_mul(int_coord_bld, x, x_stride);
-      y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+   offset = lp_build_sample_offset(&bld->int_coord_bld,
+                                   bld->format_desc,
+                                   x, y, y_stride,
+                                   data_ptr);
 
-      offset = lp_build_add(int_coord_bld, x_offset, y_offset);
-   }
+   assert(bld->format_desc->block.width == 1);
+   assert(bld->format_desc->block.height == 1);
+   assert(bld->format_desc->block.bits <= bld->texel_type.width);
 
-   lp_build_load_rgba_soa(bld->builder,
-                          bld->format_desc,
-                          bld->texel_type,
-                          data_ptr,
-                          offset,
-                          texel);
+   return lp_build_gather(bld->builder,
+                          bld->texel_type.length,
+                          bld->format_desc->block.bits,
+                          bld->texel_type.width,
+                          data_ptr, offset);
 }
 
 
@@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
       /* FIXME */
-      _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+      _debug_printf("warning: failed to translate texture wrap mode %s\n",
+                    debug_dump_tex_wrap(wrap_mode, TRUE));
       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
       break;
@@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
    x = lp_build_sample_wrap(bld, x, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
    y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
 
-   lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+   lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
 }
 
 
@@ -274,8 +239,8 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
    t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
 
-   x0 = lp_build_int(&bld->coord_bld, s_ipart);
-   y0 = lp_build_int(&bld->coord_bld, t_ipart);
+   x0 = lp_build_itrunc(&bld->coord_bld, s_ipart);
+   y0 = lp_build_itrunc(&bld->coord_bld, t_ipart);
 
    x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
    y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
@@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
    y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
 
-   lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
-   lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
-   lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
-   lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+   lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
@@ -303,6 +268,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
 }
 
 
+static void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+                          struct lp_type dst_type,
+                          LLVMValueRef packed,
+                          LLVMValueRef *rgba)
+{
+   LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+   unsigned chan;
+
+   /* Decode the input vector components */
+   for (chan = 0; chan < 4; ++chan) {
+      unsigned start = chan*8;
+      unsigned stop = start + 8;
+      LLVMValueRef input;
+
+      input = packed;
+
+      if(start)
+         input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+
+      if(stop < 32)
+         input = LLVMBuildAnd(builder, input, mask, "");
+
+      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+      rgba[chan] = input;
+   }
+}
+
+
+static void
+lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
+                              LLVMValueRef s,
+                              LLVMValueRef t,
+                              LLVMValueRef width,
+                              LLVMValueRef height,
+                              LLVMValueRef stride,
+                              LLVMValueRef data_ptr,
+                              LLVMValueRef *texel)
+{
+   LLVMBuilderRef builder = bld->builder;
+   struct lp_build_context i32, h16, u8n;
+   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+   LLVMValueRef i32_c8, i32_c128, i32_c255;
+   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+   LLVMValueRef x0, x1;
+   LLVMValueRef y0, y1;
+   LLVMValueRef neighbors[2][2];
+   LLVMValueRef neighbors_lo[2][2];
+   LLVMValueRef neighbors_hi[2][2];
+   LLVMValueRef packed, packed_lo, packed_hi;
+   LLVMValueRef unswizzled[4];
+
+   lp_build_context_init(&i32, builder, lp_type_int(32));
+   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+   lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+   i32_vec_type = lp_build_vec_type(i32.type);
+   h16_vec_type = lp_build_vec_type(h16.type);
+   u8n_vec_type = lp_build_vec_type(u8n.type);
+
+   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+   t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+
+   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+
+   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+   s = LLVMBuildAdd(builder, s, i32_c128, "");
+   t = LLVMBuildAdd(builder, t, i32_c128, "");
+
+   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+
+   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+
+   x0 = s_ipart;
+   y0 = t_ipart;
+
+   x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+   x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   /*
+    * Transform 4 x i32 in
+    *
+    *   s_fpart = {s0, s1, s2, s3}
+    *
+    * into 8 x i16
+    *
+    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+    *
+    * into two 8 x i16
+    *
+    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+    *
+    * and likewise for t_fpart. There is no risk of loosing precision here
+    * since the fractional parts only use the lower 8bits.
+    */
+
+   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+
+   {
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+      LLVMValueRef shuffle_lo;
+      LLVMValueRef shuffle_hi;
+      unsigned i, j;
+
+      for(j = 0; j < h16.type.length; j += 4) {
+         unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+         LLVMValueRef index;
+
+         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+         for(i = 0; i < 4; ++i)
+            shuffles_lo[j + i] = index;
+
+         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+         for(i = 0; i < 4; ++i)
+            shuffles_hi[j + i] = index;
+      }
+
+      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
+      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
+      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
+      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
+   }
+
+   /*
+    * Fetch the pixels as 4 x 32bit (rgba order might differ):
+    *
+    *   rgba0 rgba1 rgba2 rgba3
+    *
+    * bit cast them into 16 x u8
+    *
+    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+    *
+    * unpack them into two 8 x i16:
+    *
+    *   r0 g0 b0 a0 r1 g1 b1 a1
+    *   r2 g2 b2 a2 r3 g3 b3 a3
+    *
+    * The higher 8 bits of the resulting elements will be zero.
+    */
+
+   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
+   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
+   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
+   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+
+   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
+   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
+   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
+   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+
+   /*
+    * Linear interpolate with 8.8 fixed point.
+    */
+
+   packed_lo = lp_build_lerp_2d(&h16,
+                                s_fpart_lo, t_fpart_lo,
+                                neighbors_lo[0][0],
+                                neighbors_lo[0][1],
+                                neighbors_lo[1][0],
+                                neighbors_lo[1][1]);
+
+   packed_hi = lp_build_lerp_2d(&h16,
+                                s_fpart_hi, t_fpart_hi,
+                                neighbors_hi[0][0],
+                                neighbors_hi[0][1],
+                                neighbors_hi[1][0],
+                                neighbors_hi[1][1]);
+
+   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
+
+   /*
+    * Convert to SoA and swizzle.
+    */
+
+   packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
+
+   lp_build_rgba8_to_f32_soa(bld->builder,
+                             bld->texel_type,
+                             packed, unswizzled);
+
+   lp_build_format_swizzle_soa(bld->format_desc,
+                               bld->texel_type, unswizzled,
+                               texel);
+}
+
+
 static void
 lp_build_sample_compare(struct lp_build_sample_context *bld,
                         LLVMValueRef p,
@@ -402,7 +578,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
       break;
    case PIPE_TEX_FILTER_LINEAR:
    case PIPE_TEX_FILTER_ANISO:
-      lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+      if(lp_format_is_rgba8(bld.format_desc))
+         lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
+      else
+         lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
       break;
    default:
       assert(0);