gallivm: Do 4ubyte AoS texture filtering for any format that can be expressed.
author: José Fonseca <jfonseca@vmware.com>
Fri, 2 Jul 2010 17:42:49 +0000 (18:42 +0100)
committer: José Fonseca <jfonseca@vmware.com>
Fri, 2 Jul 2010 17:45:49 +0000 (18:45 +0100)
Except if it has only one channel, as it would take the same number of
instructions.

src/gallium/auxiliary/gallivm/lp_bld_sample.c
src/gallium/auxiliary/gallivm/lp_bld_sample.h
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

index 6e556f9d4e6845838655174b2a6334968b1d34f1..800d121113606f881fe638425f335e879eb39468 100644 (file)
@@ -127,20 +127,51 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 /**
  * Compute the offset of a pixel block.
  *
- * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as
- * per format description, and not individual pixels.
+ * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
+ *
+ * Returns the relative offset and i,j sub-block coordinates
  */
-LLVMValueRef
+void
 lp_build_sample_offset(struct lp_build_context *bld,
                        const struct util_format_description *format_desc,
                        LLVMValueRef x,
                        LLVMValueRef y,
                        LLVMValueRef z,
                        LLVMValueRef y_stride,
-                       LLVMValueRef z_stride)
+                       LLVMValueRef z_stride,
+                       LLVMValueRef *out_offset,
+                       LLVMValueRef *out_i,
+                       LLVMValueRef *out_j)
 {
    LLVMValueRef x_stride;
    LLVMValueRef offset;
+   LLVMValueRef i;
+   LLVMValueRef j;
+
+   /*
+    * Describe the coordinates in terms of pixel blocks.
+    *
+    * TODO: pixel blocks are power of two. LLVM should convert rem/div to
+    * bit arithmetic. Verify this.
+    */
+
+   if (format_desc->block.width == 1) {
+      i = bld->zero;
+   }
+   else {
+      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
+      i = LLVMBuildURem(bld->builder, x, block_width, "");
+      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+   }
+
+   if (format_desc->block.height == 1) {
+      j = bld->zero;
+   }
+   else {
+      LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
+      j = LLVMBuildURem(bld->builder, y, block_height, "");
+      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+   }
 
    x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
    offset = lp_build_mul(bld, x, x_stride);
@@ -155,5 +186,7 @@ lp_build_sample_offset(struct lp_build_context *bld,
       offset = lp_build_add(bld, offset, z_offset);
    }
 
-   return offset;
+   *out_offset = offset;
+   *out_i = i;
+   *out_j = j;
 }
index 64eb15a680a26074bce9789a9e8b41ed74d8d829..5b8f478094b1aa95564c8894396e803d64de53b1 100644 (file)
@@ -146,14 +146,17 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
                         const struct pipe_sampler_state *sampler);
 
 
-LLVMValueRef
+void
 lp_build_sample_offset(struct lp_build_context *bld,
                        const struct util_format_description *format_desc,
                        LLVMValueRef x,
                        LLVMValueRef y,
                        LLVMValueRef z,
                        LLVMValueRef y_stride,
-                       LLVMValueRef z_stride);
+                       LLVMValueRef z_stride,
+                       LLVMValueRef *out_offset,
+                       LLVMValueRef *out_i,
+                       LLVMValueRef *out_j);
 
 
 void
index 8cca3f639a5649e78864b292e702af8cc459fd9a..b8c1a7234bc0689feaf8cc5a40be7ece089f9d33 100644 (file)
@@ -265,35 +265,11 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
-   /*
-    * Describe the coordinates in terms of pixel blocks.
-    *
-    * TODO: pixel blocks are power of two. LLVM should convert rem/div to
-    * bit arithmetic. Verify this.
-    */
-
-   if (bld->format_desc->block.width == 1) {
-      i = bld->uint_coord_bld.zero;
-   }
-   else {
-      LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
-      i = LLVMBuildURem(bld->builder, x, block_width, "");
-      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
-   }
-
-   if (bld->format_desc->block.height == 1) {
-      j = bld->uint_coord_bld.zero;
-   }
-   else {
-      LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
-      j = LLVMBuildURem(bld->builder, y, block_height, "");
-      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
-   }
-
    /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, z, y_stride, z_stride);
+   lp_build_sample_offset(&bld->uint_coord_bld,
+                          bld->format_desc,
+                          x, y, z, y_stride, z_stride,
+                          &offset, &i, &j);
 
    if (use_border) {
       /* If we can sample the border color, it means that texcoords may
@@ -345,6 +321,9 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Fetch the texels as <4n x i8> in AoS form.
+ */
 static LLVMValueRef
 lp_build_sample_packed(struct lp_build_sample_context *bld,
                        LLVMValueRef x,
@@ -352,25 +331,46 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
                        LLVMValueRef y_stride,
                        LLVMValueRef data_array)
 {
-   LLVMValueRef offset;
+   LLVMValueRef offset, i, j;
    LLVMValueRef data_ptr;
+   LLVMValueRef res;
 
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, NULL, y_stride, NULL);
-
-   assert(bld->format_desc->block.width == 1);
-   assert(bld->format_desc->block.height == 1);
-   assert(bld->format_desc->block.bits <= bld->texel_type.width);
+   /* convert x,y,z coords to linear offset from start of texture, in bytes */
+   lp_build_sample_offset(&bld->uint_coord_bld,
+                          bld->format_desc,
+                          x, y, NULL, y_stride, NULL,
+                          &offset, &i, &j);
 
    /* get pointer to mipmap level 0 data */
    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 
-   return lp_build_gather(bld->builder,
-                          bld->texel_type.length,
-                          bld->format_desc->block.bits,
-                          bld->texel_type.width,
-                          data_ptr, offset);
+   if (util_format_is_rgba8_variant(bld->format_desc)) {
+      /* Just fetch the data directly without swizzling */
+      assert(bld->format_desc->block.width == 1);
+      assert(bld->format_desc->block.height == 1);
+      assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+      res = lp_build_gather(bld->builder,
+                            bld->texel_type.length,
+                            bld->format_desc->block.bits,
+                            bld->texel_type.width,
+                            data_ptr, offset);
+   }
+   else {
+      struct lp_type type;
+
+      assert(bld->texel_type.width == 32);
+
+      memset(&type, 0, sizeof type);
+      type.width = 8;
+      type.length = bld->texel_type.length*4;
+      type.norm = TRUE;
+
+      res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
+                                    data_ptr, offset, i, j);
+   }
+
+   return res;
 }
 
 
@@ -1910,9 +1910,16 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                              bld->texel_type,
                              packed, unswizzled);
 
-   lp_build_format_swizzle_soa(bld->format_desc,
-                               &bld->texel_bld,
-                               unswizzled, texel_out);
+   if (util_format_is_rgba8_variant(bld->format_desc)) {
+      lp_build_format_swizzle_soa(bld->format_desc,
+                                  &bld->texel_bld,
+                                  unswizzled, texel_out);
+   } else {
+      texel_out[0] = unswizzled[0];
+      texel_out[1] = unswizzled[1];
+      texel_out[2] = unswizzled[2];
+      texel_out[3] = unswizzled[3];
+   }
 
    apply_sampler_swizzle(bld, texel_out);
 }
@@ -2048,7 +2055,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
       /* For debug: no-op texture sampling */
       lp_build_sample_nop(&bld, texel_out);
    }
-   else if (util_format_is_rgba8_variant(bld.format_desc) &&
+   else if (util_format_fits_8unorm(bld.format_desc) &&
+            bld.format_desc->nr_channels > 1 &&
             static_state->target == PIPE_TEXTURE_2D &&
             static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
             static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&