i965/tiled_memcopy: Add aligned mem_copy parameters to the [de]tiling functions

author Jason Ekstrand <jason.ekstrand@intel.com>

Thu, 7 Apr 2016 17:52:28 +0000 (10:52 -0700)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Fri, 8 Apr 2016 19:08:51 +0000 (12:08 -0700)
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 7 Apr 2016 17:52:28 +0000 (10:52 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 8 Apr 2016 19:08:51 +0000 (12:08 -0700)
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c

index 313545829640757d8f12c178ecd1d78703e38de8..0ffd3ffcab52e57f2d324a111b14c923ad66e6cd 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -172,6 +172,12 @@ typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
   * Copy texture data from linear to X tile layout.
   *
   * \copydoc tile_copy_fn
+ *
+ * The mem_copy parameters allow the user to specify alternative mem_copy
+ * functions that, for instance, may do RGBA -> BGRA swizzling.  The first
+ * function must handle any memory alignment, while the second function need
+ * only handle the case where the tiled side (source or destination) is
+ * 16-byte aligned.
   */
  static inline void
  linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
@@ -179,7 +185,8 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                   char *dst, const char *src,
                   int32_t src_pitch,
                   uint32_t swizzle_bit,
-                 mem_copy_fn mem_copy)
+                 mem_copy_fn mem_copy,
+                 mem_copy_fn mem_copy_align16)
  {
     /* The copy destination offset for each range copied is the sum of
      * an X offset 'x0' or 'xo' and a Y offset 'yo.'
@@ -200,10 +207,10 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
        mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0);
  
        for (xo = x1; xo < x2; xo += xtile_span) {
-         mem_copy(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
+         mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
        }
  
-      mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+      mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
  
        src += src_pitch;
     }
@@ -220,7 +227,8 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                   char *dst, const char *src,
                   int32_t src_pitch,
                   uint32_t swizzle_bit,
-                 mem_copy_fn mem_copy)
+                 mem_copy_fn mem_copy,
+                 mem_copy_fn mem_copy_align16)
  {
     /* Y tiles consist of columns that are 'ytile_span' wide (and the same height
      * as the tile).  Thus the destination offset for (x,y) is the sum of:
@@ -259,12 +267,12 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
         * at each step so we don't need to calculate it explicitly.
         */
        for (x = x1; x < x2; x += ytile_span) {
-         mem_copy(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
+         mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
           xo += bytes_per_column;
           swizzle ^= swizzle_bit;
        }
  
-      mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+      mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
  
        src += src_pitch;
     }
@@ -281,7 +289,8 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                   char *dst, const char *src,
                   int32_t dst_pitch,
                   uint32_t swizzle_bit,
-                 mem_copy_fn mem_copy)
+                 mem_copy_fn mem_copy,
+                 mem_copy_fn mem_copy_align16)
  {
     /* The copy destination offset for each range copied is the sum of
      * an X offset 'x0' or 'xo' and a Y offset 'yo.'
@@ -302,10 +311,10 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
        mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0);
  
        for (xo = x1; xo < x2; xo += xtile_span) {
-         mem_copy(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
+         mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
        }
  
-      mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+      mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
  
        dst += dst_pitch;
     }
@@ -322,7 +331,8 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                   char *dst, const char *src,
                   int32_t dst_pitch,
                   uint32_t swizzle_bit,
-                 mem_copy_fn mem_copy)
+                 mem_copy_fn mem_copy,
+                 mem_copy_fn mem_copy_align16)
  {
     /* Y tiles consist of columns that are 'ytile_span' wide (and the same height
      * as the tile).  Thus the destination offset for (x,y) is the sum of:
@@ -361,12 +371,12 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
         * at each step so we don't need to calculate it explicitly.
         */
        for (x = x1; x < x2; x += ytile_span) {
-         mem_copy(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
+         mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
           xo += bytes_per_column;
           swizzle ^= swizzle_bit;
        }
  
-      mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+      mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
  
        dst += dst_pitch;
     }
@@ -393,26 +403,27 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
     if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
        if (mem_copy == memcpy)
           return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
-                                 dst, src, src_pitch, swizzle_bit, memcpy);
+                                 dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_dst)
           return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
                                   dst, src, src_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_dst);
+                                 rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
        else
           unreachable("not reached");
     } else {
        if (mem_copy == memcpy)
           return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
-                                 dst, src, src_pitch, swizzle_bit, memcpy);
+                                 dst, src, src_pitch, swizzle_bit,
+                                 memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_dst)
           return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
                                   dst, src, src_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_dst);
+                                 rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
        else
           unreachable("not reached");
     }
     linear_to_xtiled(x0, x1, x2, x3, y0, y1,
-                    dst, src, src_pitch, swizzle_bit, mem_copy);
+                    dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
  }
  
  /**
@@ -435,26 +446,26 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
     if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
        if (mem_copy == memcpy)
           return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
-                                 dst, src, src_pitch, swizzle_bit, memcpy);
+                                 dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_dst)
           return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
                                   dst, src, src_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_dst);
+                                 rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
        else
           unreachable("not reached");
     } else {
        if (mem_copy == memcpy)
           return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
-                                 dst, src, src_pitch, swizzle_bit, memcpy);
+                                 dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_dst)
           return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
                                   dst, src, src_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_dst);
+                                 rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
        else
           unreachable("not reached");
     }
     linear_to_ytiled(x0, x1, x2, x3, y0, y1,
-                    dst, src, src_pitch, swizzle_bit, mem_copy);
+                    dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
  }
  
  /**
@@ -477,26 +488,26 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
     if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
        if (mem_copy == memcpy)
           return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
-                                 dst, src, dst_pitch, swizzle_bit, memcpy);
+                                 dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_src)
           return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
                                   dst, src, dst_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_src);
+                                 rgba8_copy_aligned_src, rgba8_copy_aligned_src);
        else
           unreachable("not reached");
     } else {
        if (mem_copy == memcpy)
           return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit, memcpy);
+                                 dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_src)
           return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
                                   dst, src, dst_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_src);
+                                 rgba8_copy_aligned_src, rgba8_copy_aligned_src);
        else
           unreachable("not reached");
     }
     xtiled_to_linear(x0, x1, x2, x3, y0, y1,
-                    dst, src, dst_pitch, swizzle_bit, mem_copy);
+                    dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
  }
  
  /**
@@ -519,26 +530,26 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
     if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
        if (mem_copy == memcpy)
           return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
-                                 dst, src, dst_pitch, swizzle_bit, memcpy);
+                                 dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_src)
           return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
                                   dst, src, dst_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_src);
+                                 rgba8_copy_aligned_src, rgba8_copy_aligned_src);
        else
           unreachable("not reached");
     } else {
        if (mem_copy == memcpy)
           return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit, memcpy);
+                                 dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
        else if (mem_copy == rgba8_copy_aligned_src)
           return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
                                   dst, src, dst_pitch, swizzle_bit,
-                                 rgba8_copy_aligned_src);
+                                 rgba8_copy_aligned_src, rgba8_copy_aligned_src);
        else
           unreachable("not reached");
     }
     ytiled_to_linear(x0, x1, x2, x3, y0, y1,
-                    dst, src, dst_pitch, swizzle_bit, mem_copy);
+                    dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
  }
  
  /**
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Thu, 7 Apr 2016 17:52:28 +0000 (10:52 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Fri, 8 Apr 2016 19:08:51 +0000 (12:08 -0700)