Merge remote-tracking branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / auxiliary / util / u_format_s3tc.c
index d1f45d751fa8388157368d33827ac46b76658d7f..bb989c29d81869c6ea4fdff7205d5c7c97ed6e14 100644 (file)
@@ -30,6 +30,8 @@
 
 #if defined(_WIN32) || defined(WIN32)
 #define DXTN_LIBNAME "dxtn.dll"
+#elif defined(__APPLE__)
+#define DXTN_LIBNAME "libtxc_dxtn.dylib"
 #else
 #define DXTN_LIBNAME "libtxc_dxtn.so"
 #endif
@@ -118,7 +120,7 @@ util_format_s3tc_init(void)
    library = util_dl_open(DXTN_LIBNAME);
    if (!library) {
       debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
-         "compression/decompression unavailable");
+         "compression/decompression unavailable\n");
       return;
    }
 
@@ -140,7 +142,7 @@ util_format_s3tc_init(void)
        !util_format_dxtn_pack) {
       debug_printf("couldn't reference all symbols in " DXTN_LIBNAME
                    ", software DXTn compression/decompression "
-                   "unavailable");
+                   "unavailable\n");
       util_dl_close(library);
       return;
    }
@@ -159,31 +161,31 @@ util_format_s3tc_init(void)
  */
 
 void
-util_format_dxt1_rgb_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_rgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    util_format_dxt1_rgb_fetch(0, src, i, j, dst);
 }
 
 void
-util_format_dxt1_rgba_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    util_format_dxt1_rgba_fetch(0, src, i, j, dst);
 }
 
 void
-util_format_dxt3_rgba_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt3_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    util_format_dxt3_rgba_fetch(0, src, i, j, dst);
 }
 
 void
-util_format_dxt5_rgba_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt5_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    util_format_dxt5_rgba_fetch(0, src, i, j, dst);
 }
 
 void
-util_format_dxt1_rgb_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_rgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    uint8_t tmp[4];
    util_format_dxt1_rgb_fetch(0, src, i, j, tmp);
@@ -194,7 +196,7 @@ util_format_dxt1_rgb_fetch_float(float *dst, const uint8_t *src, unsigned i, uns
 }
 
 void
-util_format_dxt1_rgba_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    uint8_t tmp[4];
    util_format_dxt1_rgba_fetch(0, src, i, j, tmp);
@@ -205,7 +207,7 @@ util_format_dxt1_rgba_fetch_float(float *dst, const uint8_t *src, unsigned i, un
 }
 
 void
-util_format_dxt3_rgba_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt3_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    uint8_t tmp[4];
    util_format_dxt3_rgba_fetch(0, src, i, j, tmp);
@@ -216,7 +218,7 @@ util_format_dxt3_rgba_fetch_float(float *dst, const uint8_t *src, unsigned i, un
 }
 
 void
-util_format_dxt5_rgba_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
    uint8_t tmp[4];
    util_format_dxt5_rgba_fetch(0, src, i, j, tmp);
@@ -231,108 +233,80 @@ util_format_dxt5_rgba_fetch_float(float *dst, const uint8_t *src, unsigned i, un
  * Block decompression.
  */
 
-void
-util_format_dxt1_rgb_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+static INLINE void
+util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                        const uint8_t *src_row, unsigned src_stride,
+                                        unsigned width, unsigned height,
+                                        util_format_dxtn_fetch_t fetch,
+                                        unsigned block_size)
 {
+   const unsigned bw = 4, bh = 4, comps = 4;
    unsigned x, y, i, j;
-   for(y = 0; y < height; y += 4) {
+   for(y = 0; y < height; y += bh) {
       const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 4) {
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
-               util_format_dxt1_rgb_fetch(0, src, i, j, dst);
+      for(x = 0; x < width; x += bw) {
+         for(j = 0; j < bh; ++j) {
+            for(i = 0; i < bw; ++i) {
+               uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps;
+               fetch(0, src, i, j, dst);
             }
          }
-         src += 8;
+         src += block_size;
       }
       src_row += src_stride;
    }
 }
 
 void
-util_format_dxt1_rgba_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                        const uint8_t *src_row, unsigned src_stride,
+                                        unsigned width, unsigned height)
 {
-   unsigned x, y, i, j;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 4) {
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
-               util_format_dxt1_rgba_fetch(0, src, i, j, dst);
-            }
-         }
-         src += 8;
-      }
-      src_row += src_stride;
-   }
+   util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+                                           src_row, src_stride,
+                                           width, height,
+                                           util_format_dxt1_rgb_fetch, 8);
 }
 
 void
-util_format_dxt3_rgba_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                         const uint8_t *src_row, unsigned src_stride,
+                                         unsigned width, unsigned height)
 {
-   unsigned x, y, i, j;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 4) {
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
-               util_format_dxt3_rgba_fetch(0, src, i, j, dst);
-            }
-         }
-         src += 16;
-      }
-      src_row += src_stride;
-   }
+   util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+                                           src_row, src_stride,
+                                           width, height,
+                                           util_format_dxt1_rgba_fetch, 8);
 }
 
 void
-util_format_dxt5_rgba_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                         const uint8_t *src_row, unsigned src_stride,
+                                         unsigned width, unsigned height)
 {
-   unsigned x, y, i, j;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 4) {
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
-               util_format_dxt5_rgba_fetch(0, src, i, j, dst);
-            }
-         }
-         src += 16;
-      }
-      src_row += src_stride;
-   }
+   util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+                                           src_row, src_stride,
+                                           width, height,
+                                           util_format_dxt3_rgba_fetch, 16);
 }
 
 void
-util_format_dxt1_rgb_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                         const uint8_t *src_row, unsigned src_stride,
+                                         unsigned width, unsigned height)
 {
-   unsigned x, y, i, j;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 4) {
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
-               uint8_t tmp[4];
-               util_format_dxt1_rgb_fetch(0, src, i, j, tmp);
-               dst[0] = ubyte_to_float(tmp[0]);
-               dst[1] = ubyte_to_float(tmp[1]);
-               dst[2] = ubyte_to_float(tmp[2]);
-               dst[3] = 1.0;
-            }
-         }
-         src += 8;
-      }
-      src_row += src_stride;
-   }
+   util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
+                                           src_row, src_stride,
+                                           width, height,
+                                           util_format_dxt5_rgba_fetch, 16);
 }
 
-void
-util_format_dxt1_rgba_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+static INLINE void
+util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+                                       const uint8_t *src_row, unsigned src_stride,
+                                       unsigned width, unsigned height,
+                                       util_format_dxtn_fetch_t fetch,
+                                       unsigned block_size)
 {
    unsigned x, y, i, j;
    for(y = 0; y < height; y += 4) {
@@ -342,65 +316,61 @@ util_format_dxt1_rgba_unpack_float(float *dst_row, unsigned dst_stride, const ui
             for(i = 0; i < 4; ++i) {
                float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
                uint8_t tmp[4];
-               util_format_dxt1_rgba_fetch(0, src, i, j, tmp);
+               fetch(0, src, i, j, tmp);
                dst[0] = ubyte_to_float(tmp[0]);
                dst[1] = ubyte_to_float(tmp[1]);
                dst[2] = ubyte_to_float(tmp[2]);
                dst[3] = ubyte_to_float(tmp[3]);
             }
          }
-         src += 8;
+         src += block_size;
       }
       src_row += src_stride;
    }
 }
 
 void
-util_format_dxt3_rgba_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+                                       const uint8_t *src_row, unsigned src_stride,
+                                       unsigned width, unsigned height)
 {
-   unsigned x, y, i, j;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 4) {
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
-               uint8_t tmp[4];
-               util_format_dxt3_rgba_fetch(0, src, i, j, tmp);
-               dst[0] = ubyte_to_float(tmp[0]);
-               dst[1] = ubyte_to_float(tmp[1]);
-               dst[2] = ubyte_to_float(tmp[2]);
-               dst[3] = ubyte_to_float(tmp[3]);
-            }
-         }
-         src += 16;
-      }
-      src_row += src_stride;
-   }
+   util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+                                          src_row, src_stride,
+                                          width, height,
+                                          util_format_dxt1_rgb_fetch, 8);
 }
 
 void
-util_format_dxt5_rgba_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+                                        const uint8_t *src_row, unsigned src_stride,
+                                        unsigned width, unsigned height)
 {
-   unsigned x, y, i, j;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
-      for(x = 0; x < width; x += 4) {
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4;
-               uint8_t tmp[4];
-               util_format_dxt5_rgba_fetch(0, src, i, j, tmp);
-               dst[0] = ubyte_to_float(tmp[0]);
-               dst[1] = ubyte_to_float(tmp[1]);
-               dst[2] = ubyte_to_float(tmp[2]);
-               dst[3] = ubyte_to_float(tmp[3]);
-            }
-         }
-         src += 16;
-      }
-      src_row += src_stride;
-   }
+   util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+                                          src_row, src_stride,
+                                          width, height,
+                                          util_format_dxt1_rgba_fetch, 8);
+}
+
+void
+util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+                                        const uint8_t *src_row, unsigned src_stride,
+                                        unsigned width, unsigned height)
+{
+   util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+                                          src_row, src_stride,
+                                          width, height,
+                                          util_format_dxt3_rgba_fetch, 16);
+}
+
+void
+util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
+                                        const uint8_t *src_row, unsigned src_stride,
+                                        unsigned width, unsigned height)
+{
+   util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
+                                          src_row, src_stride,
+                                          width, height,
+                                          util_format_dxt5_rgba_fetch, 16);
 }
 
 
@@ -409,201 +379,198 @@ util_format_dxt5_rgba_unpack_float(float *dst_row, unsigned dst_stride, const ui
  */
 
 void
-util_format_dxt1_rgb_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                      const uint8_t *src, unsigned src_stride,
+                                      unsigned width, unsigned height)
 {
+   const unsigned bw = 4, bh = 4, bytes_per_block = 8;
    unsigned x, y, i, j, k;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
+   for(y = 0; y < height; y += bh) {
       uint8_t *dst = dst_row;
-      for(x = 0; x < width; x += 4) {
-         uint8_t tmp[4][4][3];
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
+      for(x = 0; x < width; x += bw) {
+         uint8_t tmp[4][4][3];  /* [bh][bw][comps] */
+         for(j = 0; j < bh; ++j) {
+            for(i = 0; i < bw; ++i) {
                for(k = 0; k < 3; ++k) {
-                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k];
                }
             }
          }
-         util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride);
-         src += 4*4;
-         dst += 8;
+         util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
+         dst += bytes_per_block;
       }
-      src_row += src_stride;
-      dst_row += 4*dst_stride/sizeof(*dst_row);
+      dst_row += dst_stride / sizeof(*dst_row);
    }
 }
 
 void
-util_format_dxt1_rgba_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                       const uint8_t *src, unsigned src_stride,
+                                       unsigned width, unsigned height)
 {
+   const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8;
    unsigned x, y, i, j, k;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
+   for(y = 0; y < height; y += bh) {
       uint8_t *dst = dst_row;
-      for(x = 0; x < width; x += 4) {
-         uint8_t tmp[4][4][4];
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               for(k = 0; k < 4; ++k) {
-                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+      for(x = 0; x < width; x += bw) {
+         uint8_t tmp[4][4][4];  /* [bh][bw][comps] */
+         for(j = 0; j < bh; ++j) {
+            for(i = 0; i < bw; ++i) {
+               for(k = 0; k < comps; ++k) {
+                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
                }
             }
          }
-         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride);
-         src += 4*4;
-         dst += 8;
+         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
+         dst += bytes_per_block;
       }
-      src_row += src_stride;
-      dst_row += 4*dst_stride/sizeof(*dst_row);
+      dst_row += dst_stride / sizeof(*dst_row);
    }
 }
 
 void
-util_format_dxt3_rgba_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                       const uint8_t *src, unsigned src_stride,
+                                       unsigned width, unsigned height)
 {
+   const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
    unsigned x, y, i, j, k;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
+   for(y = 0; y < height; y += bh) {
       uint8_t *dst = dst_row;
-      for(x = 0; x < width; x += 4) {
-         uint8_t tmp[4][4][4];
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               for(k = 0; k < 4; ++k) {
-                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+      for(x = 0; x < width; x += bw) {
+         uint8_t tmp[4][4][4];  /* [bh][bw][comps] */
+         for(j = 0; j < bh; ++j) {
+            for(i = 0; i < bw; ++i) {
+               for(k = 0; k < comps; ++k) {
+                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
                }
             }
          }
-         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride);
-         src += 4*4;
-         dst += 16;
+         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
+         dst += bytes_per_block;
       }
-      src_row += src_stride;
-      dst_row += 4*dst_stride/sizeof(*dst_row);
+      dst_row += dst_stride / sizeof(*dst_row);
    }
 }
 
 void
-util_format_dxt5_rgba_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
+                                       const uint8_t *src, unsigned src_stride,
+                                       unsigned width, unsigned height)
 {
+   const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16;
    unsigned x, y, i, j, k;
-   for(y = 0; y < height; y += 4) {
-      const uint8_t *src = src_row;
+
+   for(y = 0; y < height; y += bh) {
       uint8_t *dst = dst_row;
-      for(x = 0; x < width; x += 4) {
-         uint8_t tmp[4][4][4];
-         for(j = 0; j < 4; ++j) {
-            for(i = 0; i < 4; ++i) {
-               for(k = 0; k < 4; ++k) {
-                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k];
+      for(x = 0; x < width; x += bw) {
+         uint8_t tmp[4][4][4];  /* [bh][bw][comps] */
+         for(j = 0; j < bh; ++j) {
+            for(i = 0; i < bw; ++i) {
+               for(k = 0; k < comps; ++k) {
+                  tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k];
                }
             }
          }
-         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride);
-         src += 4*4;
-         dst += 16;
+         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
+         dst += bytes_per_block;
       }
-      src_row += src_stride;
-      dst_row += 4*dst_stride/sizeof(*dst_row);
+      dst_row += dst_stride / sizeof(*dst_row);
    }
 }
 
 void
-util_format_dxt1_rgb_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+                                     const float *src, unsigned src_stride,
+                                     unsigned width, unsigned height)
 {
    unsigned x, y, i, j, k;
    for(y = 0; y < height; y += 4) {
-      const float *src = src_row;
       uint8_t *dst = dst_row;
       for(x = 0; x < width; x += 4) {
          uint8_t tmp[4][4][3];
          for(j = 0; j < 4; ++j) {
             for(i = 0; i < 4; ++i) {
                for(k = 0; k < 3; ++k) {
-                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
                }
             }
          }
-         util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride);
-         src += 4*4;
+         util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0);
          dst += 8;
       }
-      src_row += src_stride;
       dst_row += 4*dst_stride/sizeof(*dst_row);
    }
 }
 
 void
-util_format_dxt1_rgba_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+                                      const float *src, unsigned src_stride,
+                                      unsigned width, unsigned height)
 {
    unsigned x, y, i, j, k;
    for(y = 0; y < height; y += 4) {
-      const float *src = src_row;
       uint8_t *dst = dst_row;
       for(x = 0; x < width; x += 4) {
          uint8_t tmp[4][4][4];
          for(j = 0; j < 4; ++j) {
             for(i = 0; i < 4; ++i) {
                for(k = 0; k < 4; ++k) {
-                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
                }
             }
          }
-         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride);
-         src += 4*4;
+         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0);
          dst += 8;
       }
-      src_row += src_stride;
       dst_row += 4*dst_stride/sizeof(*dst_row);
    }
 }
 
 void
-util_format_dxt3_rgba_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+                                      const float *src, unsigned src_stride,
+                                      unsigned width, unsigned height)
 {
    unsigned x, y, i, j, k;
    for(y = 0; y < height; y += 4) {
-      const float *src = src_row;
       uint8_t *dst = dst_row;
       for(x = 0; x < width; x += 4) {
          uint8_t tmp[4][4][4];
          for(j = 0; j < 4; ++j) {
             for(i = 0; i < 4; ++i) {
                for(k = 0; k < 4; ++k) {
-                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
                }
             }
          }
-         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride);
-         src += 4*4;
+         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0);
          dst += 16;
       }
-      src_row += src_stride;
       dst_row += 4*dst_stride/sizeof(*dst_row);
    }
 }
 
 void
-util_format_dxt5_rgba_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
+                                      const float *src, unsigned src_stride,
+                                      unsigned width, unsigned height)
 {
    unsigned x, y, i, j, k;
    for(y = 0; y < height; y += 4) {
-      const float *src = src_row;
       uint8_t *dst = dst_row;
       for(x = 0; x < width; x += 4) {
          uint8_t tmp[4][4][4];
          for(j = 0; j < 4; ++j) {
             for(i = 0; i < 4; ++i) {
                for(k = 0; k < 4; ++k) {
-                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]);
+                  tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]);
                }
             }
          }
-         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride);
-         src += 4*4;
+         util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0);
          dst += 16;
       }
-      src_row += src_stride;
       dst_row += 4*dst_stride/sizeof(*dst_row);
    }
 }
@@ -616,146 +583,146 @@ util_format_dxt5_rgba_pack_float(uint8_t *dst_row, unsigned dst_stride, const fl
  */
 
 void
-util_format_dxt1_srgb_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgb_unpack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgb_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgb_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgb_pack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgb_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgb_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_srgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt1_rgb_fetch_8unorm(dst, src, i, j);
+   util_format_dxt1_rgb_fetch_rgba_8unorm(dst, src, i, j);
 }
 
 void
-util_format_dxt1_srgba_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgba_unpack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgba_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgba_pack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgba_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt1_rgba_fetch_8unorm(dst, src, i, j);
+   util_format_dxt1_rgba_fetch_rgba_8unorm(dst, src, i, j);
 }
 
 void
-util_format_dxt3_srgba_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt3_rgba_unpack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt3_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt3_srgba_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt3_rgba_pack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt3_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt3_srgba_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt3_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt3_rgba_fetch_8unorm(dst, src, i, j);
+   util_format_dxt3_rgba_fetch_rgba_8unorm(dst, src, i, j);
 }
 
 void
-util_format_dxt5_srgba_unpack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt5_rgba_unpack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt5_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt5_srgba_pack_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt5_rgba_pack_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt5_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt5_srgba_fetch_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt5_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt5_rgba_fetch_8unorm(dst, src, i, j);
+   util_format_dxt5_rgba_fetch_rgba_8unorm(dst, src, i, j);
 }
 
 void
-util_format_dxt1_srgb_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgb_unpack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgb_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgb_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgb_pack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgb_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgb_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_srgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt1_rgb_fetch_float(dst, src, i, j);
+   util_format_dxt1_rgb_fetch_rgba_float(dst, src, i, j);
 }
 
 void
-util_format_dxt1_srgba_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgba_unpack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgba_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt1_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt1_rgba_pack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt1_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt1_srgba_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt1_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt1_rgba_fetch_float(dst, src, i, j);
+   util_format_dxt1_rgba_fetch_rgba_float(dst, src, i, j);
 }
 
 void
-util_format_dxt3_srgba_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt3_rgba_unpack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt3_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt3_srgba_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt3_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt3_rgba_pack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt3_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt3_srgba_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt3_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt3_rgba_fetch_float(dst, src, i, j);
+   util_format_dxt3_rgba_fetch_rgba_float(dst, src, i, j);
 }
 
 void
-util_format_dxt5_srgba_unpack_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt5_rgba_unpack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt5_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt5_srgba_pack_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+util_format_dxt5_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
 {
-   util_format_dxt5_rgba_pack_float(dst_row, dst_stride, src_row, src_stride, width, height);
+   util_format_dxt5_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height);
 }
 
 void
-util_format_dxt5_srgba_fetch_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+util_format_dxt5_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   util_format_dxt5_rgba_fetch_float(dst, src, i, j);
+   util_format_dxt5_rgba_fetch_rgba_float(dst, src, i, j);
 }