gallium/util: cache symbol lookup with libunwind
[mesa.git] / src / gallium / auxiliary / util / u_format_zs.c
index 68c88e520c98e9dba41fa23b3807579706ce80e3..69f2f2971f7a939f072afad482fed83850804fda 100644 (file)
 #include "u_format_zs.h"
 
 
+/*
+ * z32_unorm conversion functions
+ */
+
+/**
+ * Reduce a 32-bit unorm depth value to 16 bits by keeping its upper half.
+ */
+static inline uint16_t
+z32_unorm_to_z16_unorm(uint32_t z)
+{
+   /* Stands in for z * 0xffff / 0xffffffff: drop the low 16 bits. */
+   const uint32_t high_half = z >> 16;
+   return (uint16_t)high_half;
+}
+
+/**
+ * Expand a 16-bit unorm depth value to 32 bits by replicating it into
+ * both halves of the result (0x0000 -> 0x00000000, 0xffff -> 0xffffffff).
+ */
+static inline uint32_t
+z16_unorm_to_z32_unorm(uint16_t z)
+{
+   /* z * 0xffffffff / 0xffff
+    *
+    * Widen before shifting: a uint16_t operand is promoted to signed int,
+    * and with a 32-bit int shifting a value >= 0x8000 left by 16 overflows
+    * it, which is undefined behavior.
+    */
+   const uint32_t wide = z;
+   return (wide << 16) | wide;
+}
+
+/**
+ * Reduce a 32-bit unorm depth value to 24 bits by discarding its low byte.
+ * The result is returned in the low 24 bits of a uint32_t.
+ */
+static inline uint32_t
+z32_unorm_to_z24_unorm(uint32_t z)
+{
+   /* Stands in for z * 0xffffff / 0xffffffff. */
+   const uint32_t top24 = z >> 8;
+   return top24;
+}
+
+/**
+ * Expand a 24-bit unorm depth value to 32 bits.  The input is moved into
+ * the top 24 bits and its most significant byte is repeated in the low
+ * byte, so 0xffffff maps to 0xffffffff.  The callers visible in this file
+ * mask or shift the input down to 24 bits before calling.
+ */
+static inline uint32_t
+z24_unorm_to_z32_unorm(uint32_t z)
+{
+   /* Stands in for z * 0xffffffff / 0xffffff. */
+   const uint32_t upper = z << 8;
+   const uint32_t lower = z >> 16;
+   return upper | lower;
+}
+
+
+/*
+ * z32_float conversion functions
+ */
+
+/**
+ * Quantize a float depth value to 16-bit unorm: scale by 0xffff, add one
+ * half and truncate.  No clamping is done here.
+ */
+static inline uint16_t
+z32_float_to_z16_unorm(float z)
+{
+   const float max16 = 0xffff;
+
+   /* The added half makes the truncating cast round to nearest. */
+   return (uint16_t)(0.5f + z * max16);
+}
+
+/**
+ * Convert a 16-bit unorm depth value to float by multiplying it with a
+ * single-precision 1/0xffff factor.
+ */
+static inline float
+z16_unorm_to_z32_float(uint16_t z)
+{
+   const float inv_max16 = 1.0 / 0xffff;
+   return (float)(inv_max16 * z);
+}
+
+/**
+ * Quantize a float depth value to 24-bit unorm.  The scaling by 0xffffff
+ * is carried out in double precision, the product is truncated to an
+ * integer and then masked to its low 24 bits.  No clamping is done here.
+ */
+static inline uint32_t
+z32_float_to_z24_unorm(float z)
+{
+   const double max24 = 0xffffff;
+   const uint32_t quantized = (uint32_t)(z * max24);
+   return quantized & 0xffffff;
+}
+
+/**
+ * Convert a 24-bit unorm depth value to float.  The multiplication by
+ * 1/0xffffff is done in double precision and the product is then narrowed
+ * to float.
+ */
+static inline float
+z24_unorm_to_z32_float(uint32_t z)
+{
+   const double inv_max24 = 1.0 / 0xffffff;
+   return (float)(inv_max24 * z);
+}
+
+/**
+ * Quantize a float depth value to 32-bit unorm.  The scaling by
+ * 0xffffffff is carried out in double precision and the product is
+ * truncated to an integer.  No clamping is done here.
+ */
+static inline uint32_t
+z32_float_to_z32_unorm(float z)
+{
+   const double max32 = 0xffffffff;
+   return (uint32_t)(max32 * z);
+}
+
+/**
+ * Convert a 32-bit unorm depth value to float.  The multiplication by
+ * 1/0xffffffff is done in double precision and the product is then
+ * narrowed to float.
+ */
+static inline float
+z32_unorm_to_z32_float(uint32_t z)
+{
+   const double inv_max32 = 1.0 / 0xffffffff;
+   return (float)(inv_max32 * z);
+}
+
+
 void
-util_format_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_s8_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                          const uint8_t *src_row, unsigned src_stride,
                                          unsigned width, unsigned height)
 {
@@ -45,7 +125,7 @@ util_format_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
 }
 
 void
-util_format_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_s8_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                        const uint8_t *src_row, unsigned src_stride,
                                        unsigned width, unsigned height)
 {
@@ -67,12 +147,8 @@ util_format_z16_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
       float *dst = dst_row;
       const uint16_t *src = (const uint16_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint16_t value = *src++;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap16(value);
-#endif
-         dst[0] = (float)(value * (1.0f/0xffff));
-         dst += 1;
+         uint16_t value = util_cpu_to_le16(*src++);
+         *dst++ = z16_unorm_to_z32_float(value);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -90,12 +166,8 @@ util_format_z16_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
       uint16_t *dst = (uint16_t *)dst_row;
       for(x = 0; x < width; ++x) {
          uint16_t value;
-         value = (uint16_t)(*src * 0xffff);
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap16(value);
-#endif
-         *dst++ = value;
-         src += 1;
+         value = z32_float_to_z16_unorm(*src++);
+         *dst++ = util_le16_to_cpu(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -112,12 +184,8 @@ util_format_z16_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
       uint32_t *dst = dst_row;
       const uint16_t *src = (const uint16_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint16_t value = *src++;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap16(value);
-#endif
-         /* value * 0xffffffff / 0xffff */
-         *dst++ = (value << 16) | value;
+         uint16_t value = util_cpu_to_le16(*src++);
+         *dst++ = z16_unorm_to_z32_unorm(value);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -135,11 +203,8 @@ util_format_z16_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
       uint16_t *dst = (uint16_t *)dst_row;
       for(x = 0; x < width; ++x) {
          uint16_t value;
-         value = (uint16_t)(*src++ >> 16);
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap16(value);
-#endif
-         *dst++ = value;
+         value = z32_unorm_to_z16_unorm(*src++);
+         *dst++ = util_le16_to_cpu(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -156,11 +221,8 @@ util_format_z32_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
       float *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *src++;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *dst++ = (float)(value * (1.0/0xffffffff));
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z32_unorm_to_z32_float(value);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -178,12 +240,8 @@ util_format_z32_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
          uint32_t value;
-         value = (uint32_t)(*src * (double)0xffffffff);
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *dst++ = value;
-         ++src;
+         value = z32_float_to_z32_unorm(*src++);
+         *dst++ = util_le32_to_cpu(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -252,7 +310,7 @@ util_format_z32_float_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
       uint32_t *dst = dst_row;
       const float *src = (const float *)src_row;
       for(x = 0; x < width; ++x) {
-         *dst++ = (uint32_t)(*src++ * (double)0xffffffff);
+         *dst++ = z32_float_to_z32_unorm(*src++);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -269,7 +327,7 @@ util_format_z32_float_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
       const uint32_t *src = src_row;
       float *dst = (float *)dst_row;
       for(x = 0; x < width; ++x) {
-         *dst++ = (float)(*src++ * (1.0/0xffffffff));
+         *dst++ = z32_unorm_to_z32_float(*src++);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -277,7 +335,7 @@ util_format_z32_float_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
 }
 
 void
-util_format_z24_unorm_s8_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride,
+util_format_z24_unorm_s8_uint_unpack_z_float(float *dst_row, unsigned dst_stride,
                                                 const uint8_t *src_row, unsigned src_stride,
                                                 unsigned width, unsigned height)
 {
@@ -286,13 +344,8 @@ util_format_z24_unorm_s8_uscaled_unpack_z_float(float *dst_row, unsigned dst_str
       float *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *src++;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         z = (value) & 0xffffff;
-         *dst++ = (float)(z * (1.0/0xffffff));
+         uint32_t value =  util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -300,7 +353,7 @@ util_format_z24_unorm_s8_uscaled_unpack_z_float(float *dst_row, unsigned dst_str
 }
 
 void
-util_format_z24_unorm_s8_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+util_format_z24_unorm_s8_uint_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
                                               const float *src_row, unsigned src_stride,
                                               unsigned width, unsigned height)
 {
@@ -309,16 +362,10 @@ util_format_z24_unorm_s8_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_str
       const float *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *dst;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
+         uint32_t value = util_le32_to_cpu(*dst);
          value &= 0xff000000;
-         value |= ((uint32_t)(*src++ * (double)0xffffff)) & 0xffffff;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *dst++ = value;
+         value |= z32_float_to_z24_unorm(*src++);
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -326,7 +373,7 @@ util_format_z24_unorm_s8_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_str
 }
 
 void
-util_format_z24_unorm_s8_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+util_format_z24_unorm_s8_uint_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
                                                   const uint8_t *src_row, unsigned src_stride,
                                                   unsigned width, unsigned height)
 {
@@ -335,13 +382,8 @@ util_format_z24_unorm_s8_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned ds
       uint32_t *dst = dst_row;
       const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *src++;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         z = value & 0xffffff;
-         *dst++ = (z << 8) | (z >> 16); /* z * 0xffffffff / 0xffffff */;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -349,7 +391,7 @@ util_format_z24_unorm_s8_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned ds
 }
 
 void
-util_format_z24_unorm_s8_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+util_format_z24_unorm_s8_uint_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
                                                 const uint32_t *src_row, unsigned src_stride,
                                                 unsigned width, unsigned height)
 {
@@ -358,15 +400,10 @@ util_format_z24_unorm_s8_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_s
       const uint32_t *src = src_row;
       uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = ((uint32_t)(*src >> 8)) & 0xffffff;
-         value = ((uint32_t)(((uint64_t)src[1]) * 0x1 / 0xffffffff)) << 24;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *dst++ = value;
-         src += 1;
-         dst += 4;
+         uint32_t value = util_le32_to_cpu(*dst);
+         value &= 0xff000000;
+         value |= z32_unorm_to_z24_unorm(*src++);
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -374,26 +411,17 @@ util_format_z24_unorm_s8_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_s
 }
 
 void
-util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_z24_unorm_s8_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                                    const uint8_t *src_row, unsigned src_stride,
                                                    unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       uint8_t *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t z;
-         uint32_t s;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         z = (value) & 0xffffff;
-         s = value >> 24;
-         dst[1] = s;
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = value >> 24;
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -401,24 +429,19 @@ util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned ds
 }
 
 void
-util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_z24_unorm_s8_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                                  const uint8_t *src_row, unsigned src_stride,
                                                  unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const uint8_t *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = ((uint32_t)(((uint32_t)MIN2(*src, 1)) * 0xffffff / 0x1)) & 0xffffff;
-         value = (src[1]) << 24;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         uint32_t value = util_le32_to_cpu(*dst);
+         value &= 0x00ffffff;
+         value |= *src++ << 24;
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -426,26 +449,17 @@ util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_
 }
 
 void
-util_format_s8_uscaled_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
+util_format_s8_uint_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
                                                 const uint8_t *src_row, unsigned src_stride,
                                                 unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       float *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t s;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         s = (value) & 0xff;
-         z = value >> 8;
-         dst[0] = (float)(z * (1.0/0xffffff));
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_float(value >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -453,24 +467,19 @@ util_format_s8_uscaled_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_str
 }
 
 void
-util_format_s8_uscaled_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+util_format_s8_uint_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
                                               const float *src_row, unsigned src_stride,
                                               unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const float *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = ((uint32_t)CLAMP(src[1], 0, 255)) & 0xff;
-         value = ((uint32_t)(*src * (double)0xffffff)) << 8;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         uint32_t value = util_le32_to_cpu(*dst);
+         value &= 0x000000ff;
+         value |= z32_float_to_z24_unorm(*src++) << 8;
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -478,26 +487,17 @@ util_format_s8_uscaled_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_str
 }
 
 void
-util_format_s8_uscaled_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+util_format_s8_uint_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
                                                   const uint8_t *src_row, unsigned src_stride,
                                                   unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       uint32_t *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t s;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         s = (value) & 0xff;
-         z = value >> 8;
-         dst[0] = (uint32_t)(((uint64_t)z) * 0xffffffff / 0xffffff);
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_unorm(value >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -505,24 +505,19 @@ util_format_s8_uscaled_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned ds
 }
 
 void
-util_format_s8_uscaled_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+util_format_s8_uint_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
                                                 const uint32_t *src_row, unsigned src_stride,
                                                 unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const uint32_t *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = ((uint32_t)(((uint64_t)src[1]) * 0x1 / 0xffffffff)) & 0xff;
-         value = ((uint32_t)(*src >> 8)) << 8;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         uint32_t value = util_le32_to_cpu(*dst);
+         value &= 0x000000ff;
+         value |= *src++ & 0xffffff00;
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -530,26 +525,17 @@ util_format_s8_uscaled_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_s
 }
 
 void
-util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_s8_uint_z24_unorm_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                                    const uint8_t *src_row, unsigned src_stride,
                                                    unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       uint8_t *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t s;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         s = (value) & 0xff;
-         z = value >> 8;
-         dst[1] = s;
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = value & 0xff;
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -557,24 +543,19 @@ util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(uint8_t *dst_row, unsigned ds
 }
 
 void
-util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_s8_uint_z24_unorm_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                                  const uint8_t *src_row, unsigned src_stride,
                                                  unsigned width, unsigned height)
 {
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const uint8_t *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = (src[1]) & 0xff;
-         value = ((uint32_t)(((uint32_t)MIN2(*src, 1)) * 0xffffff / 0x1)) << 8;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         uint32_t value = util_le32_to_cpu(*dst);
+         value &= 0xffffff00;
+         value |= *src++;
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -589,17 +570,10 @@ util_format_z24x8_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       float *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         z = (value) & 0xffffff;
-         dst[0] = (float)(z * (1.0/0xffffff));
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -614,16 +588,11 @@ util_format_z24x8_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const float *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
          uint32_t value;
-         value = ((uint32_t)(*src * (double)0xffffff)) & 0xffffff;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         value = z32_float_to_z24_unorm(*src++);
+         *dst++ = util_le32_to_cpu(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -638,17 +607,10 @@ util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       uint32_t *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         z = (value) & 0xffffff;
-         dst[0] = (uint32_t)(((uint64_t)z) * 0xffffffff / 0xffffff);
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -663,16 +625,11 @@ util_format_z24x8_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const uint32_t *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
          uint32_t value;
-         value = ((uint32_t)(*src >> 8)) & 0xffffff;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         value = z32_unorm_to_z24_unorm(*src++);
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -687,17 +644,10 @@ util_format_x8z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       float *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         z = value >> 8;
-         dst[0] = (float)(z * (1.0/0xffffff));
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_float(value >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -712,16 +662,11 @@ util_format_x8z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const float *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
          uint32_t value;
-         value = ((uint32_t)(*src * (double)0xffffff)) << 8;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         value = z32_float_to_z24_unorm(*src++) << 8;
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -736,17 +681,10 @@ util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       uint32_t *dst = dst_row;
-      const uint8_t *src = src_row;
+      const uint32_t *src = (const uint32_t *)src_row;
       for(x = 0; x < width; ++x) {
-         uint32_t value = *(const uint32_t *)src;
-         uint32_t z;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         z = value >> 8;
-         dst[0] = (uint32_t)(((uint64_t)z) * 0xffffffff / 0xffffff);
-         src += 4;
-         dst += 1;
+         uint32_t value = util_cpu_to_le32(*src++);
+         *dst++ = z24_unorm_to_z32_unorm(value >> 8);
       }
       src_row += src_stride/sizeof(*src_row);
       dst_row += dst_stride/sizeof(*dst_row);
@@ -761,16 +699,11 @@ util_format_x8z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
    unsigned x, y;
    for(y = 0; y < height; ++y) {
       const uint32_t *src = src_row;
-      uint8_t *dst = dst_row;
+      uint32_t *dst = (uint32_t *)dst_row;
       for(x = 0; x < width; ++x) {
          uint32_t value;
-         value = ((uint32_t)(*src >> 8)) << 8;
-#ifdef PIPE_ARCH_BIG_ENDIAN
-         value = util_bswap32(value);
-#endif
-         *(uint32_t *)dst = value;
-         src += 1;
-         dst += 4;
+         value = z32_unorm_to_z24_unorm(*src++) << 8;
+         *dst++ = util_cpu_to_le32(value);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -778,7 +711,7 @@ util_format_x8z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
 }
 
 void
-util_format_z32_float_s8x24_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride,
+util_format_z32_float_s8x24_uint_unpack_z_float(float *dst_row, unsigned dst_stride,
                                                    const uint8_t *src_row, unsigned src_stride,
                                                    unsigned width, unsigned height)
 {
@@ -797,7 +730,7 @@ util_format_z32_float_s8x24_uscaled_unpack_z_float(float *dst_row, unsigned dst_
 }
 
 void
-util_format_z32_float_s8x24_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
+util_format_z32_float_s8x24_uint_pack_z_float(uint8_t *dst_row, unsigned dst_stride,
                                                  const float *src_row, unsigned src_stride,
                                                  unsigned width, unsigned height)
 {
@@ -816,7 +749,7 @@ util_format_z32_float_s8x24_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_
 }
 
 void
-util_format_z32_float_s8x24_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
+util_format_z32_float_s8x24_uint_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride,
                                                      const uint8_t *src_row, unsigned src_stride,
                                                      unsigned width, unsigned height)
 {
@@ -825,7 +758,7 @@ util_format_z32_float_s8x24_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned
       uint32_t *dst = dst_row;
       const float *src = (const float *)src_row;
       for(x = 0; x < width; ++x) {
-         *dst = (uint32_t)(*src * (double)0xffffffff);
+         *dst = z32_float_to_z32_unorm(*src);
          src += 2;
          dst += 1;
       }
@@ -835,7 +768,7 @@ util_format_z32_float_s8x24_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned
 }
 
 void
-util_format_z32_float_s8x24_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
+util_format_z32_float_s8x24_uint_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride,
                                                    const uint32_t *src_row, unsigned src_stride,
                                                    unsigned width, unsigned height)
 {
@@ -844,9 +777,7 @@ util_format_z32_float_s8x24_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned ds
       const uint32_t *src = src_row;
       float *dst = (float *)dst_row;
       for(x = 0; x < width; ++x) {
-         *dst = (float)(*src * (1.0/0xffffffff));
-         src += 2;
-         dst += 1;
+         *dst++ = z32_unorm_to_z32_float(*src++);
       }
       dst_row += dst_stride/sizeof(*dst_row);
       src_row += src_stride/sizeof(*src_row);
@@ -854,7 +785,7 @@ util_format_z32_float_s8x24_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned ds
 }
 
 void
-util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_z32_float_s8x24_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                                       const uint8_t *src_row, unsigned src_stride,
                                                       unsigned width, unsigned height)
 {
@@ -873,7 +804,7 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned
 }
 
 void
-util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+util_format_z32_float_s8x24_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
                                                     const uint8_t *src_row, unsigned src_stride,
                                                     unsigned width, unsigned height)
 {
@@ -891,3 +822,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d
    }
 }
 
+
+/**
+ * Thin wrapper: forwards every argument unchanged to
+ * util_format_z24_unorm_s8_uint_unpack_s_8uint().
+ */
+void
+util_format_x24s8_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
+                                      const uint8_t *src_row, unsigned src_stride,
+                                      unsigned width, unsigned height)
+{
+   util_format_z24_unorm_s8_uint_unpack_s_8uint(dst_row, dst_stride,
+                                                src_row, src_stride,
+                                                width, height);
+}
+
+/**
+ * Thin wrapper: forwards every argument unchanged to
+ * util_format_z24_unorm_s8_uint_pack_s_8uint().
+ */
+void
+util_format_x24s8_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
+                                    const uint8_t *src_row, unsigned src_stride,
+                                    unsigned width, unsigned height)
+{
+   util_format_z24_unorm_s8_uint_pack_s_8uint(dst_row, dst_stride,
+                                              src_row, src_stride,
+                                              width, height);
+}
+
+/**
+ * Thin wrapper: forwards every argument unchanged to
+ * util_format_s8_uint_z24_unorm_unpack_s_8uint().
+ */
+void
+util_format_s8x24_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
+                                      const uint8_t *src_row, unsigned src_stride,
+                                      unsigned width, unsigned height)
+{
+   util_format_s8_uint_z24_unorm_unpack_s_8uint(dst_row, dst_stride,
+                                                src_row, src_stride,
+                                                width, height);
+}
+
+/**
+ * Thin wrapper: forwards every argument unchanged to
+ * util_format_s8_uint_z24_unorm_pack_s_8uint().
+ */
+void
+util_format_s8x24_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
+                                    const uint8_t *src_row, unsigned src_stride,
+                                    unsigned width, unsigned height)
+{
+   util_format_s8_uint_z24_unorm_pack_s_8uint(dst_row, dst_stride,
+                                              src_row, src_stride,
+                                              width, height);
+}
+
+/**
+ * Thin wrapper: forwards every argument unchanged to
+ * util_format_z32_float_s8x24_uint_unpack_s_8uint().
+ */
+void
+util_format_x32_s8x24_uint_unpack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
+                                          const uint8_t *src_row, unsigned src_stride,
+                                          unsigned width, unsigned height)
+{
+   util_format_z32_float_s8x24_uint_unpack_s_8uint(dst_row, dst_stride,
+                                                   src_row, src_stride,
+                                                   width, height);
+}
+
+/**
+ * Thin wrapper: forwards every argument unchanged to
+ * util_format_z32_float_s8x24_uint_pack_s_8uint().
+ */
+void
+util_format_x32_s8x24_uint_pack_s_8uint(uint8_t *dst_row, unsigned dst_stride,
+                                        const uint8_t *src_row, unsigned src_stride,
+                                        unsigned width, unsigned height)
+{
+   util_format_z32_float_s8x24_uint_pack_s_8uint(dst_row, dst_stride,
+                                                 src_row, src_stride,
+                                                 width, height);
+}