From: Matt Turner
Date: Fri, 8 Apr 2016 22:30:30 +0000 (-0700)
Subject: i965/tiled_memcpy: Optimize RGBA -> BGRA swizzle.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0a5d8d9af42fd77fce1492d55f958da97816961a;p=mesa.git

i965/tiled_memcpy: Optimize RGBA -> BGRA swizzle.

Replaces four byte loads and four byte stores with a load, bswap,
rotate, store; or a movbe, rotate, store.

Reviewed-by: Roland Scheidegger
---

diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 0a68751d5d0..fa5ec755298 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -56,24 +56,27 @@ static const uint32_t ytile_width = 128;
 static const uint32_t ytile_height = 32;
 static const uint32_t ytile_span = 16;
 
+static inline uint32_t
+ror(uint32_t n, uint32_t d)
+{
+   return (n >> d) | (n << (32 - d));
+}
+
 /**
  * Copy RGBA to BGRA - swap R and B.
  */
 static inline void *
 rgba8_copy(void *dst, const void *src, size_t bytes)
 {
-   uint8_t *d = dst;
-   uint8_t const *s = src;
+   uint32_t *d = dst;
+   uint32_t const *s = src;
 
    assert(bytes % 4 == 0);
 
    while (bytes >= 4) {
-      d[0] = s[2];
-      d[1] = s[1];
-      d[2] = s[0];
-      d[3] = s[3];
-      d += 4;
-      s += 4;
+      *d = ror(__builtin_bswap32(*s), 8);
+      d += 1;
+      s += 1;
       bytes -= 4;
    }
    return dst;