swr: [rasterizer core] fix SIMD16 Transpose_16_16
author Tim Rowley <timothy.o.rowley@intel.com>
Thu, 22 Dec 2016 00:06:42 +0000 (18:06 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Fri, 6 Jan 2017 16:05:02 +0000 (10:05 -0600)
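Fix incorrect swizzling in SIMD16 Transpose_16_16 that broke the
two-channel 16-bpc formats like R16G16_FLOAT. The permute2f128
selectors 0x00/0x11 took both 128-bit lanes from tmp0 alone; 0x20/0x31
combine the matching lanes of tmp0 and tmp1.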

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/core/utils.h

index 6caee16623b2665f356d8052f30877230a5be9b0..c4162b4e71cb98738bc868d19913f89f45789faf 100644 (file)
@@ -732,8 +732,8 @@ struct Transpose16_16
         simdscalari tmp0 = _simd_unpacklo_epi16(src0, src1);                                        // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
         simdscalari tmp1 = _simd_unpackhi_epi16(src0, src1);                                        // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
 
-        simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x00);     // (0, 0)                   // rg0 rg1 rg2 rg3 rg4 rg5 rg6 rg7
-        simdscalari dst1 = _simd_permute2f128_si(tmp0, tmp1, 0x11);     // (1, 1)                   // rg8 rg9 rgA rgB rgC rgD rgE rgF
+        simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20);     // (2, 0)                   // rg0 rg1 rg2 rg3 rg4 rg5 rg6 rg7
+        simdscalari dst1 = _simd_permute2f128_si(tmp0, tmp1, 0x31);     // (3, 1)                   // rg8 rg9 rgA rgB rgC rgD rgE rgF
 
         _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 0, dst0);                            // rgrgrgrgrgrgrgrg
         _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 1, dst1);                            // rgrgrgrgrgrgrgrg