simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge (vec_duplicate (vec_select)).
author Petr Murzin <petr.murzin@intel.com>
Mon, 24 Nov 2014 12:00:54 +0000 (12:00 +0000)
committer Ilya Verbin <iverbin@gcc.gnu.org>
Mon, 24 Nov 2014 12:00:54 +0000 (12:00 +0000)
2014-11-24  Petr Murzin  <petr.murzin@intel.com>

gcc/
* simplify-rtx.c (simplify_ternary_operation): Simplify
vec_merge (vec_duplicate (vec_select)).

gcc/testsuite/
* gcc.target/i386/extract-insert-combining.c: New test.

From-SVN: r218015

gcc/ChangeLog
gcc/simplify-rtx.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/extract-insert-combining.c [new file with mode: 0644]

index 60a6b22a8942c2e25ffb4b7d2047dd3f0dd0ffcd..e362f1d80434702e851a470c017a01663677c381 100644 (file)
@@ -1,3 +1,8 @@
+2014-11-24  Petr Murzin  <petr.murzin@intel.com>
+
+       * simplify-rtx.c (simplify_ternary_operation): Simplify
+       vec_merge (vec_duplicate (vec_select)).
+
 2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * config/aarch64/aarch64.c (AARCH64_FUSE_ADRP_LDR): Define.
index 98d4cebf94f7a47dde80cdb3e8653ef38b58faee..055ba787ac6e5b503d1cf7f3c8438b546c107e79 100644 (file)
@@ -5233,6 +5233,22 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
                                                 op0, XEXP (op1, 0), op2);
                }
            }
+
+         /* Replace (vec_merge (vec_duplicate (vec_select a parallel (i))) a 1 << i)
+            with a.  */
+         if (GET_CODE (op0) == VEC_DUPLICATE
+             && GET_CODE (XEXP (op0, 0)) == VEC_SELECT
+             && GET_CODE (XEXP (XEXP (op0, 0), 1)) == PARALLEL
+             && mode_nunits[GET_MODE (XEXP (op0, 0))] == 1)
+           {
+             tem = XVECEXP ((XEXP (XEXP (op0, 0), 1)), 0, 0);
+             if (CONST_INT_P (tem) && CONST_INT_P (op2))
+               {
+                 if (XEXP (XEXP (op0, 0), 0) == op1
+                     && UINTVAL (op2) == HOST_WIDE_INT_1U << UINTVAL (tem))
+                   return op1;
+               }
+           }
        }
 
       if (rtx_equal_p (op0, op1)
index b3d827fe6ab7ba3bc86b6ce8960df33aeb9f26fe..07b00cf2bb4f4cc73b1c33820cd7be1f08c4011b 100644 (file)
@@ -1,3 +1,7 @@
+2014-11-24  Petr Murzin  <petr.murzin@intel.com>
+
+       * gcc.target/i386/extract-insert-combining.c: New test.
+
 2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * gcc.target/aarch64/fuse_adrp_add_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
new file mode 100644 (file)
index 0000000..f27f92c
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-msse4.2 -O3" } */
+/* { dg-final { scan-assembler-times "(?:vmovd|movd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "(?:vpaddd|paddd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "(?:vpinsrd|pinsrd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+/* Extracts element 0 of a vector and re-inserts it at the same index.  */
+int
+main (int a, int b)
+{
+  int res;
+  /* The vectors below are declared without initializers.  */
+  __m128i xa, xb, xres;
+  /* Put a and b in element 0; the other elements are never assigned.  */
+  xa = _mm_insert_epi32 (xa, a, 0);
+  xb = _mm_insert_epi32 (xb, b, 0);
+  /* Element-wise 32-bit add: element 0 of xres is a + b.  */
+  xres = _mm_add_epi32 (xa, xb);
+  /* Read element 0 of the sum back into a scalar.  */
+  res = _mm_extract_epi32 (xres, 0);
+  /* Both inserts rewrite element 0 with the value it already holds.  */
+  xres = _mm_insert_epi32 (xres, res, 0);
+  xb   = _mm_insert_epi32 (xb, b, 0);
+  /* Second add: element 0 of xres becomes (a + b) + b.  */
+  xres = _mm_add_epi32 (xres, xb);
+  /* Only element 0 of the final vector is returned.  */
+  res = _mm_extract_epi32 (xres, 0);
+
+  return res;
+}
+