From 691b9fb7a2a752995591b9e1196d0e8588401d51 Mon Sep 17 00:00:00 2001 From: Petr Murzin Date: Mon, 24 Nov 2014 12:00:54 +0000 Subject: [PATCH] simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge (vec_duplicate (vec_select)). 2014-11-24 Petr Murzin gcc/ * simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge (vec_duplicate (vec_select)). gcc/testsuite/ * gcc.target/i386/extract-insert-combining.c: New test. From-SVN: r218015 --- gcc/ChangeLog | 5 +++ gcc/simplify-rtx.c | 16 +++++++++ gcc/testsuite/ChangeLog | 4 +++ .../i386/extract-insert-combining.c | 34 +++++++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/extract-insert-combining.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 60a6b22a894..e362f1d8043 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2014-11-24 Petr Murzin + + * simplify-rtx.c (simplify_ternary_operation): Simplify + vec_merge (vec_duplicate (vec_select)). + 2014-11-24 Kyrylo Tkachov * config/aarch64/aarch64.c (AARCH64_FUSE_ADRP_LDR): Define. diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 98d4cebf94f..055ba787ac6 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5233,6 +5233,22 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, op0, XEXP (op1, 0), op2); } } + + /* Replace (vec_merge (vec_duplicate (vec_select a parallel (i))) a 1 << i) + with a. 
*/ + if (GET_CODE (op0) == VEC_DUPLICATE + && GET_CODE (XEXP (op0, 0)) == VEC_SELECT + && GET_CODE (XEXP (XEXP (op0, 0), 1)) == PARALLEL + && mode_nunits[GET_MODE (XEXP (op0, 0))] == 1) + { + tem = XVECEXP ((XEXP (XEXP (op0, 0), 1)), 0, 0); + if (CONST_INT_P (tem) && CONST_INT_P (op2)) + { + if (XEXP (XEXP (op0, 0), 0) == op1 + && UINTVAL (op2) == HOST_WIDE_INT_1U << UINTVAL (tem)) + return op1; + } + } } if (rtx_equal_p (op0, op1) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b3d827fe6ab..07b00cf2bb4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2014-11-24 Petr Murzin + + * gcc.target/i386/extract-insert-combining.c: New test. + 2014-11-24 Kyrylo Tkachov * gcc.target/aarch64/fuse_adrp_add_1.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c new file mode 100644 index 00000000000..f27f92c083f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-msse4.2 -O3" } */ +/* { dg-final { scan-assembler-times "(?:vmovd|movd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "(?:vpaddd|paddd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "(?:vpinsrd|pinsrd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vmovss" } } */ + +#include <smmintrin.h> + +int +main (int a, int b) +{ + int res; + + __m128i xa, xb, xres; + + xa = _mm_insert_epi32 (xa, a, 0); + xb = _mm_insert_epi32 (xb, b, 0); + + xres = _mm_add_epi32 (xa, xb); + + res = _mm_extract_epi32 (xres, 0); + + xres = _mm_insert_epi32 (xres, res, 0); + xb = _mm_insert_epi32 (xb, b, 0); + + xres = _mm_add_epi32 (xres, xb); + + res = _mm_extract_epi32 (xres, 0); + + return res; +} + -- 2.30.2