From 0179520a83329a8daefcbb87c2609035694b928a Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 23 Mar 2016 10:52:00 +0100 Subject: [PATCH] re PR tree-optimization/70354 (Wrong code with -O3 -march=broadwell and -march=skylake-avx512.) PR tree-optimization/70354 * tree-vect-patterns.c (vect_recog_vector_vector_shift_pattern): If oprnd0 is wider than oprnd1 and there is a cast from the wider type to oprnd1, mask it with the mask of the narrower type. * gcc.dg/vect/pr70354-1.c: New test. * gcc.dg/vect/pr70354-2.c: New test. * gcc.target/i386/avx2-pr70354-1.c: New test. * gcc.target/i386/avx2-pr70354-2.c: New test. From-SVN: r234417 --- gcc/ChangeLog | 5 ++ gcc/testsuite/ChangeLog | 8 +++ gcc/testsuite/gcc.dg/vect/pr70354-1.c | 50 +++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr70354-2.c | 37 ++++++++++++++ .../gcc.target/i386/avx2-pr70354-1.c | 16 ++++++ .../gcc.target/i386/avx2-pr70354-2.c | 16 ++++++ gcc/tree-vect-patterns.c | 15 +++++- 7 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr70354-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr70354-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr70354-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr70354-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6261b3a8162..9a1eb039d88 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,10 @@ 2016-03-23 Jakub Jelinek + PR tree-optimization/70354 + * tree-vect-patterns.c (vect_recog_vector_vector_shift_pattern): If + oprnd0 is wider than oprnd1 and there is a cast from the wider + type to oprnd1, mask it with the mask of the narrower type. 
+ PR target/70321 * config/i386/i386.md (*anddi3_doubleword, *di3_doubleword): Optimize TARGET_STV splitters, if high or low word of last argument diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index eda8a9acc97..f9c6de25ed9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2016-03-23 Jakub Jelinek + + PR tree-optimization/70354 + * gcc.dg/vect/pr70354-1.c: New test. + * gcc.dg/vect/pr70354-2.c: New test. + * gcc.target/i386/avx2-pr70354-1.c: New test. + * gcc.target/i386/avx2-pr70354-2.c: New test. + 2016-03-22 Jeff Law PR target/70232 diff --git a/gcc/testsuite/gcc.dg/vect/pr70354-1.c b/gcc/testsuite/gcc.dg/vect/pr70354-1.c new file mode 100644 index 00000000000..70de8119a29 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr70354-1.c @@ -0,0 +1,50 @@ +/* PR tree-optimization/70354 */ +/* { dg-do run } */ + +#ifndef main /* main is a macro when an avx2 wrapper test includes this file; tree-vect.h is skipped in that case.  */ +#include "tree-vect.h" +#endif + +long long int b[64], c[64], g[64]; +unsigned long long int a[64], d[64], e[64], f[64], h[64]; + +__attribute__ ((noinline, noclone)) void +foo (void) /* Loop under test: the shift count applied to h[i] is itself computed with 64-bit multiply, shift and subtract operations.  */ +{ + int i; + for (i = 0; i < 64; i++) + { + d[i] = h[i] << (((((unsigned long long int) b[i] * e[i]) + << (-a[i] - 3752448776177690134ULL)) + - 8214565720323784703ULL) - 1ULL); + e[i] = (_Bool) (f[i] + (unsigned long long int) g[i]); + g[i] = c[i]; + } +} + +int +main () +{ + int i; +#ifndef main + check_vect (); +#endif + if (__CHAR_BIT__ != 8 || sizeof (long long int) != 8) + return 0; /* Exit successfully without testing unless long long is 8 bytes of 8 bits each.  */ + for (i = 0; i < 64; ++i) + { + a[i] = 14694295297531861425ULL; + b[i] = -1725558902283030715LL; + c[i] = 4402992416302558097LL; + e[i] = 6297173129107286501ULL; + f[i] = 13865724171235650855ULL; + g[i] = 982871027473857427LL; + h[i] = 8193845517487445944ULL; + } + foo (); /* Expected afterwards: d[i] equals the value stored in h[i], e[i] is 1, and g[i] holds the value stored in c[i].  */ + for (i = 0; i < 64; i++) + if (d[i] != 8193845517487445944ULL || e[i] != 1 + || g[i] != 4402992416302558097ULL) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr70354-2.c b/gcc/testsuite/gcc.dg/vect/pr70354-2.c new file mode 100644 index 
00000000000..356a1152d0c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr70354-2.c @@ -0,0 +1,37 @@ +/* PR tree-optimization/70354 */ +/* { dg-do run } */ + +#ifndef main /* main is a macro when an avx2 wrapper test includes this file; tree-vect.h is skipped in that case.  */ +#include "tree-vect.h" +#endif + +unsigned long long a[64], b[64]; + +__attribute__((noinline, noclone)) void +foo (void) /* Loop under test: shifts each a[i] left by a count derived from the 64-bit value b[i].  */ +{ + int i; + for (i = 0; i < 64; i++) + a[i] <<= (b[i] - 0x1200000000ULL); /* main stores 0x1200000000ULL + (i % 54) in b[i], so b[i] has bits set above bit 31 while the difference used as the shift count is only i % 54.  */ +} + +int +main () +{ + int i; +#ifndef main + check_vect (); +#endif + if (__CHAR_BIT__ != 8 || sizeof (long long int) != 8) + return 0; /* Exit successfully without testing unless long long is 8 bytes of 8 bits each.  */ + for (i = 0; i < 64; i++) + { + a[i] = 0x1234ULL; + b[i] = 0x1200000000ULL + (i % 54); + } + foo (); /* Expected afterwards: each a[i] is 0x1234ULL shifted left by i % 54.  */ + for (i = 0; i < 64; i++) + if (a[i] != (0x1234ULL << (i % 54))) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr70354-1.c b/gcc/testsuite/gcc.target/i386/avx2-pr70354-1.c new file mode 100644 index 00000000000..d2d9b83d924 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr70354-1.c @@ -0,0 +1,16 @@ +/* PR tree-optimization/70354 */ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mavx2" } */ +/* { dg-require-effective-target avx2 } */ + +#include "avx2-check.h" + +#define main() do_main () /* Rename the included test's main to do_main so avx2_test can call it.  */ + +#include "../../gcc.dg/vect/pr70354-1.c" + +static void +avx2_test (void) /* Runs the shared test body; presumably invoked by the avx2-check.h harness, which is not shown here.  */ +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr70354-2.c b/gcc/testsuite/gcc.target/i386/avx2-pr70354-2.c new file mode 100644 index 00000000000..709a51bcb8c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-pr70354-2.c @@ -0,0 +1,16 @@ +/* PR tree-optimization/70354 */ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mavx2" } */ +/* { dg-require-effective-target avx2 } */ + +#include "avx2-check.h" + +#define main() do_main () /* Rename the included test's main to do_main so avx2_test can call it.  */ + +#include "../../gcc.dg/vect/pr70354-2.c" + +static void +avx2_test (void) /* Runs the shared test body; presumably invoked by the avx2-check.h harness, which is not shown here.  */ +{ + do_main (); +} diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 0ee5bbe8f46..27080d28a1d 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -2097,7 +2097,20 @@ 
vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0)) && TYPE_PRECISION (TREE_TYPE (rhs1)) == TYPE_PRECISION (TREE_TYPE (oprnd0))) - def = rhs1; + { + if (TYPE_PRECISION (TREE_TYPE (oprnd1)) + >= TYPE_PRECISION (TREE_TYPE (rhs1))) + def = rhs1; + else + { + tree mask + = build_low_bits_mask (TREE_TYPE (rhs1), + TYPE_PRECISION (TREE_TYPE (oprnd1))); + def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); + def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask); + new_pattern_def_seq (stmt_vinfo, def_stmt); + } + } } if (def == NULL_TREE) -- 2.30.2