From b308d872e6c73178dd4351a866932709d398313a Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 3 Dec 2015 08:43:22 +0000 Subject: [PATCH] re PR tree-optimization/67800 (Missed vectorization opportunity on x86 (DOT_PROD_EXPR in non-reduction)) 2015-12-03 Richard Biener PR tree-optimization/67800 PR tree-optimization/68333 * tree-vect-patterns.c (vect_recog_dot_prod_pattern): Restore restriction to reduction contexts but allow SLP reductions as well. (vect_recog_sad_pattern): Likewise. (vect_recog_widen_sum_pattern): Likewise. * gcc.target/i386/vect-pr67800.c: New testcase. From-SVN: r231221 --- gcc/ChangeLog | 9 +++++ gcc/testsuite/ChangeLog | 6 +++ gcc/testsuite/gcc.target/i386/vect-pr67800.c | 42 ++++++++++++++++++++ gcc/tree-vect-patterns.c | 10 +++++ 4 files changed, 67 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/vect-pr67800.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 51924213313..7072624fa7a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2015-12-03 Richard Biener + + PR tree-optimization/67800 + PR tree-optimization/68333 + * tree-vect-patterns.c (vect_recog_dot_prod_pattern): Restore + restriction to reduction contexts but allow SLP reductions as well. + (vect_recog_sad_pattern): Likewise. + (vect_recog_widen_sum_pattern): Likewise. + 2015-12-03 Richard Biener PR tree-optimization/68639 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 738d5617ff5..074cdceb525 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-12-03 Richard Biener + + PR tree-optimization/67800 + PR tree-optimization/68333 + * gcc.target/i386/vect-pr67800.c: New testcase. 
+ 2015-12-03 Richard Biener PR tree-optimization/68639 diff --git a/gcc/testsuite/gcc.target/i386/vect-pr67800.c b/gcc/testsuite/gcc.target/i386/vect-pr67800.c new file mode 100644 index 00000000000..324510858ff --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-pr67800.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ +/* { dg-additional-options "-msse4.2" } */ + +#define ubyte unsigned char +#define byte char + +#define SCALE 8 + +#define R2Y (76) +#define G2Y (150) +#define B2Y (30) +#define R2I (127) +#define G2I (-59) +#define B2I (-68) +#define R2Q (51) +#define G2Q (-127) +#define B2Q (76) + +void +convert(ubyte *in, ubyte *out, unsigned n) +{ + ubyte r, g, b; + ubyte y = 0; + byte i, q; + + while (--n) { + r = *in++; + g = *in++; + b = *in++; + + y = (ubyte)(((R2Y * r) + (G2Y * g) + (B2Y * b) + (1 << (SCALE - 1))) >> SCALE); + i = (byte)(((R2I * r) + (G2I * g) + (B2I * b) + (1 << (SCALE - 1))) >> SCALE); + q = (byte)(((R2Q * r) + (G2Q * g) + (B2Q * b) + (1 << (SCALE - 1))) >> SCALE); + + *out++ = y; + *out++ = i; + *out++ = q; + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index cd142e13903..4b225fbb4ca 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -312,6 +312,9 @@ vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in, { gimple *def_stmt; + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def + && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) + return NULL; oprnd0 = gimple_assign_rhs1 (last_stmt); oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) @@ -531,6 +534,9 @@ vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in, { gimple *def_stmt; + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def + && ! 
STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) + return NULL; plus_oprnd0 = gimple_assign_rhs1 (last_stmt); plus_oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type) @@ -1152,6 +1158,10 @@ vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in, if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) return NULL; + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def + && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) + return NULL; + oprnd0 = gimple_assign_rhs1 (last_stmt); oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) -- 2.30.2