From f502d50e30db90506c0c3c4845694cd2e8e0cb25 Mon Sep 17 00:00:00 2001
From: Michael Matz
Date: Thu, 18 Jun 2015 13:31:17 +0000
Subject: [PATCH] re PR middle-end/66253 (459.GemsFDTD in SPEC CPU 2006 is miscompiled)

	PR middle-end/66253
	* tree-vect-stmts.c (vectorizable_store): Implement non-SLP
	grouped strided stores.
	(vectorizable_load): Don't use the DR from first_stmt in
	the non-SLP grouped strided case.

testsuite/
	* gcc.dg/vect/pr66253.c: New testcase.

From-SVN: r224605
---
 gcc/ChangeLog                       |   8 ++
 gcc/testsuite/ChangeLog             |   5 +
 gcc/testsuite/gcc.dg/vect/pr66253.c |  51 ++++++++++
 gcc/tree-vect-stmts.c               | 138 +++++++++++++++++-----
 4 files changed, 146 insertions(+), 56 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr66253.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fae12ccdd1a..fef6cb94002 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2015-06-18  Michael Matz
+
+	PR middle-end/66253
+	* tree-vect-stmts.c (vectorizable_store): Implement non-SLP
+	grouped strided stores.
+	(vectorizable_load): Don't use the DR from first_stmt in
+	the non-SLP grouped strided case.
+
 2015-06-18  Ilya Enkovich
 
 	PR target/66569
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7692ae53382..b6d91e141bd 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-06-18  Michael Matz
+
+	PR middle-end/66253
+	* gcc.dg/vect/pr66253.c: New testcase.
+
 2015-06-18  Ilya Enkovich
 
 	PR target/66569
diff --git a/gcc/testsuite/gcc.dg/vect/pr66253.c b/gcc/testsuite/gcc.dg/vect/pr66253.c
new file mode 100644
index 00000000000..bdf3ff9ca51
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr66253.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_hw_misalign } */
+
+#include "tree-vect.h"
+
+void __attribute__((noinline,noclone))
+test1(_Complex double * __restrict__ a, _Complex double * __restrict__ b,
+      double * __restrict__ c, int stride, int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    {
+      a[i*stride] = 0.5 * b[i*stride] * c[i*stride];
+    }
+}
+
+double ca[256];
+_Complex double ia[256];
+_Complex double da[256];
+
+extern void abort (void);
+
+int main ()
+{
+  int i;
+  int stride;
+
+  check_vect ();
+
+  for (stride = 1; stride < 15; stride++)
+    {
+      for (i = 0; i < 256; i++)
+	{
+	  __real__ ia[i] = (i + stride) % 19;
+	  __imag__ ia[i] = (i + stride) % 23;
+	  ca[i] = (i + stride) % 29;
+	  __asm__ volatile ("");
+	}
+
+      test1(da, ia, ca, stride, 256/stride);
+
+      for (i = 0; i < 256/stride; i++)
+	{
+	  if (da[i*stride] != 0.5 * ia[i*stride] * ca[i*stride])
+	    abort ();
+	}
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index d4d3b91dd99..9760d9a471f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5262,16 +5262,17 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       gimple_seq stmts = NULL;
       tree stride_base, stride_step, alias_off;
       tree vec_oprnd;
+      unsigned int g;
 
       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
 
       stride_base
 	= fold_build_pointer_plus
-	    (unshare_expr (DR_BASE_ADDRESS (dr)),
+	    (unshare_expr (DR_BASE_ADDRESS (first_dr)),
 	     size_binop (PLUS_EXPR,
-			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
-			 convert_to_ptrofftype (DR_INIT(dr))));
-      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
+			 convert_to_ptrofftype (DR_INIT(first_dr))));
+      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
 
       /* For a store with loop-invariant (but other than power-of-2)
 	 stride (i.e. not a grouped access) like so:
@@ -5302,6 +5303,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	  ltype = vectype;
 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+	  group_size = 1;
 	}
 
       ivstep = stride_step;
@@ -5322,65 +5324,89 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
 
       prev_stmt_info = NULL;
-      running_off = offvar;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
-      for (j = 0; j < ncopies; j++)
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+      next_stmt = first_stmt;
+      for (g = 0; g < group_size; g++)
 	{
-	  /* We've set op and dt above, from gimple_assign_rhs1(stmt),
-	     and first_stmt == stmt.  */
-	  if (j == 0)
-	    {
-	      if (slp)
-		{
-		  vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
-				     slp_node, -1);
-		  vec_oprnd = vec_oprnds[0];
-		}
-	      else
-		vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
-	    }
-	  else
-	    {
-	      if (slp)
-		vec_oprnd = vec_oprnds[j];
-	      else
-		vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
-	    }
-
-	  for (i = 0; i < nstores; i++)
+	  running_off = offvar;
+	  if (g)
 	    {
-	      tree newref, newoff;
-	      gimple incr, assign;
-	      tree size = TYPE_SIZE (ltype);
-	      /* Extract the i'th component.  */
-	      tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+	      tree size = TYPE_SIZE_UNIT (ltype);
+	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
 				      size);
-	      tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
-				       size, pos);
-
-	      elem = force_gimple_operand_gsi (gsi, elem, true,
-					       NULL_TREE, true,
-					       GSI_SAME_STMT);
-
-	      newref = build2 (MEM_REF, ltype,
-			       running_off, alias_off);
-
-	      /* And store it to *running_off.  */
-	      assign = gimple_build_assign (newref, elem);
-	      vect_finish_stmt_generation (stmt, assign, gsi);
-
-	      newoff = copy_ssa_name (running_off, NULL);
+	      tree newoff = copy_ssa_name (running_off, NULL);
 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-					  running_off, stride_step);
+					  running_off, pos);
 	      vect_finish_stmt_generation (stmt, incr, gsi);
-	      running_off = newoff;
-	      if (j == 0 && i == 0)
-		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+	    }
+	  for (j = 0; j < ncopies; j++)
+	    {
+	      /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+		 and first_stmt == stmt.  */
+	      if (j == 0)
+		{
+		  if (slp)
+		    {
+		      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
+					 slp_node, -1);
+		      vec_oprnd = vec_oprnds[0];
+		    }
+		  else
+		    {
+		      gcc_assert (gimple_assign_single_p (next_stmt));
+		      op = gimple_assign_rhs1 (next_stmt);
+		      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
+								NULL);
+		    }
+		}
 	      else
-		STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
-	      prev_stmt_info = vinfo_for_stmt (assign);
+		{
+		  if (slp)
+		    vec_oprnd = vec_oprnds[j];
+		  else
+		    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+		}
+
+	      for (i = 0; i < nstores; i++)
+		{
+		  tree newref, newoff;
+		  gimple incr, assign;
+		  tree size = TYPE_SIZE (ltype);
+		  /* Extract the i'th component.  */
+		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
+					  bitsize_int (i), size);
+		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
+					   size, pos);
+
+		  elem = force_gimple_operand_gsi (gsi, elem, true,
+						   NULL_TREE, true,
+						   GSI_SAME_STMT);
+
+		  newref = build2 (MEM_REF, ltype,
+				   running_off, alias_off);
+
+		  /* And store it to *running_off.  */
+		  assign = gimple_build_assign (newref, elem);
+		  vect_finish_stmt_generation (stmt, assign, gsi);
+
+		  newoff = copy_ssa_name (running_off, NULL);
+		  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+					      running_off, stride_step);
+		  vect_finish_stmt_generation (stmt, incr, gsi);
+
+		  running_off = newoff;
+		  if (g == group_size - 1)
+		    {
+		      if (j == 0 && i == 0)
+			STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+		      else
+			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+		      prev_stmt_info = vinfo_for_stmt (assign);
+		    }
+		}
 	    }
+	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
 	}
 
       return true;
     }
@@ -6265,7 +6291,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
       gcc_assert (!nested_in_vect_loop);
 
-      if (grouped_load)
+      if (slp && grouped_load)
 	first_dr = STMT_VINFO_DATA_REF
 	    (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
       else
-- 
2.30.2
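
A plain-C model of the store lowering may help when reading the new loop nest in
vectorizable_store.  The sketch below is illustrative only: the function name and
all parameters are invented, it is not code from GCC, and ncopies is taken as 1
for brevity.  For each group member g the running pointer starts g element sizes
into the first group; each extracted component is then stored and the pointer
advanced by the full group stride, matching the generated BIT_FIELD_REF /
MEM_REF / POINTER_PLUS_EXPR statements.

/* Illustrative scalar model of the non-SLP grouped strided-store
   lowering (invented names, ncopies assumed to be 1).  */
#include <string.h>

void
grouped_strided_store_model (char *base,          /* stride_base */
                             const char *vals,    /* extracted components */
                             size_t elem_size,    /* TYPE_SIZE_UNIT (ltype) */
                             size_t stride_step,  /* DR_STEP, in bytes */
                             unsigned group_size,
                             unsigned nstores)    /* components per vector */
{
  unsigned g, i;
  for (g = 0; g < group_size; g++)
    {
      /* Mirrors the "if (g)" POINTER_PLUS_EXPR adjustment: member g
         starts elem_size * g bytes into the group.  */
      char *running_off = base + (size_t) g * elem_size;
      for (i = 0; i < nstores; i++)
        {
          /* BIT_FIELD_REF extract of component i of this member's
             vector, then the MEM_REF store to *running_off.  */
          memcpy (running_off,
                  vals + ((size_t) g * nstores + i) * elem_size,
                  elem_size);
          /* Advance by the full group stride (stride_step).  */
          running_off += stride_step;
        }
    }
}

In the pr66253.c testcase the _Complex double store is presumably split into a
group of two double members (real and imaginary part), i.e. group_size 2,
elem_size 8 and a stride_step of 16 * stride bytes.  The vectorizable_load hunk
is the complementary fix: in the non-SLP grouped strided case the load keeps its
own DR instead of substituting the DR of first_stmt.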