From: Richard Biener Date: Thu, 29 Jan 2015 12:53:39 +0000 (+0000) Subject: re PR target/64844 (Vectorization inhibited in gcc5 when loop starts with elem[1... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=62c004451aaecf4d5cfdd0db776165410e017f75;p=gcc.git re PR target/64844 (Vectorization inhibited in gcc5 when loop starts with elem[1], aarch64 perf regression from 4.9.1) 2015-01-29 Richard Biener PR tree-optimization/64844 * tree-vect-loop.c (vect_estimate_min_profitable_iters): Always dump cost model analysis. * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not register adjusted load/store costs here. * gcc.dg/vect/pr64844.c: New testcase. From-SVN: r220244 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 295884e7870..308c0eb20d2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2015-01-29 Richard Biener + + PR tree-optimization/64844 + * tree-vect-loop.c (vect_estimate_min_profitable_iters): Always + dump cost model analysis. + * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): + Do not register adjusted load/store costs here. + 2015-01-29 Ilya Enkovich Uros Bizjak diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a8df363a43b..cda948d64d3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-01-29 Richard Biener + + PR tree-optimization/64844 + * gcc.dg/vect/pr64844.c: New testcase. 
+ 2015-01-29 Yuri Rumyantsev PR middle-end/64809 diff --git a/gcc/testsuite/gcc.dg/vect/pr64844.c b/gcc/testsuite/gcc.dg/vect/pr64844.c new file mode 100644 index 00000000000..fa01efc4506 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr64844.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-additional-options "-ffast-math" } */ + +#include "tree-vect.h" + +extern void abort (void); + +typedef __SIZE_TYPE__ size_t; + +static double +compute(size_t n, double const * __restrict a, double const * __restrict b) +{ + double res = 0.0; + size_t i; + for (i = 0; i < n; ++i) + res += a[i] + b[i]; + return res; +} + +void init(double *, double *); + +int +main() +{ + double ary1[1024]; + double ary2[1024]; + size_t i; + + check_vect (); + + // Initialize arrays + for (i = 0; i < 1024; ++i) + { + ary1[i] = 1 / (double)(i + 1); + ary2[i] = 1 + 1 / (double) (i + 1); + __asm__ volatile ("" : : : "memory"); + } + + // Compute two results using different starting elements + if ((int) compute (512, &ary1[0], &ary2[0]) != 525 + || (int) compute(512, &ary1[1], &ary2[1]) != 523) + abort (); + + return 0; +} + +/* All targets should allow vectorizing this by some means of + dealing with the known misalignment in loop 2. */ + +/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 52d6a869c4e..302f2df2af2 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1763,9 +1763,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) if (do_peeling) { - stmt_info_for_cost *si; - void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); - /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i. If the misalignment of DR_i is identical to that of dr0 then set DR_MISALIGNMENT (DR_i) to zero. 
If the misalignment of DR_i and @@ -1791,20 +1788,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) dump_printf_loc (MSG_NOTE, vect_location, "Peeling for alignment will be applied.\n"); } - /* We've delayed passing the inside-loop peeling costs to the - target cost model until we were sure peeling would happen. - Do so now. */ - if (body_cost_vec.exists ()) - { - FOR_EACH_VEC_ELT (body_cost_vec, i, si) - { - struct _stmt_vec_info *stmt_info - = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; - (void) add_stmt_cost (data, si->count, si->kind, stmt_info, - si->misalign, vect_body); - } - body_cost_vec.release (); - } + /* The inside-loop cost will be accounted for in vectorizable_load + and vectorizable_store correctly with adjusted alignments. + Drop the body_cost_vec on the floor here. */ + body_cost_vec.release (); stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); gcc_assert (stat); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index afc12a7f760..c5f1c29cbe2 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2990,6 +2990,27 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); + dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n", + vec_inside_cost); + dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", + vec_prologue_cost); + dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", + vec_epilogue_cost); + dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n", + scalar_single_iter_cost); + dump_printf (MSG_NOTE, " Scalar outside cost: %d\n", + scalar_outside_cost); + dump_printf (MSG_NOTE, " Vector outside cost: %d\n", + vec_outside_cost); + dump_printf (MSG_NOTE, " prologue iterations: %d\n", + peel_iters_prologue); + dump_printf (MSG_NOTE, " epilogue iterations: %d\n", + peel_iters_epilogue); + } + /* Calculate number of iterations 
required to make the vector version profitable, relative to the loop bodies only. The following condition must hold true: @@ -3037,30 +3058,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, return; } - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); - dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n", - vec_inside_cost); - dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", - vec_prologue_cost); - dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", - vec_epilogue_cost); - dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n", - scalar_single_iter_cost); - dump_printf (MSG_NOTE, " Scalar outside cost: %d\n", - scalar_outside_cost); - dump_printf (MSG_NOTE, " Vector outside cost: %d\n", - vec_outside_cost); - dump_printf (MSG_NOTE, " prologue iterations: %d\n", - peel_iters_prologue); - dump_printf (MSG_NOTE, " epilogue iterations: %d\n", - peel_iters_epilogue); - dump_printf (MSG_NOTE, - " Calculated minimum iters for profitability: %d\n", - min_profitable_iters); - dump_printf (MSG_NOTE, "\n"); - } + dump_printf (MSG_NOTE, + " Calculated minimum iters for profitability: %d\n", + min_profitable_iters); min_profitable_iters = min_profitable_iters < vf ? vf : min_profitable_iters;