From 598eaaa2a2368bb7d5ac3bafe7a0d1bb26d43f6e Mon Sep 17 00:00:00 2001 From: Yuri Rumyantsev Date: Wed, 16 Nov 2016 16:22:39 +0000 Subject: [PATCH] Support non-masked epilogue vectorization gcc/ 2016-11-16 Yuri Rumyantsev * params.def (PARAM_VECT_EPILOGUES_NOMASK): New. * tree-if-conv.c (tree_if_conversion): Make public. * tree-if-conv.h: New file. * tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Avoid dynamic alias checks for epilogues. * tree-vect-loop-manip.c (vect_do_peeling): Return created epilog. * tree-vect-loop.c: Include tree-if-conv.h. (new_loop_vec_info): Add zeroing orig_loop_info field. (vect_analyze_loop_2): Don't try to enhance alignment for epilogues. (vect_analyze_loop): Add argument ORIG_LOOP_VINFO which is not NULL if epilogue is vectorized, set up orig_loop_info field of loop_vinfo using passed argument. (vect_transform_loop): Check if created epilogue should be returned for further vectorization with a smaller VF. If-convert epilogue if required. Print vectorization success for epilogue. * tree-vectorizer.c (vectorize_loops): Add epilogue vectorization if it is required, pass loop_vinfo produced during vectorization of loop body to vect_analyze_loop. * tree-vectorizer.h (struct _loop_vec_info): Add new field orig_loop_info. (LOOP_VINFO_ORIG_LOOP_INFO): New. (LOOP_VINFO_EPILOGUE_P): New. (LOOP_VINFO_ORIG_VECT_FACTOR): New. (vect_do_peeling): Change prototype to return epilogue. (vect_analyze_loop): Add argument of loop_vec_info type. (vect_transform_loop): Return created loop. gcc/testsuite/ 2016-11-16 Yuri Rumyantsev * lib/target-supports.exp (check_avx2_hw_available): New. (check_effective_target_avx2_runtime): New. * gcc.dg/vect/vect-tail-nomask-1.c: New test. 
From-SVN: r242501 --- gcc/ChangeLog | 29 +++++ gcc/params.def | 5 + gcc/testsuite/ChangeLog | 6 + .../gcc.dg/vect/vect-tail-nomask-1.c | 106 ++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 41 +++++++ gcc/tree-if-conv.c | 2 +- gcc/tree-if-conv.h | 24 ++++ gcc/tree-vect-data-refs.c | 12 +- gcc/tree-vect-loop-manip.c | 10 +- gcc/tree-vect-loop.c | 102 +++++++++++++---- gcc/tree-vectorizer.c | 17 ++- gcc/tree-vectorizer.h | 19 +++- 12 files changed, 340 insertions(+), 33 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c create mode 100644 gcc/tree-if-conv.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c3b99edf227..3a48f13122c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2016-11-16 Yuri Rumyantsev + + * params.def (PARAM_VECT_EPILOGUES_NOMASK): New. + * tree-if-conv.c (tree_if_conversion): Make public. + * * tree-if-conv.h: New file. + * tree-vect-data-refs.c (vect_analyze_data_ref_dependences) Avoid + dynamic alias checks for epilogues. + * tree-vect-loop-manip.c (vect_do_peeling): Return created epilog. + * tree-vect-loop.c: include tree-if-conv.h. + (new_loop_vec_info): Add zeroing orig_loop_info field. + (vect_analyze_loop_2): Don't try to enhance alignment for epilogues. + (vect_analyze_loop): Add argument ORIG_LOOP_INFO which is not NULL + if epilogue is vectorized, set up orig_loop_info field of loop_vinfo + using passed argument. + (vect_transform_loop): Check if created epilogue should be returned + for further vectorization with less vf. If-convert epilogue if + required. Print vectorization success for epilogue. + * tree-vectorizer.c (vectorize_loops): Add epilogue vectorization + if it is required, pass loop_vinfo produced during vectorization of + loop body to vect_analyze_loop. + * tree-vectorizer.h (struct _loop_vec_info): Add new field + orig_loop_info. + (LOOP_VINFO_ORIG_LOOP_INFO): New. + (LOOP_VINFO_EPILOGUE_P): New. + (LOOP_VINFO_ORIG_VECT_FACTOR): New. 
+ (vect_do_peeling): Change prototype to return epilogue. + (vect_analyze_loop): Add argument of loop_vec_info type. + (vect_transform_loop): Return created loop. + 2016-11-16 Segher Boessenkool * config/rs6000/rs6000.c (rs6000_components_for_bb): Mark the LR diff --git a/gcc/params.def b/gcc/params.def index 89f70936d2e..50f75a728d2 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -1270,6 +1270,11 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS, "edge of a switch statement during VRP", 10, 0, 0) +DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK, + "vect-epilogues-nomask", + "Enable loop epilogue vectorization using smaller vector size.", + 0, 0, 1) + /* Local variables: diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6edf7150cc6..2d036754695 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-11-16 Yuri Rumyantsev + + * lib/target-supports.exp (check_avx2_hw_available): New. + (check_effective_target_avx2_runtime): New. + * gcc.dg/vect/vect-tail-nomask-1.c: New test. 
+ 2016-11-16 Tamar Christina PR testsuite/78136 diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c new file mode 100644 index 00000000000..dc016bb47c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c @@ -0,0 +1,106 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "--param vect-epilogues-nomask=1 -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +void __attribute__((noinline)) +test_citer (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +test_viter (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +init_data (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + for (int i = 0; i < size; i++) + { + a[i] = i; + b[i] = -i; + c[i] = 0; + asm volatile("": : :"memory"); + } + a[size] = b[size] = c[size] = size; +} + + +void __attribute__((noinline)) +run_test () +{ + int *a; + int *b; + int *c; + int i; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + + init_data (a, b, c, SIZE); + test_citer (a, b, c); + for 
(i = 0; i < SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + init_data (a, b, c, SIZE); + test_viter (a, b, c, SIZE); + for (i = 0; i < SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index b683c09c1db..e62b7688798 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1730,6 +1730,36 @@ proc check_avx_hw_available { } { }] } +# Return 1 if the target supports executing AVX2 instructions, 0 +# otherwise. Cache the result. + +proc check_avx2_hw_available { } { + return [check_cached_effective_target avx2_hw_available { + # If this is not the right target then we can skip the test. + if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } { + expr 0 + } else { + check_runtime_nocache avx2_hw_available { + #include "cpuid.h" + int main () + { + unsigned int eax, ebx, ecx, edx; + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx) + || ((ecx & bit_OSXSAVE) != bit_OSXSAVE)) + return 1; + + if (__get_cpuid_max (0, NULL) < 7) + return 1; + + __cpuid_count (7, 0, eax, ebx, ecx, edx); + + return (ebx & bit_AVX2) != bit_AVX2; + } + } "" + } + }] +} + # Return 1 if the target supports running SSE executables, 0 otherwise. 
proc check_effective_target_sse_runtime { } { @@ -1805,6 +1835,17 @@ proc check_effective_target_avx_runtime { } { return 0 } +# Return 1 if the target supports running AVX2 executables, 0 otherwise. + +proc check_effective_target_avx2_runtime { } { + if { [check_effective_target_avx2] + && [check_avx2_hw_available] + && [check_avx_os_support_available] } { + return 1 + } + return 0 +} + # Return 1 if we are compiling for 64-bit PowerPC but we do not use direct # move instructions for moves from GPR to FPR. diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 0a201898177..0b86ffebf7f 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -2734,7 +2734,7 @@ ifcvt_local_dce (basic_block bb) profitability analysis. Returns non-zero todo flags when something changed. */ -static unsigned int +unsigned int tree_if_conversion (struct loop *loop) { unsigned int todo = 0; diff --git a/gcc/tree-if-conv.h b/gcc/tree-if-conv.h new file mode 100644 index 00000000000..3a732c25bc7 --- /dev/null +++ b/gcc/tree-if-conv.h @@ -0,0 +1,24 @@ +/* Copyright (C) 2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#ifndef GCC_TREE_IF_CONV_H +#define GCC_TREE_IF_CONV_H + +unsigned int tree_if_conversion (struct loop *); + +#endif /* GCC_TREE_IF_CONV_H */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 220dc302627..5a303140833 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -480,9 +480,15 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf) LOOP_VINFO_LOOP_NEST (loop_vinfo), true)) return false; - FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr) - if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf)) - return false; + /* For epilogues we either have no aliases or alias versioning + was applied to original loop. Therefore we may just get max_vf + using VF of original loop. */ + if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) + *max_vf = LOOP_VINFO_ORIG_VECT_FACTOR (loop_vinfo); + else + FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr) + if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf)) + return false; return true; } diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 4c6b8c7459c..e13d6a2758b 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1614,11 +1614,13 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog) Note this function peels prolog and epilog only if it's necessary, as well as guards. + Returns created epilogue or NULL. TODO: Guard for prefer_scalar_loop should be emitted along with versioning conditions if loop versioning is needed. 
*/ -void + +struct loop * vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, tree *niters_vector, int th, bool check_profitability, bool niters_no_overflow) @@ -1634,7 +1636,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); if (!prolog_peeling && !epilog_peeling) - return; + return NULL; prob_vector = 9 * REG_BR_PROB_BASE / 10; if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2) @@ -1642,7 +1644,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf; vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *first_loop = loop; create_lcssa_for_virtual_phi (loop); update_ssa (TODO_update_ssa_only_virtuals); @@ -1824,6 +1826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, } adjust_vec.release (); free_original_copy_tables (); + + return epilog; } /* Function vect_create_cond_for_niters_checks. diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 53570f32d45..4150b0d9ee2 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple-fold.h" #include "cgraph.h" #include "tree-cfg.h" +#include "tree-if-conv.h" /* Loop Vectorization Pass. @@ -1171,6 +1172,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_PEELING_FOR_GAPS (res) = false; LOOP_VINFO_PEELING_FOR_NITER (res) = false; LOOP_VINFO_OPERANDS_SWAPPED (res) = false; + LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL; return res; } @@ -2046,15 +2048,20 @@ start_over: if (!ok) return false; - /* This pass will decide on using loop versioning and/or loop peeling in - order to enhance the alignment of data references in the loop. 
*/ - ok = vect_enhance_data_refs_alignment (loop_vinfo); - if (!ok) + /* Do not invoke vect_enhance_data_refs_alignment for eplilogue + vectorization. */ + if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "bad data alignment.\n"); - return false; + /* This pass will decide on using loop versioning and/or loop peeling in + order to enhance the alignment of data references in the loop. */ + ok = vect_enhance_data_refs_alignment (loop_vinfo); + if (!ok) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "bad data alignment.\n"); + return false; + } } if (slp) @@ -2308,9 +2315,10 @@ again: Apply a set of analyses on LOOP, and create a loop_vec_info struct for it. The different analyses will record information in the - loop_vec_info struct. */ + loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must + be vectorized. */ loop_vec_info -vect_analyze_loop (struct loop *loop) +vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo) { loop_vec_info loop_vinfo; unsigned int vector_sizes; @@ -2346,6 +2354,10 @@ vect_analyze_loop (struct loop *loop) } bool fatal = false; + + if (orig_loop_vinfo) + LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo; + if (vect_analyze_loop_2 (loop_vinfo, fatal)) { LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; @@ -6696,12 +6708,14 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo) The analysis phase has determined that the loop is vectorizable. Vectorize the loop - created vectorized stmts to replace the scalar - stmts in the loop, and update the loop exit condition. */ + stmts in the loop, and update the loop exit condition. + Returns scalar epilogue loop if any. 
*/ -void +struct loop * vect_transform_loop (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *epilogue = NULL; basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; int i; @@ -6780,8 +6794,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); - vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th, - check_profitability, niters_no_overflow); + epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th, + check_profitability, niters_no_overflow); if (niters_vector == NULL_TREE) { if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) @@ -7065,12 +7079,19 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (dump_enabled_p ()) { - dump_printf_loc (MSG_NOTE, vect_location, - "LOOP VECTORIZED\n"); - if (loop->inner) + if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) + { + dump_printf_loc (MSG_NOTE, vect_location, + "LOOP VECTORIZED\n"); + if (loop->inner) + dump_printf_loc (MSG_NOTE, vect_location, + "OUTER LOOP VECTORIZED\n"); + dump_printf (MSG_NOTE, "\n"); + } + else dump_printf_loc (MSG_NOTE, vect_location, - "OUTER LOOP VECTORIZED\n"); - dump_printf (MSG_NOTE, "\n"); + "LOOP EPILOGUE VECTORIZED (VS=%d)\n", + current_vector_size); } /* Free SLP instances here because otherwise stmt reference counting @@ -7082,6 +7103,49 @@ vect_transform_loop (loop_vec_info loop_vinfo) /* Clear-up safelen field since its value is invalid after vectorization since vectorized loop can have loop-carried dependencies. */ loop->safelen = 0; + + /* Don't vectorize epilogue for epilogue. 
*/ + if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) + epilogue = NULL; + + if (epilogue) + { + unsigned int vector_sizes + = targetm.vectorize.autovectorize_vector_sizes (); + vector_sizes &= current_vector_size - 1; + + if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) + epilogue = NULL; + else if (!vector_sizes) + epilogue = NULL; + else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) + { + int smallest_vec_size = 1 << ctz_hwi (vector_sizes); + int ratio = current_vector_size / smallest_vec_size; + int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo) + - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); + eiters = eiters % vf; + + epilogue->nb_iterations_upper_bound = eiters - 1; + + if (eiters < vf / ratio) + epilogue = NULL; + } + } + + if (epilogue) + { + epilogue->force_vectorize = loop->force_vectorize; + epilogue->safelen = loop->safelen; + epilogue->dont_vectorize = false; + + /* We may need to if-convert epilogue to vectorize it. */ + if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)) + tree_if_conversion (epilogue); + } + + return epilogue; } /* The code below is trying to perform simple optimization - revert diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 22e587afb4c..35d7a3ee0c5 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -514,6 +514,7 @@ vectorize_loops (void) hash_table *simd_array_to_simduid_htab = NULL; bool any_ifcvt_loops = false; unsigned ret = 0; + struct loop *new_loop; vect_loops_num = number_of_loops (cfun); @@ -538,7 +539,8 @@ vectorize_loops (void) && optimize_loop_nest_for_speed_p (loop)) || loop->force_vectorize) { - loop_vec_info loop_vinfo; + loop_vec_info loop_vinfo, orig_loop_vinfo = NULL; +vectorize_epilogue: vect_location = find_loop_location (loop); if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION && dump_enabled_p ()) @@ -546,7 +548,7 @@ vectorize_loops (void) LOCATION_FILE (vect_location), LOCATION_LINE (vect_location)); - loop_vinfo = vect_analyze_loop 
(loop); + loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo); loop->aux = loop_vinfo; if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) @@ -580,7 +582,7 @@ vectorize_loops (void) && dump_enabled_p ()) dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, "loop vectorized\n"); - vect_transform_loop (loop_vinfo); + new_loop = vect_transform_loop (loop_vinfo); num_vectorized_loops++; /* Now that the loop has been vectorized, allow it to be unrolled etc. */ @@ -602,6 +604,15 @@ vectorize_loops (void) fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node); ret |= TODO_cleanup_cfg; } + + if (new_loop) + { + /* Epilogue of vectorized loop must be vectorized too. */ + vect_loops_num = number_of_loops (cfun); + loop = new_loop; + orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */ + goto vectorize_epilogue; + } } vect_location = UNKNOWN_LOCATION; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 2a7cdfe27a5..2a7fa0a33a2 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -335,6 +335,10 @@ typedef struct _loop_vec_info : public vec_info { /* Mark loops having masked stores. */ bool has_mask_store; + /* For loops being epilogues of already vectorized loops + this points to the original vectorized loop. Otherwise NULL. */ + _loop_vec_info *orig_loop_info; + } *loop_vec_info; /* Access Functions. 
*/ @@ -374,6 +378,7 @@ typedef struct _loop_vec_info : public vec_info { #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost +#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ ((L)->may_misalign_stmts.length () > 0) @@ -389,6 +394,12 @@ typedef struct _loop_vec_info : public vec_info { #define LOOP_VINFO_NITERS_KNOWN_P(L) \ (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) +#define LOOP_VINFO_EPILOGUE_P(L) \ + (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) + +#define LOOP_VINFO_ORIG_VECT_FACTOR(L) \ + (LOOP_VINFO_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) + static inline loop_vec_info loop_vec_info_for_loop (struct loop *loop) { @@ -1032,8 +1043,8 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, struct loop *, edge); extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); -extern void vect_do_peeling (loop_vec_info, tree, tree, - tree *, int, bool, bool); +extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, + tree *, int, bool, bool); extern source_location find_loop_location (struct loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); @@ -1144,11 +1155,11 @@ extern void destroy_loop_vec_info (loop_vec_info, bool); extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool, bool *, bool); /* Drive for loop analysis stage. */ -extern loop_vec_info vect_analyze_loop (struct loop *); +extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); extern tree vect_build_loop_niters (loop_vec_info); extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool); /* Drive for loop transformation stage. 
*/ -extern void vect_transform_loop (loop_vec_info); +extern struct loop *vect_transform_loop (loop_vec_info); extern loop_vec_info vect_analyze_loop_form (struct loop *); extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *, slp_tree, int, gimple **); -- 2.30.2