From f63445e56c265757ebd50dc12fcd01773341b49f Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 20 May 2019 11:49:07 +0200 Subject: [PATCH] cfgloop.h (struct loop): Add simdlen member. * cfgloop.h (struct loop): Add simdlen member. * cfgloopmanip.c (copy_loop_info): Copy simdlen as well. * omp-expand.c (expand_omp_simd): Set it if simdlen clause is present. * tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0 as new argument to autovectorize_vector_sizes target hook. If loop->simdlen, pick up vector size where the vectorization factor is equal to loop->simd, and if there is none, fall back to the first successful one. (vect_transform_loop): Adjust autovectorize_vector_sizes target hook caller. * omp-low.c (omp_clause_aligned_alignment): Likewise. * omp-general.c (omp_max_vf): Likewise. * optabs-query.c (can_vec_mask_load_store_p): Likewise. * tree-vect-slp.c (vect_slp_bb): Likewise. * target.def (autovectorize_vector_sizes): Add ALL argument and document it. * doc/tm.texi: Adjust documentation. * targhooks.c (default_autovectorize_vector_sizes): Add bool argument. * targhooks.h (default_autovectorize_vector_sizes): Likewise. * config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add bool argument. * config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise. * config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise. * config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise. * config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if preferred vector size is not 512-bit or 256-bit, just put those unpreferred ones last. * gcc.target/i386/avx512f-simd-1.c: New test. From-SVN: r271403 --- gcc/ChangeLog | 31 ++++++++++ gcc/cfgloop.h | 3 + gcc/cfgloopmanip.c | 1 + gcc/config/aarch64/aarch64.c | 2 +- gcc/config/arc/arc.c | 2 +- gcc/config/arm/arm.c | 4 +- gcc/config/i386/i386.c | 13 ++++- gcc/config/mips/mips.c | 2 +- gcc/doc/tm.texi | 4 +- gcc/omp-expand.c | 7 +++ gcc/omp-general.c | 2 +- gcc/omp-low.c | 2 +- gcc/optabs-query.c | 2 +- gcc/target.def | 4 +- gcc/targhooks.c | 2 +- gcc/targhooks.h | 2 +- gcc/testsuite/ChangeLog | 4 ++ .../gcc.target/i386/avx512f-simd-1.c | 35 +++++++++++ gcc/tree-vect-loop.c | 58 ++++++++++++++++--- gcc/tree-vect-slp.c | 2 +- 20 files changed, 160 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-simd-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e07f8a11a05..682d5f75e6c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,34 @@ +2019-05-20 Jakub Jelinek + + * cfgloop.h (struct loop): Add simdlen member. + * cfgloopmanip.c (copy_loop_info): Copy simdlen as well. + * omp-expand.c (expand_omp_simd): Set it if simdlen clause is present. + * tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0 + as new argument to autovectorize_vector_sizes target hook. If + loop->simdlen, pick up vector size where the vectorization factor + is equal to loop->simd, and if there is none, fall back to the first + successful one. + (vect_transform_loop): Adjust autovectorize_vector_sizes target hook + caller. + * omp-low.c (omp_clause_aligned_alignment): Likewise. + * omp-general.c (omp_max_vf): Likewise. + * optabs-query.c (can_vec_mask_load_store_p): Likewise. + * tree-vect-slp.c (vect_slp_bb): Likewise. + * target.def (autovectorize_vector_sizes): Add ALL argument and + document it. + * doc/tm.texi: Adjust documentation. + * targhooks.c (default_autovectorize_vector_sizes): Add bool argument. + * targhooks.h (default_autovectorize_vector_sizes): Likewise. + * config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add + bool argument. + * config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise. + * config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise. + * config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise. + * config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If + true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if + preferred vector size is not 512-bit or 256-bit, just put those + unpreferred ones last. + 2019-05-20 Martin Liska * targhooks.c (default_libc_has_fast_function): New function. diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index e82cd7a034a..2f8ab106d03 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop { of the loop can be safely evaluated concurrently. */ int safelen; + /* Preferred vectorization factor for the loop if non-zero. */ + int simdlen; + /* Constraints are generally set by consumers and affect certain semantics of niter analyzer APIs. Currently the APIs affected are number_of_iterations_exit* functions and their callers. One typical diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index bfee48ed8cc..50250ec4d7c 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -1016,6 +1016,7 @@ copy_loop_info (struct loop *loop, struct loop *target) target->nb_iterations_estimate = loop->nb_iterations_estimate; target->estimate_state = loop->estimate_state; target->safelen = loop->safelen; + target->simdlen = loop->simdlen; target->constraints = loop->constraints; target->can_be_parallel = loop->can_be_parallel; target->warned_aggressive_loop_optimizations diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 971c4d0cf43..8a290dcd904 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14109,7 +14109,7 @@ aarch64_preferred_simd_mode (scalar_mode mode) /* Return a list of possible vector sizes for the vectorizer to iterate over. */ static void -aarch64_autovectorize_vector_sizes (vector_sizes *sizes) +aarch64_autovectorize_vector_sizes (vector_sizes *sizes, bool) { if (TARGET_SVE) sizes->safe_push (BYTES_PER_SVE_VECTOR); diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 1633d01898d..bce189958bc 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -480,7 +480,7 @@ arc_preferred_simd_mode (scalar_mode mode) TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ static void -arc_autovectorize_vector_sizes (vector_sizes *sizes) +arc_autovectorize_vector_sizes (vector_sizes *sizes, bool) { if (TARGET_PLUS_QMACW) { diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1d3be265b94..e3e71eaf2f6 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode, static void arm_conditional_register_usage (void); static enum flt_eval_method arm_excess_precision (enum excess_precision_type); static reg_class_t arm_preferred_rename_class (reg_class_t rclass); -static void arm_autovectorize_vector_sizes (vector_sizes *); +static void arm_autovectorize_vector_sizes (vector_sizes *, bool); static int arm_default_branch_cost (bool, bool); static int arm_cortex_a5_branch_cost (bool, bool); static int arm_cortex_m_branch_cost (bool, bool); @@ -28351,7 +28351,7 @@ arm_vector_alignment (const_tree type) } static void -arm_autovectorize_vector_sizes (vector_sizes *sizes) +arm_autovectorize_vector_sizes (vector_sizes *sizes, bool) { if (!TARGET_NEON_VECTORIZE_DOUBLE) { diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 384c63385fd..696a4745303 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -21332,7 +21332,7 @@ ix86_preferred_simd_mode (scalar_mode mode) 256bit and 128bit vectors. */ static void -ix86_autovectorize_vector_sizes (vector_sizes *sizes) +ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all) { if (TARGET_AVX512F && !TARGET_PREFER_AVX256) { @@ -21340,11 +21340,22 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes) sizes->safe_push (32); sizes->safe_push (16); } + else if (TARGET_AVX512F && all) + { + sizes->safe_push (32); + sizes->safe_push (16); + sizes->safe_push (64); + } else if (TARGET_AVX && !TARGET_PREFER_AVX128) { sizes->safe_push (32); sizes->safe_push (16); } + else if (TARGET_AVX && all) + { + sizes->safe_push (16); + sizes->safe_push (32); + } } /* Implemenation of targetm.vectorize.get_mask_mode. */ diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 42cafed84ab..6eafe3df3c1 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -13460,7 +13460,7 @@ mips_preferred_simd_mode (scalar_mode mode) /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ static void -mips_autovectorize_vector_sizes (vector_sizes *sizes) +mips_autovectorize_vector_sizes (vector_sizes *sizes, bool) { if (ISA_HAS_MSA) sizes->safe_push (16); diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 0941039536b..622e8cf240f 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6021,11 +6021,13 @@ against lower halves of vectors recursively until the specified mode is reached. The default is @var{mode} which means no splitting. @end deftypefn -@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}) +@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}, bool @var{all}) If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not the only one that is worth considering, this hook should add all suitable vector sizes to @var{sizes}, in order of decreasing preference. The first one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}. +If @var{all} is true, add suitable vector sizes even when they are generally +not expected to be worthwhile. The hook does not need to do anything if the vector returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 74159734fc8..0d7f104a2f2 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) && loop->safelen > 1) { loop->force_vectorize = true; + if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))) + { + unsigned HOST_WIDE_INT v + = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)); + if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen) + loop->simdlen = v; + } cfun->has_force_vectorize_loops = true; } else if (dont_vectorize) diff --git a/gcc/omp-general.c b/gcc/omp-general.c index 82f0a04eab0..4a9b15ccde4 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -469,7 +469,7 @@ omp_max_vf (void) return 1; auto_vector_sizes sizes; - targetm.vectorize.autovectorize_vector_sizes (&sizes); + targetm.vectorize.autovectorize_vector_sizes (&sizes, true); if (!sizes.is_empty ()) { poly_uint64 vf = 0; diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 04fc5f6aaa8..26ee70db0b3 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -3600,7 +3600,7 @@ omp_clause_aligned_alignment (tree clause) unsigned int al = 1; opt_scalar_mode mode_iter; auto_vector_sizes sizes; - targetm.vectorize.autovectorize_vector_sizes (&sizes); + targetm.vectorize.autovectorize_vector_sizes (&sizes, true); poly_uint64 vs = 0; for (unsigned int i = 0; i < sizes.length (); ++i) vs = ordered_max (vs, sizes[i]); diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index 71c73fb43cc..04c8d08115b 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -593,7 +593,7 @@ can_vec_mask_load_store_p (machine_mode mode, return true; auto_vector_sizes vector_sizes; - targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); + targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, true); for (unsigned int i = 0; i < vector_sizes.length (); ++i) { poly_uint64 cur = vector_sizes[i]; diff --git a/gcc/target.def b/gcc/target.def index 23e260cb535..7d52102c815 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1899,12 +1899,14 @@ DEFHOOK the only one that is worth considering, this hook should add all suitable\n\ vector sizes to @var{sizes}, in order of decreasing preference. The first\n\ one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\ +If @var{all} is true, add suitable vector sizes even when they are generally\n\ +not expected to be worthwhile.\n\ \n\ The hook does not need to do anything if the vector returned by\n\ @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\ for autovectorization. The default implementation does nothing.", void, - (vector_sizes *sizes), + (vector_sizes *sizes, bool all), default_autovectorize_vector_sizes) /* Function to get a target mode for a vector mask. */ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index d820618b8fe..b27111639f4 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1316,7 +1316,7 @@ default_split_reduction (machine_mode mode) is tried. */ void -default_autovectorize_vector_sizes (vector_sizes *) +default_autovectorize_vector_sizes (vector_sizes *, bool) { } diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 810c2b1f065..229aacd7b65 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -110,7 +110,7 @@ default_builtin_support_vector_misalignment (machine_mode mode, int, bool); extern machine_mode default_preferred_simd_mode (scalar_mode mode); extern machine_mode default_split_reduction (machine_mode); -extern void default_autovectorize_vector_sizes (vector_sizes *); +extern void default_autovectorize_vector_sizes (vector_sizes *, bool); extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64); extern bool default_empty_mask_is_expensive (unsigned); extern void *default_init_cost (struct loop *); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9b1e6d3a413..3ecff36b7b5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2019-05-20 Jakub Jelinek + + * gcc.target/i386/avx512f-simd-1.c: New test. + 2019-05-20 Christophe Lyon * gcc.target/aarch64/target_attr_10.c: Add quotes to expected diff --git a/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c new file mode 100644 index 00000000000..235fb917e17 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */ +/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */ +/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */ +/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */ + +#define N 1024 +int a[N]; + +void +f1 (void) +{ + int i; + #pragma omp simd simdlen (4) + for (i = 0; i < N; ++i) + a[i] = a[i] + 1; +} + +void +f2 (void) +{ + int i; + #pragma omp simd simdlen (8) + for (i = 0; i < N; ++i) + a[i] = a[i] + 2; +} + +void +f3 (void) +{ + int i; + #pragma omp simd simdlen (16) + for (i = 0; i < N; ++i) + a[i] = a[i] + 3; +} diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 57764176df0..e1229a51c48 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2254,7 +2254,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, /* Autodetect first vector size we try. */ current_vector_size = 0; - targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); + targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, + loop->simdlen != 0); unsigned int next_size = 0; DUMP_VECT_SCOPE ("analyze_loop_nest"); @@ -2273,6 +2274,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, unsigned n_stmts = 0; poly_uint64 autodetected_vector_size = 0; + opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); + poly_uint64 first_vector_size = 0; while (1) { /* Check the CFG characteristics of the loop (nesting, entry/exit). */ @@ -2283,6 +2286,7 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "bad loop form.\n"); + gcc_checking_assert (first_loop_vinfo == NULL); return loop_vinfo; } @@ -2296,10 +2300,27 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, { LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; - return loop_vinfo; + if (loop->simdlen + && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo), + (unsigned HOST_WIDE_INT) loop->simdlen)) + { + if (first_loop_vinfo == NULL) + { + first_loop_vinfo = loop_vinfo; + first_vector_size = current_vector_size; + loop->aux = NULL; + } + else + delete loop_vinfo; + } + else + { + delete first_loop_vinfo; + return loop_vinfo; + } } - - delete loop_vinfo; + else + delete loop_vinfo; if (next_size == 0) autodetected_vector_size = current_vector_size; @@ -2308,10 +2329,31 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, && known_eq (vector_sizes[next_size], autodetected_vector_size)) next_size += 1; - if (fatal - || next_size == vector_sizes.length () + if (fatal) + { + gcc_checking_assert (first_loop_vinfo == NULL); + return opt_loop_vec_info::propagate_failure (res); + } + + if (next_size == vector_sizes.length () || known_eq (current_vector_size, 0U)) - return opt_loop_vec_info::propagate_failure (res); + { + if (first_loop_vinfo) + { + current_vector_size = first_vector_size; + loop->aux = (loop_vec_info) first_loop_vinfo; + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "***** Choosing vector size "); + dump_dec (MSG_NOTE, current_vector_size); + dump_printf (MSG_NOTE, "\n"); + } + return first_loop_vinfo; + } + else + return opt_loop_vec_info::propagate_failure (res); + } /* Try the next biggest vector size. */ current_vector_size = vector_sizes[next_size++]; @@ -8670,7 +8712,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (epilogue) { auto_vector_sizes vector_sizes; - targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); + targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false); unsigned int next_size = 0; /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 52c7b47d809..2810228f9a5 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2983,7 +2983,7 @@ vect_slp_bb (basic_block bb) /* Autodetect first vector size we try. */ current_vector_size = 0; - targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); + targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false); unsigned int next_size = 0; gsi = gsi_start_bb (bb); -- 2.30.2