From 76a34e3f8565e36d164006e62f7380bfe6057154 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 13 Jan 2018 17:59:40 +0000 Subject: [PATCH] Add an empty_mask_is_expensive hook This patch adds a hook to control whether we avoid executing masked (predicated) stores when the mask is all false. We don't want to do that by default for SVE. 2018-01-13 Richard Sandiford Alan Hayward David Sherwood gcc/ * target.def (empty_mask_is_expensive): New hook. * doc/tm.texi.in (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): New hook. * doc/tm.texi: Regenerate. * targhooks.h (default_empty_mask_is_expensive): Declare. * targhooks.c (default_empty_mask_is_expensive): New function. * tree-vectorizer.c (vectorize_loops): Only call optimize_mask_stores if the target says that empty masks are expensive. * config/aarch64/aarch64.c (aarch64_empty_mask_is_expensive): New function. (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Redefine. Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r256631 --- gcc/ChangeLog | 15 +++++++++++++++ gcc/config/aarch64/aarch64.c | 13 +++++++++++++ gcc/doc/tm.texi | 6 ++++++ gcc/doc/tm.texi.in | 2 ++ gcc/target.def | 11 +++++++++++ gcc/targhooks.c | 8 ++++++++ gcc/targhooks.h | 1 + gcc/tree-vectorizer.c | 3 ++- 8 files changed, 58 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c4cbe9d89e8..1b52ec1f2cb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2018-01-13 Richard Sandiford + Alan Hayward + David Sherwood + + * target.def (empty_mask_is_expensive): New hook. + * doc/tm.texi.in (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): New hook. + * doc/tm.texi: Regenerate. + * targhooks.h (default_empty_mask_is_expensive): Declare. + * targhooks.c (default_empty_mask_is_expensive): New function. + * tree-vectorizer.c (vectorize_loops): Only call optimize_mask_stores + if the target says that empty masks are expensive. + * config/aarch64/aarch64.c (aarch64_empty_mask_is_expensive): + New function. + (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Redefine. + 2018-01-13 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 964cc540d85..6d6294a3c39 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -16875,6 +16875,16 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, return true; } +/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that + it isn't worth branching around empty masked ops (including masked + stores). */ + +static bool +aarch64_empty_mask_is_expensive (unsigned) +{ + return false; +} + /* Return 1 if pseudo register should be created and used to hold GOT address for PIC code. */ @@ -17499,6 +17509,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_VECTORIZE_GET_MASK_MODE #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode +#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE +#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \ + aarch64_empty_mask_is_expensive #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 25b0a1b2123..faf7b8b7a9c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5924,6 +5924,12 @@ is @var{length} bytes long and that contains @var{nunits} elements, if such a mode exists. @end deftypefn +@deftypefn {Target Hook} bool TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE (unsigned @var{ifn}) +This hook returns true if masked internal function @var{ifn} (really of +type @code{internal_fn}) should be considered expensive when the mask is +all zeros. GCC can then try to branch around the instruction instead. +@end deftypefn + @deftypefn {Target Hook} {void *} TARGET_VECTORIZE_INIT_COST (struct loop *@var{loop_info}) This hook should initialize target-specific data structures in preparation for modeling the costs of vectorizing a loop or basic block. The default allocates three unsigned integers for accumulating costs for the prologue, body, and epilogue of the loop or basic block. If @var{loop_info} is non-NULL, it identifies the loop being vectorized; otherwise a single block is being vectorized. @end deftypefn diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index b0ac8b2fd8f..84bd973c164 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4108,6 +4108,8 @@ address; but often a machine-dependent strategy can generate better code. @hook TARGET_VECTORIZE_GET_MASK_MODE +@hook TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE + @hook TARGET_VECTORIZE_INIT_COST @hook TARGET_VECTORIZE_ADD_STMT_COST diff --git a/gcc/target.def b/gcc/target.def index 783ac999809..94a0ad4dca9 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1933,6 +1933,17 @@ if such a mode exists.", (poly_uint64 nunits, poly_uint64 length), default_get_mask_mode) +/* Function to say whether a masked operation is expensive when the + mask is all zeros. */ +DEFHOOK +(empty_mask_is_expensive, + "This hook returns true if masked internal function @var{ifn} (really of\n\ +type @code{internal_fn}) should be considered expensive when the mask is\n\ +all zeros. GCC can then try to branch around the instruction instead.", + bool, + (unsigned ifn), + default_empty_mask_is_expensive) + /* Target builtin that implements vector gather operation. */ DEFHOOK (builtin_gather, diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 5b60944a071..d83076fdff8 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1319,6 +1319,14 @@ default_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size) return opt_machine_mode (); } +/* By default consider masked stores to be expensive. */ + +bool +default_empty_mask_is_expensive (unsigned ifn) +{ + return ifn == IFN_MASK_STORE; +} + /* By default, the cost model accumulates three separate costs (prologue, loop body, and epilogue) for a vectorized loop or block. So allocate an array of three unsigned ints, set it to zero, and return its address. */ diff --git a/gcc/targhooks.h b/gcc/targhooks.h index f55fde773d1..eca35d2b1ac 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -111,6 +111,7 @@ extern machine_mode default_preferred_simd_mode (scalar_mode mode); extern machine_mode default_split_reduction (machine_mode); extern void default_autovectorize_vector_sizes (vector_sizes *); extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64); +extern bool default_empty_mask_is_expensive (unsigned); extern void *default_init_cost (struct loop *); extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt, struct _stmt_vec_info *, int, diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index a0cc2d6bddb..fb81b986529 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -826,7 +826,8 @@ vectorize_loops (void) if (loop_vinfo) has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); delete loop_vinfo; - if (has_mask_store) + if (has_mask_store + && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE)) optimize_mask_stores (loop); loop->aux = NULL; } -- 2.30.2