From e0bd6c9f0aa67d88bbb20019362a4572fc5fac3c Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Tue, 12 Sep 2017 13:27:55 +0000
Subject: [PATCH] Turn SLOW_UNALIGNED_ACCESS into a target hook

2017-09-12  Richard Sandiford
	    Alan Hayward
	    David Sherwood

gcc/
	* defaults.h (SLOW_UNALIGNED_ACCESS): Delete.
	* target.def (slow_unaligned_access): New hook.
	* targhooks.h (default_slow_unaligned_access): Declare.
	* targhooks.c (default_slow_unaligned_access): New function.
	* doc/tm.texi.in (SLOW_UNALIGNED_ACCESS): Replace with...
	(TARGET_SLOW_UNALIGNED_ACCESS): ...this.
	* doc/tm.texi: Regenerate.
	* config/alpha/alpha.h (SLOW_UNALIGNED_ACCESS): Delete.
	* config/arm/arm.h (SLOW_UNALIGNED_ACCESS): Delete.
	* config/i386/i386.h (SLOW_UNALIGNED_ACCESS): Delete commented-out
	definition.
	* config/powerpcspe/powerpcspe.h (SLOW_UNALIGNED_ACCESS): Delete.
	* config/powerpcspe/powerpcspe.c (TARGET_SLOW_UNALIGNED_ACCESS):
	Redefine.
	(rs6000_slow_unaligned_access): New function.
	(rs6000_emit_move): Use it instead of SLOW_UNALIGNED_ACCESS.
	(expand_block_compare): Likewise.
	(expand_strn_compare): Likewise.
	(rs6000_rtx_costs): Likewise.
	* config/riscv/riscv.h (SLOW_UNALIGNED_ACCESS): Delete.
	(riscv_slow_unaligned_access): Likewise.
	* config/riscv/riscv.c (riscv_slow_unaligned_access): Rename to...
	(riscv_slow_unaligned_access_p): ...this and make static.
	(riscv_option_override): Update accordingly.
	(riscv_slow_unaligned_access): New function.
	(TARGET_SLOW_UNALIGNED_ACCESS): Redefine.
	* config/rs6000/rs6000.h (SLOW_UNALIGNED_ACCESS): Delete.
	* config/rs6000/rs6000.c (TARGET_SLOW_UNALIGNED_ACCESS): Redefine.
	(rs6000_slow_unaligned_access): New function.
	(rs6000_emit_move): Use it instead of SLOW_UNALIGNED_ACCESS.
	(rs6000_rtx_costs): Likewise.
	* config/rs6000/rs6000-string.c (expand_block_compare)
	(expand_strn_compare): Use targetm.slow_unaligned_access instead
	of SLOW_UNALIGNED_ACCESS.
	* config/tilegx/tilegx.h (SLOW_UNALIGNED_ACCESS): Delete.
	* config/tilepro/tilepro.h (SLOW_UNALIGNED_ACCESS): Delete.
	* calls.c (expand_call): Use targetm.slow_unaligned_access instead
	of SLOW_UNALIGNED_ACCESS.
	* expmed.c (simple_mem_bitfield_p): Likewise.
	* expr.c (alignment_for_piecewise_move): Likewise.
	(emit_group_load_1): Likewise.
	(emit_group_store): Likewise.
	(copy_blkmode_from_reg): Likewise.
	(emit_push_insn): Likewise.
	(expand_assignment): Likewise.
	(store_field): Likewise.
	(expand_expr_real_1): Likewise.
	* gimple-fold.c (gimple_fold_builtin_memory_op): Likewise.
	* lra-constraints.c (simplify_operand_subreg): Likewise.
	* stor-layout.c (bit_field_mode_iterator::next_mode): Likewise.
	* gimple-ssa-store-merging.c: Likewise in block comment at start
	of file.
	* tree-ssa-strlen.c: Include target.h.
	(handle_builtin_memcmp): Use targetm.slow_unaligned_access instead
	of SLOW_UNALIGNED_ACCESS.
	* system.h (SLOW_UNALIGNED_ACCESS): Poison.
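A back end that wants something other than the new default (the default hook
simply returns STRICT_ALIGNMENT, see targhooks.c below) follows the same
pattern as the riscv.c and rs6000.c changes in this patch: a static hook
implementation plus a TARGET_SLOW_UNALIGNED_ACCESS define.  The following is
only an illustrative sketch for a hypothetical port; the example_* names and
the "wider than a word is slow" policy are placeholders, not part of this
patch.

/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  MODE is the access mode and
   ALIGN the known alignment of the access, in bits.  */

static bool
example_slow_unaligned_access (machine_mode mode, unsigned int align)
{
  /* Assumed policy for this sketch: unaligned accesses no wider than a
     word are handled in hardware, wider ones are emulated and very slow.
     The hook must return true whenever STRICT_ALIGNMENT is true.  */
  return (STRICT_ALIGNMENT
	  || (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	      && align < GET_MODE_ALIGNMENT (mode)));
}

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS example_slow_unaligned_access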
Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r252009 --- gcc/ChangeLog | 61 ++++++++++++++++++++++++++++++ gcc/calls.c | 4 +- gcc/config/alpha/alpha.h | 6 --- gcc/config/arm/arm.h | 2 - gcc/config/i386/i386.h | 14 ------- gcc/config/powerpcspe/powerpcspe.c | 47 ++++++++++++++++------- gcc/config/powerpcspe/powerpcspe.h | 14 ------- gcc/config/riscv/riscv.c | 17 +++++++-- gcc/config/riscv/riscv.h | 3 -- gcc/config/rs6000/rs6000-string.c | 13 ++++--- gcc/config/rs6000/rs6000.c | 34 +++++++++++++---- gcc/config/rs6000/rs6000.h | 13 ------- gcc/config/tilegx/tilegx.h | 3 -- gcc/config/tilepro/tilepro.h | 3 -- gcc/defaults.h | 4 -- gcc/doc/tm.texi | 23 ++++++----- gcc/doc/tm.texi.in | 18 +-------- gcc/expmed.c | 2 +- gcc/expr.c | 19 +++++----- gcc/gimple-fold.c | 4 +- gcc/gimple-ssa-store-merging.c | 2 +- gcc/lra-constraints.c | 5 ++- gcc/stor-layout.c | 2 +- gcc/system.h | 2 +- gcc/target.def | 19 ++++++++++ gcc/targhooks.c | 8 ++++ gcc/targhooks.h | 1 + gcc/tree-ssa-strlen.c | 3 +- 28 files changed, 204 insertions(+), 142 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 267eabf5895..ce59e7a2ba9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,64 @@ +2017-09-12 Richard Sandiford + Alan Hayward + David Sherwood + + * defaults.h (SLOW_UNALIGNED_ACCESS): Delete. + * target.def (slow_unaligned_access): New hook. + * targhooks.h (default_slow_unaligned_access): Declare. + * targhooks.c (default_slow_unaligned_access): New function. + * doc/tm.texi.in (SLOW_UNALIGNED_ACCESS): Replace with... + (TARGET_SLOW_UNALIGNED_ACCESS): ...this. + * doc/tm.texi: Regenerate. + * config/alpha/alpha.h (SLOW_UNALIGNED_ACCESS): Delete. + * config/arm/arm.h (SLOW_UNALIGNED_ACCESS): Delete. + * config/i386/i386.h (SLOW_UNALIGNED_ACCESS): Delete commented-out + definition. + * config/powerpcspe/powerpcspe.h (SLOW_UNALIGNED_ACCESS): Delete. + * config/powerpcspe/powerpcspe.c (TARGET_SLOW_UNALIGNED_ACCESS): + Redefine. + (rs6000_slow_unaligned_access): New function. + (rs6000_emit_move): Use it instead of SLOW_UNALIGNED_ACCESS. + (expand_block_compare): Likewise. + (expand_strn_compare): Likewise. + (rs6000_rtx_costs): Likewise. + * config/riscv/riscv.h (SLOW_UNALIGNED_ACCESS): Delete. + (riscv_slow_unaligned_access): Likewise. + * config/riscv/riscv.c (riscv_slow_unaligned_access): Rename to... + (riscv_slow_unaligned_access_p): ...this and make static. + (riscv_option_override): Update accordingly. + (riscv_slow_unaligned_access): New function. + (TARGET_SLOW_UNALIGNED_ACCESS): Redefine. + * config/rs6000/rs6000.h (SLOW_UNALIGNED_ACCESS): Delete. + * config/rs6000/rs6000.c (TARGET_SLOW_UNALIGNED_ACCESS): Redefine. + (rs6000_slow_unaligned_access): New function. + (rs6000_emit_move): Use it instead of SLOW_UNALIGNED_ACCESS. + (rs6000_rtx_costs): Likewise. + * config/rs6000/rs6000-string.c (expand_block_compare) + (expand_strn_compare): Use targetm.slow_unaligned_access instead + of SLOW_UNALIGNED_ACCESS. + * config/tilegx/tilegx.h (SLOW_UNALIGNED_ACCESS): Delete. + * config/tilepro/tilepro.h (SLOW_UNALIGNED_ACCESS): Delete. + * calls.c (expand_call): Use targetm.slow_unaligned_access instead + of SLOW_UNALIGNED_ACCESS. + * expmed.c (simple_mem_bitfield_p): Likewise. + * expr.c (alignment_for_piecewise_move): Likewise. + (emit_group_load_1): Likewise. + (emit_group_store): Likewise. + (copy_blkmode_from_reg): Likewise. + (emit_push_insn): Likewise. + (expand_assignment): Likewise. + (store_field): Likewise. + (expand_expr_real_1): Likewise. 
+ * gimple-fold.c (gimple_fold_builtin_memory_op): Likewise. + * lra-constraints.c (simplify_operand_subreg): Likewise. + * stor-layout.c (bit_field_mode_iterator::next_mode): Likewise. + * gimple-ssa-store-merging.c: Likewise in block comment at start + of file. + * tree-ssa-strlen.c: Include target.h. + (handle_builtin_memcmp): Use targetm.slow_unaligned_access instead + of SLOW_UNALIGNED_ACCESS. + * system.h (SLOW_UNALIGNED_ACCESS): Poison. + 2017-09-12 Richard Sandiford PR rtl-optimization/82185 diff --git a/gcc/calls.c b/gcc/calls.c index a4271bb6ab7..f55e89882de 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -3135,8 +3135,8 @@ expand_call (tree exp, rtx target, int ignore) && target && MEM_P (target) && !(MEM_ALIGN (target) < TYPE_ALIGN (rettype) - && SLOW_UNALIGNED_ACCESS (TYPE_MODE (rettype), - MEM_ALIGN (target)))) + && targetm.slow_unaligned_access (TYPE_MODE (rettype), + MEM_ALIGN (target)))) structure_value_addr = XEXP (target, 0); else { diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index 8bca5b3408b..7d81ac4cf3c 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -300,12 +300,6 @@ extern enum alpha_fp_trap_mode alpha_fptm; #define STRICT_ALIGNMENT 1 -/* Set this nonzero if unaligned move instructions are extremely slow. - - On the Alpha, they trap. */ - -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 - /* Standard register usage. */ /* Number of actual hardware registers. diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 5fdb65be646..bef66027687 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1917,8 +1917,6 @@ enum arm_auto_incmodes /* Nonzero if access to memory by bytes is slow and undesirable. */ #define SLOW_BYTE_ACCESS 0 -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 - /* Immediate shift counts are truncated by the output routines (or was it the assembler?). Shift counts in a register are truncated by ARM. Note that the native compiler puts too large (> 32) immediate shift counts diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 873122caa2b..e8ed8976f44 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2017,20 +2017,6 @@ do { \ /* Nonzero if access to memory by shorts is slow and undesirable. */ #define SLOW_SHORT_ACCESS 0 -/* Define this macro to be the value 1 if unaligned accesses have a - cost many times greater than aligned accesses, for example if they - are emulated in a trap handler. - - When this macro is nonzero, the compiler will act as if - `STRICT_ALIGNMENT' were nonzero when generating code for block - moves. This can cause significantly more instructions to be - produced. Therefore, do not set this macro nonzero if unaligned - accesses only add a cycle or two to the time for a memory access. - - If the value of this macro is always zero, it need not be defined. */ - -/* #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 0 */ - /* Define this macro if it is as good or better to call a constant function address than to call an address kept in a register. 
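The generic-code call sites converted by this patch (calls.c above, and
expmed.c, expr.c, gimple-fold.c, lra-constraints.c, stor-layout.c and
tree-ssa-strlen.c below) all take the same shape: the hook only matters when
the reference really is unaligned, so the call is paired with a comparison
against GET_MODE_ALIGNMENT, and the caller must now include target.h to reach
targetm.  A condensed sketch of that caller-side pattern; the helper name is
hypothetical and the fragment assumes the usual GCC includes:

/* Return true if accessing MEM in MODE is not expected to be penalised
   by the target's unaligned-access handling.  Requires target.h for
   targetm, mirroring the includes this patch adds to rs6000-string.c
   and tree-ssa-strlen.c.  */

static bool
mem_access_is_cheap_p (rtx mem, machine_mode mode)
{
  return (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (mode)
	  || !targetm.slow_unaligned_access (mode, MEM_ALIGN (mem)));
}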
diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c index b964e6ed776..446a8bbe1ea 100644 --- a/gcc/config/powerpcspe/powerpcspe.c +++ b/gcc/config/powerpcspe/powerpcspe.c @@ -1986,6 +1986,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ rs6000_hard_regno_call_part_clobbered + +#undef TARGET_SLOW_UNALIGNED_ACCESS +#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access /* Processor table. */ @@ -8366,6 +8369,21 @@ rs6000_data_alignment (tree type, unsigned int align, enum data_align how) return align; } +/* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory + instructions simply ignore the low bits; SPE vector memory + instructions trap on unaligned accesses; VSX memory instructions are + aligned to 4 or 8 bytes. */ + +static bool +rs6000_slow_unaligned_access (machine_mode mode, unsigned int align) +{ + return (STRICT_ALIGNMENT + || (!TARGET_EFFICIENT_UNALIGNED_VSX + && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32) + || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)) + && (int) align < VECTOR_ALIGN (mode))))); +} + /* Previous GCC releases forced all vector types to have 16-byte alignment. */ bool @@ -11015,13 +11033,14 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM && mode == DImode - && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0])) - || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1]))) - && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32 - ? 32 : MEM_ALIGN (operands[0]))) - || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32 - ? 32 - : MEM_ALIGN (operands[1])))) + && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0])) + || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1]))) + && ! (rs6000_slow_unaligned_access (SImode, + (MEM_ALIGN (operands[0]) > 32 + ? 32 : MEM_ALIGN (operands[0]))) + || rs6000_slow_unaligned_access (SImode, + (MEM_ALIGN (operands[1]) > 32 + ? 32 : MEM_ALIGN (operands[1])))) && ! MEM_VOLATILE_P (operands [0]) && ! MEM_VOLATILE_P (operands [1])) { @@ -19989,9 +20008,9 @@ expand_block_compare (rtx operands[]) unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; - /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ - if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1)) - || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2))) + /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */ + if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1)) + || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2))) return false; gcc_assert (GET_MODE (target) == SImode); @@ -20380,9 +20399,9 @@ expand_strn_compare (rtx operands[], int no_length) int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT; int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT; - /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ - if (SLOW_UNALIGNED_ACCESS (word_mode, align1) - || SLOW_UNALIGNED_ACCESS (word_mode, align2)) + /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */ + if (rs6000_slow_unaligned_access (word_mode, align1) + || rs6000_slow_unaligned_access (word_mode, align2)) return false; gcc_assert (GET_MODE (target) == SImode); @@ -37439,7 +37458,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, than generating address, e.g., (plus (reg) (const)). L1 cache latency is about two instructions. 
*/ *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); - if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x))) + if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x))) *total += COSTS_N_INSNS (100); return true; diff --git a/gcc/config/powerpcspe/powerpcspe.h b/gcc/config/powerpcspe/powerpcspe.h index 548e615ab37..a3b234706fa 100644 --- a/gcc/config/powerpcspe/powerpcspe.h +++ b/gcc/config/powerpcspe/powerpcspe.h @@ -998,20 +998,6 @@ enum data_align { align_abi, align_opt, align_both }; /* Nonzero if move instructions will actually fail to work when given unaligned data. */ #define STRICT_ALIGNMENT 0 - -/* Define this macro to be the value 1 if unaligned accesses have a cost - many times greater than aligned accesses, for example if they are - emulated in a trap handler. */ -/* Altivec vector memory instructions simply ignore the low bits; SPE vector - memory instructions trap on unaligned accesses; VSX memory instructions are - aligned to 4 or 8 bytes. */ -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) \ - (STRICT_ALIGNMENT \ - || (!TARGET_EFFICIENT_UNALIGNED_VSX \ - && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) && (ALIGN) < 32) \ - || ((VECTOR_MODE_P (MODE) || FLOAT128_VECTOR_P (MODE)) \ - && (int) (ALIGN) < VECTOR_ALIGN (MODE))))) - /* Standard register usage. */ diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index 62472e934ff..0e440f7a0d3 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -217,7 +217,7 @@ struct riscv_cpu_info { /* Global variables for machine-dependent things. */ /* Whether unaligned accesses execute very slowly. */ -bool riscv_slow_unaligned_access; +static bool riscv_slow_unaligned_access_p; /* Which tuning parameters to use. */ static const struct riscv_tune_info *tune_info; @@ -3744,8 +3744,8 @@ riscv_option_override (void) /* Use -mtune's setting for slow_unaligned_access, even when optimizing for size. For architectures that trap and emulate unaligned accesses, the performance cost is too great, even for -Os. */ - riscv_slow_unaligned_access = (cpu->tune_info->slow_unaligned_access - || TARGET_STRICT_ALIGN); + riscv_slow_unaligned_access_p = (cpu->tune_info->slow_unaligned_access + || TARGET_STRICT_ALIGN); /* If the user hasn't specified a branch cost, use the processor's default. */ @@ -3966,6 +3966,14 @@ riscv_cannot_copy_insn_p (rtx_insn *insn) return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn); } +/* Implement TARGET_SLOW_UNALIGNED_ACCESS. */ + +static bool +riscv_slow_unaligned_access (machine_mode, unsigned int) +{ + return riscv_slow_unaligned_access_p; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -4102,6 +4110,9 @@ riscv_cannot_copy_insn_p (rtx_insn *insn) #undef TARGET_MODES_TIEABLE_P #define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p +#undef TARGET_SLOW_UNALIGNED_ACCESS +#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-riscv.h" diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index c0cf8c37b86..d851fd861a2 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -130,8 +130,6 @@ along with GCC; see the file COPYING3. If not see of the privileged architecture. 
*/ #define STRICT_ALIGNMENT TARGET_STRICT_ALIGN -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) riscv_slow_unaligned_access - /* Define this if you wish to imitate the way many other C compilers handle alignment of bitfields and the structures that contain them. @@ -854,7 +852,6 @@ while (0) #ifndef USED_FOR_TARGET extern const enum reg_class riscv_regno_to_class[]; -extern bool riscv_slow_unaligned_access; #endif #define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c index fb57ab3d4cf..19463c98687 100644 --- a/gcc/config/rs6000/rs6000-string.c +++ b/gcc/config/rs6000/rs6000-string.c @@ -32,6 +32,7 @@ #include "explow.h" #include "expr.h" #include "output.h" +#include "target.h" /* Expand a block clear operation, and return 1 if successful. Return 0 if we should let the compiler generate normal code. @@ -338,9 +339,9 @@ expand_block_compare (rtx operands[]) unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; - /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ - if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1)) - || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2))) + /* targetm.slow_unaligned_access -- don't do unaligned stuff. */ + if (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1)) + || targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2))) return false; gcc_assert (GET_MODE (target) == SImode); @@ -729,9 +730,9 @@ expand_strn_compare (rtx operands[], int no_length) int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT; int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT; - /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ - if (SLOW_UNALIGNED_ACCESS (word_mode, align1) - || SLOW_UNALIGNED_ACCESS (word_mode, align2)) + /* targetm.slow_unaligned_access -- don't do unaligned stuff. */ + if (targetm.slow_unaligned_access (word_mode, align1) + || targetm.slow_unaligned_access (word_mode, align2)) return false; gcc_assert (GET_MODE (target) == SImode); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ecdf776b986..2ff7e1e307d 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1976,6 +1976,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ rs6000_hard_regno_call_part_clobbered + +#undef TARGET_SLOW_UNALIGNED_ACCESS +#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access /* Processor table. */ @@ -7902,6 +7905,20 @@ rs6000_data_alignment (tree type, unsigned int align, enum data_align how) return align; } +/* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory + instructions simply ignore the low bits; VSX memory instructions + are aligned to 4 or 8 bytes. */ + +static bool +rs6000_slow_unaligned_access (machine_mode mode, unsigned int align) +{ + return (STRICT_ALIGNMENT + || (!TARGET_EFFICIENT_UNALIGNED_VSX + && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32) + || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)) + && (int) align < VECTOR_ALIGN (mode))))); +} + /* Previous GCC releases forced all vector types to have 16-byte alignment. */ bool @@ -10500,13 +10517,14 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM && mode == DImode - && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0])) - || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1]))) - && ! 
(SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32 - ? 32 : MEM_ALIGN (operands[0]))) - || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32 - ? 32 - : MEM_ALIGN (operands[1])))) + && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0])) + || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1]))) + && ! (rs6000_slow_unaligned_access (SImode, + (MEM_ALIGN (operands[0]) > 32 + ? 32 : MEM_ALIGN (operands[0]))) + || rs6000_slow_unaligned_access (SImode, + (MEM_ALIGN (operands[1]) > 32 + ? 32 : MEM_ALIGN (operands[1])))) && ! MEM_VOLATILE_P (operands [0]) && ! MEM_VOLATILE_P (operands [1])) { @@ -34252,7 +34270,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, than generating address, e.g., (plus (reg) (const)). L1 cache latency is about two instructions. */ *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); - if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x))) + if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x))) *total += COSTS_N_INSNS (100); return true; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 20f5b6acffa..4e2d0bb5422 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -968,19 +968,6 @@ enum data_align { align_abi, align_opt, align_both }; /* Nonzero if move instructions will actually fail to work when given unaligned data. */ #define STRICT_ALIGNMENT 0 - -/* Define this macro to be the value 1 if unaligned accesses have a cost - many times greater than aligned accesses, for example if they are - emulated in a trap handler. */ -/* Altivec vector memory instructions simply ignore the low bits; VSX memory - instructions are aligned to 4 or 8 bytes. */ -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) \ - (STRICT_ALIGNMENT \ - || (!TARGET_EFFICIENT_UNALIGNED_VSX \ - && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) && (ALIGN) < 32) \ - || ((VECTOR_MODE_P (MODE) || FLOAT128_VECTOR_P (MODE)) \ - && (int) (ALIGN) < VECTOR_ALIGN (MODE))))) - /* Standard register usage. */ diff --git a/gcc/config/tilegx/tilegx.h b/gcc/config/tilegx/tilegx.h index 66cbd0dc9e5..bbeefa7d08b 100644 --- a/gcc/config/tilegx/tilegx.h +++ b/gcc/config/tilegx/tilegx.h @@ -94,9 +94,6 @@ #define BIGGEST_FIELD_ALIGNMENT 128 #define WIDEST_HARDWARE_FP_SIZE 64 -/* Unaligned moves trap and are very slow. */ -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 - /* Make strings word-aligned so strcpy from constants will be faster. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) \ diff --git a/gcc/config/tilepro/tilepro.h b/gcc/config/tilepro/tilepro.h index 599a7d8cc89..221f32a62c1 100644 --- a/gcc/config/tilepro/tilepro.h +++ b/gcc/config/tilepro/tilepro.h @@ -58,9 +58,6 @@ #define FASTEST_ALIGNMENT 32 #define BIGGEST_FIELD_ALIGNMENT 64 -/* Unaligned moves trap and are very slow. */ -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 - /* Make strings word-aligned so strcpy from constants will be faster. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) \ diff --git a/gcc/defaults.h b/gcc/defaults.h index d27eddf55b0..d3265fcefcb 100644 --- a/gcc/defaults.h +++ b/gcc/defaults.h @@ -1170,10 +1170,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define ATTRIBUTE_ALIGNED_VALUE BIGGEST_ALIGNMENT #endif -#ifndef SLOW_UNALIGNED_ACCESS -#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT -#endif - /* For most ports anything that evaluates to a constant symbolic or integer value is acceptable as a constant address. 
*/ #ifndef CONSTANT_ADDRESS_P diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c29d249d0d5..eb8a6189b95 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6386,23 +6386,22 @@ may eliminate subsequent memory access if subsequent accesses occur to other fields in the same word of the structure, but to different bytes. @end defmac -@defmac SLOW_UNALIGNED_ACCESS (@var{mode}, @var{alignment}) -Define this macro to be the value 1 if memory accesses described by the +@deftypefn {Target Hook} bool TARGET_SLOW_UNALIGNED_ACCESS (machine_mode @var{mode}, unsigned int @var{align}) +This hook returns true if memory accesses described by the @var{mode} and @var{alignment} parameters have a cost many times greater -than aligned accesses, for example if they are emulated in a trap -handler. This macro is invoked only for unaligned accesses, i.e. when +than aligned accesses, for example if they are emulated in a trap handler. +This hook is invoked only for unaligned accesses, i.e. when @code{@var{alignment} < GET_MODE_ALIGNMENT (@var{mode})}. -When this macro is nonzero, the compiler will act as if -@code{STRICT_ALIGNMENT} were nonzero when generating code for block +When this hook returns true, the compiler will act as if +@code{STRICT_ALIGNMENT} were true when generating code for block moves. This can cause significantly more instructions to be produced. -Therefore, do not set this macro nonzero if unaligned accesses only add a -cycle or two to the time for a memory access. +Therefore, do not make this hook return true if unaligned accesses only +add a cycle or two to the time for a memory access. -If the value of this macro is always zero, it need not be defined. If -this macro is defined, it should produce a nonzero value when -@code{STRICT_ALIGNMENT} is nonzero. -@end defmac +The hook must return true whenever @code{STRICT_ALIGNMENT} is true. +The default implementation returns @code{STRICT_ALIGNMENT}. +@end deftypefn @defmac MOVE_RATIO (@var{speed}) The threshold of number of scalar memory-to-memory move insns, @emph{below} diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 8c35a0a0261..ce51bbad22c 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4559,23 +4559,7 @@ may eliminate subsequent memory access if subsequent accesses occur to other fields in the same word of the structure, but to different bytes. @end defmac -@defmac SLOW_UNALIGNED_ACCESS (@var{mode}, @var{alignment}) -Define this macro to be the value 1 if memory accesses described by the -@var{mode} and @var{alignment} parameters have a cost many times greater -than aligned accesses, for example if they are emulated in a trap -handler. This macro is invoked only for unaligned accesses, i.e. when -@code{@var{alignment} < GET_MODE_ALIGNMENT (@var{mode})}. - -When this macro is nonzero, the compiler will act as if -@code{STRICT_ALIGNMENT} were nonzero when generating code for block -moves. This can cause significantly more instructions to be produced. -Therefore, do not set this macro nonzero if unaligned accesses only add a -cycle or two to the time for a memory access. - -If the value of this macro is always zero, it need not be defined. If -this macro is defined, it should produce a nonzero value when -@code{STRICT_ALIGNMENT} is nonzero. 
-@end defmac +@hook TARGET_SLOW_UNALIGNED_ACCESS @defmac MOVE_RATIO (@var{speed}) The threshold of number of scalar memory-to-memory move insns, @emph{below} diff --git a/gcc/expmed.c b/gcc/expmed.c index ca48c60683d..c61a8db43e1 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -569,7 +569,7 @@ simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, return (MEM_P (op0) && bitnum % BITS_PER_UNIT == 0 && bitsize == GET_MODE_BITSIZE (mode) - && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) + && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0)) || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); } diff --git a/gcc/expr.c b/gcc/expr.c index c97f4a63bd2..989badcac59 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -730,7 +730,7 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align) { tmode = mode_iter.require (); if (GET_MODE_SIZE (tmode) > max_pieces - || SLOW_UNALIGNED_ACCESS (tmode, align)) + || targetm.slow_unaligned_access (tmode, align)) break; xmode = tmode; } @@ -2179,7 +2179,7 @@ emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type, int ssize) /* Optimize the access just a bit. */ if (MEM_P (src) - && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (src)) + && (! targetm.slow_unaligned_access (mode, MEM_ALIGN (src)) || MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode)) && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0 && bytelen == GET_MODE_SIZE (mode)) @@ -2584,7 +2584,7 @@ emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED, int ssize) /* Optimize the access just a bit. */ else if (MEM_P (dest) - && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (dest)) + && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (dest)) || MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode)) && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0 && bytelen == GET_MODE_SIZE (mode)) @@ -2653,7 +2653,7 @@ copy_blkmode_from_reg (rtx target, rtx srcreg, tree type) /* We can use a single move if we have an exact mode for the size. */ else if (MEM_P (target) - && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (target)) + && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (target)) || MEM_ALIGN (target) >= GET_MODE_ALIGNMENT (mode)) && bytes == GET_MODE_SIZE (mode)) { @@ -4348,7 +4348,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, /* Here we avoid the case of a structure whose weak alignment forces many pushes of a small amount of data, and such small pushes do rounding that causes trouble. */ - && ((! 
SLOW_UNALIGNED_ACCESS (word_mode, align)) + && ((!targetm.slow_unaligned_access (word_mode, align)) || align >= BIGGEST_ALIGNMENT || (PUSH_ROUNDING (align / BITS_PER_UNIT) == (align / BITS_PER_UNIT))) @@ -4947,7 +4947,7 @@ expand_assignment (tree to, tree from, bool nontemporal) < GET_MODE_ALIGNMENT (mode)) && (((icode = optab_handler (movmisalign_optab, mode)) != CODE_FOR_nothing) - || SLOW_UNALIGNED_ACCESS (mode, align))) + || targetm.slow_unaligned_access (mode, align))) { rtx reg, mem; @@ -6783,7 +6783,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, || (mode != BLKmode && ((((MEM_ALIGN (target) < GET_MODE_ALIGNMENT (mode)) || bitpos % GET_MODE_ALIGNMENT (mode)) - && SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (target))) + && targetm.slow_unaligned_access (mode, MEM_ALIGN (target))) || (bitpos % BITS_PER_UNIT != 0))) || (bitsize >= 0 && mode != BLKmode && GET_MODE_BITSIZE (mode) > bitsize) @@ -10229,7 +10229,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, expand_insn (icode, 2, ops); temp = ops[0].value; } - else if (SLOW_UNALIGNED_ACCESS (mode, align)) + else if (targetm.slow_unaligned_access (mode, align)) temp = extract_bit_field (temp, GET_MODE_BITSIZE (mode), 0, TYPE_UNSIGNED (TREE_TYPE (exp)), (modifier == EXPAND_STACK_PARM @@ -10663,7 +10663,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, && ((modifier == EXPAND_CONST_ADDRESS || modifier == EXPAND_INITIALIZER) ? STRICT_ALIGNMENT - : SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0)))) + : targetm.slow_unaligned_access (mode1, + MEM_ALIGN (op0)))) || (bitpos % BITS_PER_UNIT != 0))) /* If the type and the field are a constant size and the size of the type isn't the same size as the bitfield, diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 8366e4b50b8..a1dce4c5c6b 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -756,7 +756,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi, /* If the destination pointer is not aligned we must be able to emit an unaligned store. */ && (dest_align >= GET_MODE_ALIGNMENT (mode) - || !SLOW_UNALIGNED_ACCESS (mode, dest_align) + || !targetm.slow_unaligned_access (mode, dest_align) || (optab_handler (movmisalign_optab, mode) != CODE_FOR_nothing))) { @@ -769,7 +769,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi, if (tem) srcmem = tem; else if (src_align < GET_MODE_ALIGNMENT (mode) - && SLOW_UNALIGNED_ACCESS (mode, src_align) + && targetm.slow_unaligned_access (mode, src_align) && (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)) srcmem = NULL_TREE; diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index c60d56aad14..a4a38b104bf 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -63,7 +63,7 @@ of a size that is a power of 2. For example it can try to emit a 40-bit store as a 32-bit store followed by an 8-bit store. We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or - SLOW_UNALIGNED_ACCESS rules. + TARGET_SLOW_UNALIGNED_ACCESS rules. 
Note on endianness and example: Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores: diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index ca9fb1169d9..427f1d96492 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1557,9 +1557,10 @@ simplify_operand_subreg (int nop, machine_mode reg_mode) && GET_MODE_SIZE (innermode) <= UNITS_PER_WORD && WORD_REGISTER_OPERATIONS) && (!(MEM_ALIGN (subst) < GET_MODE_ALIGNMENT (mode) - && SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (subst))) + && targetm.slow_unaligned_access (mode, MEM_ALIGN (subst))) || (MEM_ALIGN (reg) < GET_MODE_ALIGNMENT (innermode) - && SLOW_UNALIGNED_ACCESS (innermode, MEM_ALIGN (reg))))) + && targetm.slow_unaligned_access (innermode, + MEM_ALIGN (reg))))) return true; *curr_id->operand_loc[nop] = operand; diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c index 3549f49b93e..a6d430760fc 100644 --- a/gcc/stor-layout.c +++ b/gcc/stor-layout.c @@ -2793,7 +2793,7 @@ bit_field_mode_iterator::next_mode (scalar_int_mode *out_mode) /* Stop if the mode requires too much alignment. */ if (GET_MODE_ALIGNMENT (mode) > m_align - && SLOW_UNALIGNED_ACCESS (mode, m_align)) + && targetm.slow_unaligned_access (mode, m_align)) break; *out_mode = mode; diff --git a/gcc/system.h b/gcc/system.h index 672fe2d3ceb..ef025c04d43 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -912,7 +912,7 @@ extern void fancy_abort (const char *, int, const char *) CLEAR_BY_PIECES_P MOVE_BY_PIECES_P SET_BY_PIECES_P \ STORE_BY_PIECES_P TARGET_FLT_EVAL_METHOD \ HARD_REGNO_CALL_PART_CLOBBERED HARD_REGNO_MODE_OK \ - MODES_TIEABLE_P FUNCTION_ARG_PADDING + MODES_TIEABLE_P FUNCTION_ARG_PADDING SLOW_UNALIGNED_ACCESS /* Target macros only used for code built for the target, that have moved to libgcc-tm.h or have never been present elsewhere. */ diff --git a/gcc/target.def b/gcc/target.def index 110300cb58c..032765069c4 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3511,6 +3511,25 @@ negative number from this hook.", int, (machine_mode mode), default_compare_by_pieces_branch_ratio) +DEFHOOK +(slow_unaligned_access, + "This hook returns true if memory accesses described by the\n\ +@var{mode} and @var{alignment} parameters have a cost many times greater\n\ +than aligned accesses, for example if they are emulated in a trap handler.\n\ +This hook is invoked only for unaligned accesses, i.e. when\n\ +@code{@var{alignment} < GET_MODE_ALIGNMENT (@var{mode})}.\n\ +\n\ +When this hook returns true, the compiler will act as if\n\ +@code{STRICT_ALIGNMENT} were true when generating code for block\n\ +moves. This can cause significantly more instructions to be produced.\n\ +Therefore, do not make this hook return true if unaligned accesses only\n\ +add a cycle or two to the time for a memory access.\n\ +\n\ +The hook must return true whenever @code{STRICT_ALIGNMENT} is true.\n\ +The default implementation returns @code{STRICT_ALIGNMENT}.", + bool, (machine_mode mode, unsigned int align), + default_slow_unaligned_access) + DEFHOOK (optab_supported_p, "Return true if the optimizers should use optab @var{op} with\n\ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index cbc15abea5f..dd6491e077b 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1558,6 +1558,14 @@ default_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, #endif } +/* The default implementation of TARGET_SLOW_UNALIGNED_ACCESS. 
*/ + +bool +default_slow_unaligned_access (machine_mode, unsigned int) +{ + return STRICT_ALIGNMENT; +} + /* For hooks which use the MOVE_RATIO macro, this gives the legacy default behavior. SPEED_P is true if we are compiling for speed. */ diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 51780866061..a12f07892d6 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -197,6 +197,7 @@ extern tree default_builtin_tm_load_store (tree); extern int default_memory_move_cost (machine_mode, reg_class_t, bool); extern int default_register_move_cost (machine_mode, reg_class_t, reg_class_t); +extern bool default_slow_unaligned_access (machine_mode, unsigned int); extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, unsigned int, diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c index 1269fa8a2bc..4ec0dacf38a 100644 --- a/gcc/tree-ssa-strlen.c +++ b/gcc/tree-ssa-strlen.c @@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-chkp.h" #include "tree-hash-traits.h" #include "builtins.h" +#include "target.h" /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value is an index into strinfo vector, negative value stands for @@ -2124,7 +2125,7 @@ handle_builtin_memcmp (gimple_stmt_iterator *gsi) unsigned align = MIN (align1, align2); scalar_int_mode mode; if (int_mode_for_size (leni, 1).exists (&mode) - && (align >= leni || !SLOW_UNALIGNED_ACCESS (mode, align))) + && (align >= leni || !targetm.slow_unaligned_access (mode, align))) { location_t loc = gimple_location (stmt2); tree type, off; -- 2.30.2
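Ports such as alpha, arm, tilegx and tilepro only delete their old
always-1 SLOW_UNALIGNED_ACCESS macro in this patch and fall back on the
default hook, which returns STRICT_ALIGNMENT.  A port that wanted to keep
the old unconditional behaviour without relying on STRICT_ALIGNMENT could
override the hook trivially; this is a hypothetical sketch (port_* names
are placeholders), written in the unnamed-parameter style used by
riscv_slow_unaligned_access and default_slow_unaligned_access above.

/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Unaligned moves trap and are
   very slow on this hypothetical port, regardless of STRICT_ALIGNMENT.  */

static bool
port_slow_unaligned_access (machine_mode, unsigned int)
{
  return true;
}

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS port_slow_unaligned_access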