1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2020 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "target-globals.h"
65 #include "builtins.h"
66 #include "tm-constrs.h"
67 #include "rtl-iter.h"
68 #include "optabs-libfuncs.h"
69 #include "gimplify.h"
70 #include "gimple.h"
71 #include "selftest.h"
72
73 /* This file should be included last. */
74 #include "target-def.h"
75
76 /* Forward definitions of types. */
77 typedef struct minipool_node Mnode;
78 typedef struct minipool_fixup Mfix;
79
80 /* The last .arch and .fpu assembly strings that we printed. */
81 static std::string arm_last_printed_arch_string;
82 static std::string arm_last_printed_fpu_string;
83
84 void (*arm_lang_output_object_attributes_hook)(void);
85
86 struct four_ints
87 {
88 int i[4];
89 };
90
91 /* Forward function declarations. */
92 static bool arm_const_not_ok_for_debug_p (rtx);
93 static int arm_needs_doubleword_align (machine_mode, const_tree);
94 static int arm_compute_static_chain_stack_bytes (void);
95 static arm_stack_offsets *arm_get_frame_offsets (void);
96 static void arm_compute_frame_layout (void);
97 static void arm_add_gc_roots (void);
98 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
99 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
100 static unsigned bit_count (unsigned long);
101 static unsigned bitmap_popcount (const sbitmap);
102 static int arm_address_register_rtx_p (rtx, int);
103 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
104 static bool is_called_in_ARM_mode (tree);
105 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
106 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
107 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
108 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
109 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
110 inline static int thumb1_index_register_rtx_p (rtx, int);
111 static int thumb_far_jump_used_p (void);
112 static bool thumb_force_lr_save (void);
113 static unsigned arm_size_return_regs (void);
114 static bool arm_assemble_integer (rtx, unsigned int, int);
115 static void arm_print_operand (FILE *, rtx, int);
116 static void arm_print_operand_address (FILE *, machine_mode, rtx);
117 static bool arm_print_operand_punct_valid_p (unsigned char code);
118 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
119 static arm_cc get_arm_condition_code (rtx);
120 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
121 static const char *output_multi_immediate (rtx *, const char *, const char *,
122 int, HOST_WIDE_INT);
123 static const char *shift_op (rtx, HOST_WIDE_INT *);
124 static struct machine_function *arm_init_machine_status (void);
125 static void thumb_exit (FILE *, int);
126 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
127 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
128 static Mnode *add_minipool_forward_ref (Mfix *);
129 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_backward_ref (Mfix *);
131 static void assign_minipool_offsets (Mfix *);
132 static void arm_print_value (FILE *, rtx);
133 static void dump_minipool (rtx_insn *);
134 static int arm_barrier_cost (rtx_insn *);
135 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
136 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
137 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
138 machine_mode, rtx);
139 static void arm_reorg (void);
140 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
141 static unsigned long arm_compute_save_reg0_reg12_mask (void);
142 static unsigned long arm_compute_save_core_reg_mask (void);
143 static unsigned long arm_isr_value (tree);
144 static unsigned long arm_compute_func_type (void);
145 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
148 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
149 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
150 #endif
151 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
152 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
153 static void arm_output_function_epilogue (FILE *);
154 static void arm_output_function_prologue (FILE *);
155 static int arm_comp_type_attributes (const_tree, const_tree);
156 static void arm_set_default_type_attributes (tree);
157 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
158 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
159 static int optimal_immediate_sequence (enum rtx_code code,
160 unsigned HOST_WIDE_INT val,
161 struct four_ints *return_sequence);
162 static int optimal_immediate_sequence_1 (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence,
165 int i);
166 static int arm_get_strip_length (int);
167 static bool arm_function_ok_for_sibcall (tree, tree);
168 static machine_mode arm_promote_function_mode (const_tree,
169 machine_mode, int *,
170 const_tree, int);
171 static bool arm_return_in_memory (const_tree, const_tree);
172 static rtx arm_function_value (const_tree, const_tree, bool);
173 static rtx arm_libcall_value_1 (machine_mode);
174 static rtx arm_libcall_value (machine_mode, const_rtx);
175 static bool arm_function_value_regno_p (const unsigned int);
176 static void arm_internal_label (FILE *, const char *, unsigned long);
177 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
178 tree);
179 static bool arm_have_conditional_execution (void);
180 static bool arm_cannot_force_const_mem (machine_mode, rtx);
181 static bool arm_legitimate_constant_p (machine_mode, rtx);
182 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
183 static int arm_insn_cost (rtx_insn *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
190 static rtx emit_multi_reg_push (unsigned long, unsigned long);
191 static void arm_emit_multi_reg_pop (unsigned long);
192 static int vfp_emit_fstmd (int, int);
193 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
194 static int arm_arg_partial_bytes (cumulative_args_t,
195 const function_arg_info &);
196 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
197 static void arm_function_arg_advance (cumulative_args_t,
198 const function_arg_info &);
199 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
200 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
201 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
202 const_tree);
203 static rtx aapcs_libcall_value (machine_mode);
204 static int aapcs_select_return_coproc (const_tree, const_tree);
205
206 #ifdef OBJECT_FORMAT_ELF
207 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
208 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
209 #endif
210 #ifndef ARM_PE
211 static void arm_encode_section_info (tree, rtx, int);
212 #endif
213
214 static void arm_file_end (void);
215 static void arm_file_start (void);
216 static void arm_insert_attributes (tree, tree *);
217
218 static void arm_setup_incoming_varargs (cumulative_args_t,
219 const function_arg_info &, int *, int);
220 static bool arm_pass_by_reference (cumulative_args_t,
221 const function_arg_info &);
222 static bool arm_promote_prototypes (const_tree);
223 static bool arm_default_short_enums (void);
224 static bool arm_align_anon_bitfield (void);
225 static bool arm_return_in_msb (const_tree);
226 static bool arm_must_pass_in_stack (const function_arg_info &);
227 static bool arm_return_in_memory (const_tree, const_tree);
228 #if ARM_UNWIND_INFO
229 static void arm_unwind_emit (FILE *, rtx_insn *);
230 static bool arm_output_ttype (rtx);
231 static void arm_asm_emit_except_personality (rtx);
232 #endif
233 static void arm_asm_init_sections (void);
234 static rtx arm_dwarf_register_span (rtx);
235
236 static tree arm_cxx_guard_type (void);
237 static bool arm_cxx_guard_mask_bit (void);
238 static tree arm_get_cookie_size (tree);
239 static bool arm_cookie_has_size (void);
240 static bool arm_cxx_cdtor_returns_this (void);
241 static bool arm_cxx_key_method_may_be_inline (void);
242 static void arm_cxx_determine_class_data_visibility (tree);
243 static bool arm_cxx_class_data_always_comdat (void);
244 static bool arm_cxx_use_aeabi_atexit (void);
245 static void arm_init_libfuncs (void);
246 static tree arm_build_builtin_va_list (void);
247 static void arm_expand_builtin_va_start (tree, rtx);
248 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
249 static void arm_option_override (void);
250 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
251 struct cl_target_option *);
252 static void arm_override_options_after_change (void);
253 static void arm_option_print (FILE *, int, struct cl_target_option *);
254 static void arm_set_current_function (tree);
255 static bool arm_can_inline_p (tree, tree);
256 static void arm_relayout_function (tree);
257 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
258 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
259 static bool arm_sched_can_speculate_insn (rtx_insn *);
260 static bool arm_macro_fusion_p (void);
261 static bool arm_cannot_copy_insn_p (rtx_insn *);
262 static int arm_issue_rate (void);
263 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
264 static int arm_first_cycle_multipass_dfa_lookahead (void);
265 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
266 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
267 static bool arm_output_addr_const_extra (FILE *, rtx);
268 static bool arm_allocate_stack_slots_for_args (void);
269 static bool arm_warn_func_return (tree);
270 static tree arm_promoted_type (const_tree t);
271 static bool arm_scalar_mode_supported_p (scalar_mode);
272 static bool arm_frame_pointer_required (void);
273 static bool arm_can_eliminate (const int, const int);
274 static void arm_asm_trampoline_template (FILE *);
275 static void arm_trampoline_init (rtx, tree, rtx);
276 static rtx arm_trampoline_adjust_address (rtx);
277 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
278 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool arm_array_mode_supported_p (machine_mode,
282 unsigned HOST_WIDE_INT);
283 static machine_mode arm_preferred_simd_mode (scalar_mode);
284 static bool arm_class_likely_spilled_p (reg_class_t);
285 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
286 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
287 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
288 const_tree type,
289 int misalignment,
290 bool is_packed);
291 static void arm_conditional_register_usage (void);
292 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
298 static int arm_cortex_m7_branch_cost (bool, bool);
299
300 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
301 const vec_perm_indices &);
302
303 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
304
305 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
306 tree vectype,
307 int misalign ATTRIBUTE_UNUSED);
308 static unsigned arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
309 enum vect_cost_for_stmt kind,
310 struct _stmt_vec_info *stmt_info,
311 tree vectype, int misalign,
312 enum vect_cost_model_location where);
313
314 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
315 bool op0_preserve_value);
316 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
317
318 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
319 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
320 const_tree);
321 static section *arm_function_section (tree, enum node_frequency, bool, bool);
322 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
323 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
324 int reloc);
325 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
326 static opt_scalar_float_mode arm_floatn_mode (int, bool);
327 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
328 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
329 static bool arm_modes_tieable_p (machine_mode, machine_mode);
330 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
331 static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
332 vec<const char *> &, vec<rtx> &,
333 HARD_REG_SET &);
334 \f
335 /* Table of machine attributes. */
336 static const struct attribute_spec arm_attribute_table[] =
337 {
338 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
339 affects_type_identity, handler, exclude } */
340 /* Function calls made to this symbol must be done indirectly, because
341 it may lie outside of the 26 bit addressing range of a normal function
342 call. */
343 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
344 /* Whereas these functions are always known to reside within the 26 bit
345 addressing range. */
346 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
347 /* Specify the procedure call conventions for a function. */
348 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
349 NULL },
350 /* Interrupt Service Routines have special prologue and epilogue requirements. */
351 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
354 NULL },
355 { "naked", 0, 0, true, false, false, false,
356 arm_handle_fndecl_attribute, NULL },
357 #ifdef ARM_PE
358 /* ARM/PE has three new attributes:
359 interfacearm - ?
360 dllexport - for exporting a function/variable that will live in a dll
361 dllimport - for importing a function/variable from a dll
362
363 Microsoft allows multiple declspecs in one __declspec, separating
364 them with spaces. We do NOT support this. Instead, use __declspec
365 multiple times.
366 */
367 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
368 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
369 { "interfacearm", 0, 0, true, false, false, false,
370 arm_handle_fndecl_attribute, NULL },
371 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
373 NULL },
374 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
375 NULL },
376 { "notshared", 0, 0, false, true, false, false,
377 arm_handle_notshared_attribute, NULL },
378 #endif
379 /* ARMv8-M Security Extensions support. */
380 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
381 arm_handle_cmse_nonsecure_entry, NULL },
382 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
383 arm_handle_cmse_nonsecure_call, NULL },
384 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
385 { NULL, 0, 0, false, false, false, false, NULL, NULL }
386 };
387 \f
388 /* Initialize the GCC target structure. */
389 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
390 #undef TARGET_MERGE_DECL_ATTRIBUTES
391 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
392 #endif
393
394 #undef TARGET_CHECK_BUILTIN_CALL
395 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
396
397 #undef TARGET_LEGITIMIZE_ADDRESS
398 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
399
400 #undef TARGET_ATTRIBUTE_TABLE
401 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
402
403 #undef TARGET_INSERT_ATTRIBUTES
404 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
405
406 #undef TARGET_ASM_FILE_START
407 #define TARGET_ASM_FILE_START arm_file_start
408 #undef TARGET_ASM_FILE_END
409 #define TARGET_ASM_FILE_END arm_file_end
410
411 #undef TARGET_ASM_ALIGNED_SI_OP
412 #define TARGET_ASM_ALIGNED_SI_OP NULL
413 #undef TARGET_ASM_INTEGER
414 #define TARGET_ASM_INTEGER arm_assemble_integer
415
416 #undef TARGET_PRINT_OPERAND
417 #define TARGET_PRINT_OPERAND arm_print_operand
418 #undef TARGET_PRINT_OPERAND_ADDRESS
419 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
420 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
421 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
422
423 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
424 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
425
426 #undef TARGET_ASM_FUNCTION_PROLOGUE
427 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
428
429 #undef TARGET_ASM_FUNCTION_EPILOGUE
430 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
431
432 #undef TARGET_CAN_INLINE_P
433 #define TARGET_CAN_INLINE_P arm_can_inline_p
434
435 #undef TARGET_RELAYOUT_FUNCTION
436 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
437
438 #undef TARGET_OPTION_OVERRIDE
439 #define TARGET_OPTION_OVERRIDE arm_option_override
440
441 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
442 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
443
444 #undef TARGET_OPTION_RESTORE
445 #define TARGET_OPTION_RESTORE arm_option_restore
446
447 #undef TARGET_OPTION_PRINT
448 #define TARGET_OPTION_PRINT arm_option_print
449
450 #undef TARGET_COMP_TYPE_ATTRIBUTES
451 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
452
453 #undef TARGET_SCHED_CAN_SPECULATE_INSN
454 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
455
456 #undef TARGET_SCHED_MACRO_FUSION_P
457 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
458
459 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
460 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
461
462 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
463 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
464
465 #undef TARGET_SCHED_ADJUST_COST
466 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
467
468 #undef TARGET_SET_CURRENT_FUNCTION
469 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
470
471 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
472 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
473
474 #undef TARGET_SCHED_REORDER
475 #define TARGET_SCHED_REORDER arm_sched_reorder
476
477 #undef TARGET_REGISTER_MOVE_COST
478 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
479
480 #undef TARGET_MEMORY_MOVE_COST
481 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
482
483 #undef TARGET_ENCODE_SECTION_INFO
484 #ifdef ARM_PE
485 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
486 #else
487 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
488 #endif
489
490 #undef TARGET_STRIP_NAME_ENCODING
491 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
492
493 #undef TARGET_ASM_INTERNAL_LABEL
494 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
495
496 #undef TARGET_FLOATN_MODE
497 #define TARGET_FLOATN_MODE arm_floatn_mode
498
499 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
500 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
501
502 #undef TARGET_FUNCTION_VALUE
503 #define TARGET_FUNCTION_VALUE arm_function_value
504
505 #undef TARGET_LIBCALL_VALUE
506 #define TARGET_LIBCALL_VALUE arm_libcall_value
507
508 #undef TARGET_FUNCTION_VALUE_REGNO_P
509 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
510
511 #undef TARGET_ASM_OUTPUT_MI_THUNK
512 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
513 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
514 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
515
516 #undef TARGET_RTX_COSTS
517 #define TARGET_RTX_COSTS arm_rtx_costs
518 #undef TARGET_ADDRESS_COST
519 #define TARGET_ADDRESS_COST arm_address_cost
520 #undef TARGET_INSN_COST
521 #define TARGET_INSN_COST arm_insn_cost
522
523 #undef TARGET_SHIFT_TRUNCATION_MASK
524 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
525 #undef TARGET_VECTOR_MODE_SUPPORTED_P
526 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
527 #undef TARGET_ARRAY_MODE_SUPPORTED_P
528 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
529 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
530 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
531 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
532 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
533 arm_autovectorize_vector_modes
534
535 #undef TARGET_MACHINE_DEPENDENT_REORG
536 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
537
538 #undef TARGET_INIT_BUILTINS
539 #define TARGET_INIT_BUILTINS arm_init_builtins
540 #undef TARGET_EXPAND_BUILTIN
541 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
542 #undef TARGET_BUILTIN_DECL
543 #define TARGET_BUILTIN_DECL arm_builtin_decl
544
545 #undef TARGET_INIT_LIBFUNCS
546 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
547
548 #undef TARGET_PROMOTE_FUNCTION_MODE
549 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
550 #undef TARGET_PROMOTE_PROTOTYPES
551 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
552 #undef TARGET_PASS_BY_REFERENCE
553 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
554 #undef TARGET_ARG_PARTIAL_BYTES
555 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
556 #undef TARGET_FUNCTION_ARG
557 #define TARGET_FUNCTION_ARG arm_function_arg
558 #undef TARGET_FUNCTION_ARG_ADVANCE
559 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
560 #undef TARGET_FUNCTION_ARG_PADDING
561 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
562 #undef TARGET_FUNCTION_ARG_BOUNDARY
563 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
564
565 #undef TARGET_SETUP_INCOMING_VARARGS
566 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
567
568 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
569 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
570
571 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
572 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
573 #undef TARGET_TRAMPOLINE_INIT
574 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
575 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
576 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
577
578 #undef TARGET_WARN_FUNC_RETURN
579 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
580
581 #undef TARGET_DEFAULT_SHORT_ENUMS
582 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
583
584 #undef TARGET_ALIGN_ANON_BITFIELD
585 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
586
587 #undef TARGET_NARROW_VOLATILE_BITFIELD
588 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
589
590 #undef TARGET_CXX_GUARD_TYPE
591 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
592
593 #undef TARGET_CXX_GUARD_MASK_BIT
594 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
595
596 #undef TARGET_CXX_GET_COOKIE_SIZE
597 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
598
599 #undef TARGET_CXX_COOKIE_HAS_SIZE
600 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
601
602 #undef TARGET_CXX_CDTOR_RETURNS_THIS
603 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
604
605 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
606 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
607
608 #undef TARGET_CXX_USE_AEABI_ATEXIT
609 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
610
611 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
612 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
613 arm_cxx_determine_class_data_visibility
614
615 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
616 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
617
618 #undef TARGET_RETURN_IN_MSB
619 #define TARGET_RETURN_IN_MSB arm_return_in_msb
620
621 #undef TARGET_RETURN_IN_MEMORY
622 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
623
624 #undef TARGET_MUST_PASS_IN_STACK
625 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
626
627 #if ARM_UNWIND_INFO
628 #undef TARGET_ASM_UNWIND_EMIT
629 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
630
631 /* EABI unwinding tables use a different format for the typeinfo tables. */
632 #undef TARGET_ASM_TTYPE
633 #define TARGET_ASM_TTYPE arm_output_ttype
634
635 #undef TARGET_ARM_EABI_UNWINDER
636 #define TARGET_ARM_EABI_UNWINDER true
637
638 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
639 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
640
641 #endif /* ARM_UNWIND_INFO */
642
643 #undef TARGET_ASM_INIT_SECTIONS
644 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
645
646 #undef TARGET_DWARF_REGISTER_SPAN
647 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
648
649 #undef TARGET_CANNOT_COPY_INSN_P
650 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
651
652 #ifdef HAVE_AS_TLS
653 #undef TARGET_HAVE_TLS
654 #define TARGET_HAVE_TLS true
655 #endif
656
657 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
658 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
659
660 #undef TARGET_LEGITIMATE_CONSTANT_P
661 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
662
663 #undef TARGET_CANNOT_FORCE_CONST_MEM
664 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
665
666 #undef TARGET_MAX_ANCHOR_OFFSET
667 #define TARGET_MAX_ANCHOR_OFFSET 4095
668
669 /* The minimum is set such that the total size of the block
670 for a particular anchor is 4088 + 1 + 4095 bytes, which is
671 divisible by eight, ensuring natural spacing of anchors. */
672 #undef TARGET_MIN_ANCHOR_OFFSET
673 #define TARGET_MIN_ANCHOR_OFFSET -4088
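/* Worked check (editorial note, not part of the upstream file): an anchor can
   reach offsets in [-4088, +4095], so the block it covers spans
   4088 + 1 + 4095 = 8184 bytes, and 8184 / 8 = 1023, i.e. the block size is
   an exact multiple of eight.  */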
674
675 #undef TARGET_SCHED_ISSUE_RATE
676 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
677
678 #undef TARGET_SCHED_VARIABLE_ISSUE
679 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
680
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
683 arm_first_cycle_multipass_dfa_lookahead
684
685 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
686 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
687 arm_first_cycle_multipass_dfa_lookahead_guard
688
689 #undef TARGET_MANGLE_TYPE
690 #define TARGET_MANGLE_TYPE arm_mangle_type
691
692 #undef TARGET_INVALID_CONVERSION
693 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
694
695 #undef TARGET_INVALID_UNARY_OP
696 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
697
698 #undef TARGET_INVALID_BINARY_OP
699 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
700
701 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
702 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
703
704 #undef TARGET_BUILD_BUILTIN_VA_LIST
705 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
706 #undef TARGET_EXPAND_BUILTIN_VA_START
707 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
708 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
709 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
710
711 #ifdef HAVE_AS_TLS
712 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
713 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
714 #endif
715
716 #undef TARGET_LEGITIMATE_ADDRESS_P
717 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
718
719 #undef TARGET_PREFERRED_RELOAD_CLASS
720 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
721
722 #undef TARGET_PROMOTED_TYPE
723 #define TARGET_PROMOTED_TYPE arm_promoted_type
724
725 #undef TARGET_SCALAR_MODE_SUPPORTED_P
726 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
727
728 #undef TARGET_COMPUTE_FRAME_LAYOUT
729 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
730
731 #undef TARGET_FRAME_POINTER_REQUIRED
732 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
733
734 #undef TARGET_CAN_ELIMINATE
735 #define TARGET_CAN_ELIMINATE arm_can_eliminate
736
737 #undef TARGET_CONDITIONAL_REGISTER_USAGE
738 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
739
740 #undef TARGET_CLASS_LIKELY_SPILLED_P
741 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
742
743 #undef TARGET_VECTORIZE_BUILTINS
744 #define TARGET_VECTORIZE_BUILTINS
745
746 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
747 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
748 arm_builtin_vectorized_function
749
750 #undef TARGET_VECTOR_ALIGNMENT
751 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
752
753 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
754 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
755 arm_vector_alignment_reachable
756
757 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
758 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
759 arm_builtin_support_vector_misalignment
760
761 #undef TARGET_PREFERRED_RENAME_CLASS
762 #define TARGET_PREFERRED_RENAME_CLASS \
763 arm_preferred_rename_class
764
765 #undef TARGET_VECTORIZE_VEC_PERM_CONST
766 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
767
768 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
769 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
770 arm_builtin_vectorization_cost
771 #undef TARGET_VECTORIZE_ADD_STMT_COST
772 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
773
774 #undef TARGET_CANONICALIZE_COMPARISON
775 #define TARGET_CANONICALIZE_COMPARISON \
776 arm_canonicalize_comparison
777
778 #undef TARGET_ASAN_SHADOW_OFFSET
779 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
780
781 #undef MAX_INSN_PER_IT_BLOCK
782 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
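/* Editorial note: when IT blocks are restricted (the -mrestrict-it style of
   code generation preferred on ARMv8, where multi-instruction IT blocks are
   deprecated), only one conditional instruction is allowed per IT block;
   otherwise Thumb-2 permits up to four.  */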
783
784 #undef TARGET_CAN_USE_DOLOOP_P
785 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
786
787 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
788 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
789
790 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
791 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
792
793 #undef TARGET_SCHED_FUSION_PRIORITY
794 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
795
796 #undef TARGET_ASM_FUNCTION_SECTION
797 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
798
799 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
800 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
801
802 #undef TARGET_SECTION_TYPE_FLAGS
803 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
804
805 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
806 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
807
808 #undef TARGET_C_EXCESS_PRECISION
809 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
810
811 /* Although the architecture reserves bits 0 and 1, only the former is
812 used for ARM/Thumb ISA selection in v7 and earlier versions. */
813 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
814 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
815
816 #undef TARGET_FIXED_CONDITION_CODE_REGS
817 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
818
819 #undef TARGET_HARD_REGNO_NREGS
820 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
821 #undef TARGET_HARD_REGNO_MODE_OK
822 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
823
824 #undef TARGET_MODES_TIEABLE_P
825 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
826
827 #undef TARGET_CAN_CHANGE_MODE_CLASS
828 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
829
830 #undef TARGET_CONSTANT_ALIGNMENT
831 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
832
833 #undef TARGET_INVALID_WITHIN_DOLOOP
834 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
835
836 #undef TARGET_MD_ASM_ADJUST
837 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
838 \f
839 /* Obstack for minipool constant handling. */
840 static struct obstack minipool_obstack;
841 static char * minipool_startobj;
842
843 /* The maximum number of skipped insns that will be
844 conditionalised if possible. */
845 static int max_insns_skipped = 5;
846
847 extern FILE * asm_out_file;
848
849 /* True if we are currently building a constant table. */
850 int making_const_table;
851
852 /* The processor for which instructions should be scheduled. */
853 enum processor_type arm_tune = TARGET_CPU_arm_none;
854
855 /* The current tuning set. */
856 const struct tune_params *current_tune;
857
858 /* Which floating point hardware to schedule for. */
859 int arm_fpu_attr;
860
861 /* Used for Thumb call_via trampolines. */
862 rtx thumb_call_via_label[14];
863 static int thumb_call_reg_needed;
864
865 /* The bits in this mask specify which instruction scheduling options should
866 be used. */
867 unsigned int tune_flags = 0;
868
869 /* The highest ARM architecture version supported by the
870 target. */
871 enum base_architecture arm_base_arch = BASE_ARCH_0;
872
873 /* Active target architecture and tuning. */
874
875 struct arm_build_target arm_active_target;
876
877 /* The following are used in the arm.md file as equivalents to bits
878 in the above two flag variables. */
879
880 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
881 int arm_arch4 = 0;
882
883 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
884 int arm_arch4t = 0;
885
886 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
887 int arm_arch5t = 0;
888
889 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
890 int arm_arch5te = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
893 int arm_arch6 = 0;
894
895 /* Nonzero if this chip supports the ARM 6K extensions. */
896 int arm_arch6k = 0;
897
898 /* Nonzero if this chip supports the ARM 6KZ extensions. */
899 int arm_arch6kz = 0;
900
901 /* Nonzero if instructions present in ARMv6-M can be used. */
902 int arm_arch6m = 0;
903
904 /* Nonzero if this chip supports the ARM 7 extensions. */
905 int arm_arch7 = 0;
906
907 /* Nonzero if this chip supports the Large Physical Address Extension. */
908 int arm_arch_lpae = 0;
909
910 /* Nonzero if instructions not present in the 'M' profile can be used. */
911 int arm_arch_notm = 0;
912
913 /* Nonzero if instructions present in ARMv7E-M can be used. */
914 int arm_arch7em = 0;
915
916 /* Nonzero if instructions present in ARMv8 can be used. */
917 int arm_arch8 = 0;
918
919 /* Nonzero if this chip supports the ARMv8.1 extensions. */
920 int arm_arch8_1 = 0;
921
922 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
923 int arm_arch8_2 = 0;
924
925 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
926 int arm_arch8_3 = 0;
927
928 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
929 int arm_arch8_4 = 0;
930 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
931 extensions. */
932 int arm_arch8_1m_main = 0;
933
934 /* Nonzero if this chip supports the FP16 instructions extension of ARM
935 Architecture 8.2. */
936 int arm_fp16_inst = 0;
937
938 /* Nonzero if this chip can benefit from load scheduling. */
939 int arm_ld_sched = 0;
940
941 /* Nonzero if this chip is a StrongARM. */
942 int arm_tune_strongarm = 0;
943
944 /* Nonzero if this chip supports Intel Wireless MMX technology. */
945 int arm_arch_iwmmxt = 0;
946
947 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
948 int arm_arch_iwmmxt2 = 0;
949
950 /* Nonzero if this chip is an XScale. */
951 int arm_arch_xscale = 0;
952
953 /* Nonzero if tuning for XScale. */
954 int arm_tune_xscale = 0;
955
956 /* Nonzero if we want to tune for stores that access the write-buffer.
957 This typically means an ARM6 or ARM7 with MMU or MPU. */
958 int arm_tune_wbuf = 0;
959
960 /* Nonzero if tuning for Cortex-A9. */
961 int arm_tune_cortex_a9 = 0;
962
963 /* Nonzero if we should define __THUMB_INTERWORK__ in the
964 preprocessor.
965 XXX This is a bit of a hack; it's intended to help work around
966 problems in GLD, which doesn't understand that armv5t code is
967 interworking clean. */
968 int arm_cpp_interwork = 0;
969
970 /* Nonzero if chip supports Thumb 1. */
971 int arm_arch_thumb1;
972
973 /* Nonzero if chip supports Thumb 2. */
974 int arm_arch_thumb2;
975
976 /* Nonzero if chip supports integer division instruction. */
977 int arm_arch_arm_hwdiv;
978 int arm_arch_thumb_hwdiv;
979
980 /* Nonzero if chip disallows volatile memory access in IT block. */
981 int arm_arch_no_volatile_ce;
982
983 /* Nonzero if we shouldn't use literal pools. */
984 bool arm_disable_literal_pool = false;
985
986 /* The register number to be used for the PIC offset register. */
987 unsigned arm_pic_register = INVALID_REGNUM;
988
989 enum arm_pcs arm_pcs_default;
990
991 /* For an explanation of these variables, see final_prescan_insn below. */
992 int arm_ccfsm_state;
993 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
994 enum arm_cond_code arm_current_cc;
995
996 rtx arm_target_insn;
997 int arm_target_label;
998 /* The number of conditionally executed insns, including the current insn. */
999 int arm_condexec_count = 0;
1000 /* A bitmask specifying the patterns for the IT block.
1001 Zero means do not output an IT block before this insn. */
1002 int arm_condexec_mask = 0;
1003 /* The number of bits used in arm_condexec_mask. */
1004 int arm_condexec_masklen = 0;
1005
1006 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1007 int arm_arch_crc = 0;
1008
1009 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1010 int arm_arch_dotprod = 0;
1011
1012 /* Nonzero if chip supports the ARMv8-M security extensions. */
1013 int arm_arch_cmse = 0;
1014
1015 /* Nonzero if the core has a very small, high-latency multiply unit. */
1016 int arm_m_profile_small_mul = 0;
1017
1018 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1019 int arm_arch_i8mm = 0;
1020
1021 /* Nonzero if chip supports the BFloat16 instructions. */
1022 int arm_arch_bf16 = 0;
1023
1024 /* Nonzero if chip supports the Custom Datapath Extension. */
1025 int arm_arch_cde = 0;
1026 int arm_arch_cde_coproc = 0;
1027 const int arm_arch_cde_coproc_bits[] = {
1028 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1029 };
1030
1031 /* The condition codes of the ARM, and the inverse function. */
1032 static const char * const arm_condition_codes[] =
1033 {
1034 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1035 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1036 };
1037
1038 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1039 int arm_regs_in_sequence[] =
1040 {
1041 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1042 };
1043
1044 #define DEF_FP_SYSREG(reg) #reg,
1045 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1046 FP_SYSREGS
1047 };
1048 #undef DEF_FP_SYSREG
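/* Editorial sketch of the X-macro pattern above: DEF_FP_SYSREG(reg)
   stringizes its argument and appends a comma, so if FP_SYSREGS were
   defined elsewhere (hypothetical expansion for illustration) as
       DEF_FP_SYSREG (fpscr)
       DEF_FP_SYSREG (fpcxt_ns)
   then fp_sysreg_names would be initialised to { "fpscr", "fpcxt_ns", }.  */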
1049
1050 #define ARM_LSL_NAME "lsl"
1051 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1052
1053 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1054 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1055 | (1 << PIC_OFFSET_TABLE_REGNUM)))
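/* Editorial note: assuming the usual arm.h register numbering
   (THUMB_HARD_FRAME_POINTER_REGNUM == 7, SP_REGNUM == 13, PC_REGNUM == 15)
   and no low PIC register, this evaluates to 0xff & ~0x80 == 0x7f, i.e. the
   Thumb work registers r0-r6; the SP and PC bits already lie outside the
   0xff base mask.  */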
1056 \f
1057 /* Initialization code. */
1058
1059 struct cpu_tune
1060 {
1061 enum processor_type scheduler;
1062 unsigned int tune_flags;
1063 const struct tune_params *tune;
1064 };
1065
1066 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1067 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1068 { \
1069 num_slots, \
1070 l1_size, \
1071 l1_line_size \
1072 }
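/* Editorial note: ARM_PREFETCH_NOT_BENEFICIAL therefore means zero prefetch
   slots with the L1 cache size and line size left unspecified (-1), mirroring
   the num_slots/l1_size/l1_line_size parameters of ARM_PREFETCH_BENEFICIAL.  */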
1073
1074 /* arm generic vectorizer costs. */
1075 static const
1076 struct cpu_vec_costs arm_default_vec_cost = {
1077 1, /* scalar_stmt_cost. */
1078 1, /* scalar load_cost. */
1079 1, /* scalar_store_cost. */
1080 1, /* vec_stmt_cost. */
1081 1, /* vec_to_scalar_cost. */
1082 1, /* scalar_to_vec_cost. */
1083 1, /* vec_align_load_cost. */
1084 1, /* vec_unalign_load_cost. */
1085 1, /* vec_unalign_store_cost. */
1086 1, /* vec_store_cost. */
1087 3, /* cond_taken_branch_cost. */
1088 1, /* cond_not_taken_branch_cost. */
1089 };
1090
1091 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1092 #include "aarch-cost-tables.h"
1093
1094
1095
1096 const struct cpu_cost_table cortexa9_extra_costs =
1097 {
1098 /* ALU */
1099 {
1100 0, /* arith. */
1101 0, /* logical. */
1102 0, /* shift. */
1103 COSTS_N_INSNS (1), /* shift_reg. */
1104 COSTS_N_INSNS (1), /* arith_shift. */
1105 COSTS_N_INSNS (2), /* arith_shift_reg. */
1106 0, /* log_shift. */
1107 COSTS_N_INSNS (1), /* log_shift_reg. */
1108 COSTS_N_INSNS (1), /* extend. */
1109 COSTS_N_INSNS (2), /* extend_arith. */
1110 COSTS_N_INSNS (1), /* bfi. */
1111 COSTS_N_INSNS (1), /* bfx. */
1112 0, /* clz. */
1113 0, /* rev. */
1114 0, /* non_exec. */
1115 true /* non_exec_costs_exec. */
1116 },
1117 {
1118 /* MULT SImode */
1119 {
1120 COSTS_N_INSNS (3), /* simple. */
1121 COSTS_N_INSNS (3), /* flag_setting. */
1122 COSTS_N_INSNS (2), /* extend. */
1123 COSTS_N_INSNS (3), /* add. */
1124 COSTS_N_INSNS (2), /* extend_add. */
1125 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1126 },
1127 /* MULT DImode */
1128 {
1129 0, /* simple (N/A). */
1130 0, /* flag_setting (N/A). */
1131 COSTS_N_INSNS (4), /* extend. */
1132 0, /* add (N/A). */
1133 COSTS_N_INSNS (4), /* extend_add. */
1134 0 /* idiv (N/A). */
1135 }
1136 },
1137 /* LD/ST */
1138 {
1139 COSTS_N_INSNS (2), /* load. */
1140 COSTS_N_INSNS (2), /* load_sign_extend. */
1141 COSTS_N_INSNS (2), /* ldrd. */
1142 COSTS_N_INSNS (2), /* ldm_1st. */
1143 1, /* ldm_regs_per_insn_1st. */
1144 2, /* ldm_regs_per_insn_subsequent. */
1145 COSTS_N_INSNS (5), /* loadf. */
1146 COSTS_N_INSNS (5), /* loadd. */
1147 COSTS_N_INSNS (1), /* load_unaligned. */
1148 COSTS_N_INSNS (2), /* store. */
1149 COSTS_N_INSNS (2), /* strd. */
1150 COSTS_N_INSNS (2), /* stm_1st. */
1151 1, /* stm_regs_per_insn_1st. */
1152 2, /* stm_regs_per_insn_subsequent. */
1153 COSTS_N_INSNS (1), /* storef. */
1154 COSTS_N_INSNS (1), /* stored. */
1155 COSTS_N_INSNS (1), /* store_unaligned. */
1156 COSTS_N_INSNS (1), /* loadv. */
1157 COSTS_N_INSNS (1) /* storev. */
1158 },
1159 {
1160 /* FP SFmode */
1161 {
1162 COSTS_N_INSNS (14), /* div. */
1163 COSTS_N_INSNS (4), /* mult. */
1164 COSTS_N_INSNS (7), /* mult_addsub. */
1165 COSTS_N_INSNS (30), /* fma. */
1166 COSTS_N_INSNS (3), /* addsub. */
1167 COSTS_N_INSNS (1), /* fpconst. */
1168 COSTS_N_INSNS (1), /* neg. */
1169 COSTS_N_INSNS (3), /* compare. */
1170 COSTS_N_INSNS (3), /* widen. */
1171 COSTS_N_INSNS (3), /* narrow. */
1172 COSTS_N_INSNS (3), /* toint. */
1173 COSTS_N_INSNS (3), /* fromint. */
1174 COSTS_N_INSNS (3) /* roundint. */
1175 },
1176 /* FP DFmode */
1177 {
1178 COSTS_N_INSNS (24), /* div. */
1179 COSTS_N_INSNS (5), /* mult. */
1180 COSTS_N_INSNS (8), /* mult_addsub. */
1181 COSTS_N_INSNS (30), /* fma. */
1182 COSTS_N_INSNS (3), /* addsub. */
1183 COSTS_N_INSNS (1), /* fpconst. */
1184 COSTS_N_INSNS (1), /* neg. */
1185 COSTS_N_INSNS (3), /* compare. */
1186 COSTS_N_INSNS (3), /* widen. */
1187 COSTS_N_INSNS (3), /* narrow. */
1188 COSTS_N_INSNS (3), /* toint. */
1189 COSTS_N_INSNS (3), /* fromint. */
1190 COSTS_N_INSNS (3) /* roundint. */
1191 }
1192 },
1193 /* Vector */
1194 {
1195 COSTS_N_INSNS (1) /* alu. */
1196 }
1197 };
1198
1199 const struct cpu_cost_table cortexa8_extra_costs =
1200 {
1201 /* ALU */
1202 {
1203 0, /* arith. */
1204 0, /* logical. */
1205 COSTS_N_INSNS (1), /* shift. */
1206 0, /* shift_reg. */
1207 COSTS_N_INSNS (1), /* arith_shift. */
1208 0, /* arith_shift_reg. */
1209 COSTS_N_INSNS (1), /* log_shift. */
1210 0, /* log_shift_reg. */
1211 0, /* extend. */
1212 0, /* extend_arith. */
1213 0, /* bfi. */
1214 0, /* bfx. */
1215 0, /* clz. */
1216 0, /* rev. */
1217 0, /* non_exec. */
1218 true /* non_exec_costs_exec. */
1219 },
1220 {
1221 /* MULT SImode */
1222 {
1223 COSTS_N_INSNS (1), /* simple. */
1224 COSTS_N_INSNS (1), /* flag_setting. */
1225 COSTS_N_INSNS (1), /* extend. */
1226 COSTS_N_INSNS (1), /* add. */
1227 COSTS_N_INSNS (1), /* extend_add. */
1228 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1229 },
1230 /* MULT DImode */
1231 {
1232 0, /* simple (N/A). */
1233 0, /* flag_setting (N/A). */
1234 COSTS_N_INSNS (2), /* extend. */
1235 0, /* add (N/A). */
1236 COSTS_N_INSNS (2), /* extend_add. */
1237 0 /* idiv (N/A). */
1238 }
1239 },
1240 /* LD/ST */
1241 {
1242 COSTS_N_INSNS (1), /* load. */
1243 COSTS_N_INSNS (1), /* load_sign_extend. */
1244 COSTS_N_INSNS (1), /* ldrd. */
1245 COSTS_N_INSNS (1), /* ldm_1st. */
1246 1, /* ldm_regs_per_insn_1st. */
1247 2, /* ldm_regs_per_insn_subsequent. */
1248 COSTS_N_INSNS (1), /* loadf. */
1249 COSTS_N_INSNS (1), /* loadd. */
1250 COSTS_N_INSNS (1), /* load_unaligned. */
1251 COSTS_N_INSNS (1), /* store. */
1252 COSTS_N_INSNS (1), /* strd. */
1253 COSTS_N_INSNS (1), /* stm_1st. */
1254 1, /* stm_regs_per_insn_1st. */
1255 2, /* stm_regs_per_insn_subsequent. */
1256 COSTS_N_INSNS (1), /* storef. */
1257 COSTS_N_INSNS (1), /* stored. */
1258 COSTS_N_INSNS (1), /* store_unaligned. */
1259 COSTS_N_INSNS (1), /* loadv. */
1260 COSTS_N_INSNS (1) /* storev. */
1261 },
1262 {
1263 /* FP SFmode */
1264 {
1265 COSTS_N_INSNS (36), /* div. */
1266 COSTS_N_INSNS (11), /* mult. */
1267 COSTS_N_INSNS (20), /* mult_addsub. */
1268 COSTS_N_INSNS (30), /* fma. */
1269 COSTS_N_INSNS (9), /* addsub. */
1270 COSTS_N_INSNS (3), /* fpconst. */
1271 COSTS_N_INSNS (3), /* neg. */
1272 COSTS_N_INSNS (6), /* compare. */
1273 COSTS_N_INSNS (4), /* widen. */
1274 COSTS_N_INSNS (4), /* narrow. */
1275 COSTS_N_INSNS (8), /* toint. */
1276 COSTS_N_INSNS (8), /* fromint. */
1277 COSTS_N_INSNS (8) /* roundint. */
1278 },
1279 /* FP DFmode */
1280 {
1281 COSTS_N_INSNS (64), /* div. */
1282 COSTS_N_INSNS (16), /* mult. */
1283 COSTS_N_INSNS (25), /* mult_addsub. */
1284 COSTS_N_INSNS (30), /* fma. */
1285 COSTS_N_INSNS (9), /* addsub. */
1286 COSTS_N_INSNS (3), /* fpconst. */
1287 COSTS_N_INSNS (3), /* neg. */
1288 COSTS_N_INSNS (6), /* compare. */
1289 COSTS_N_INSNS (6), /* widen. */
1290 COSTS_N_INSNS (6), /* narrow. */
1291 COSTS_N_INSNS (8), /* toint. */
1292 COSTS_N_INSNS (8), /* fromint. */
1293 COSTS_N_INSNS (8) /* roundint. */
1294 }
1295 },
1296 /* Vector */
1297 {
1298 COSTS_N_INSNS (1) /* alu. */
1299 }
1300 };
1301
1302 const struct cpu_cost_table cortexa5_extra_costs =
1303 {
1304 /* ALU */
1305 {
1306 0, /* arith. */
1307 0, /* logical. */
1308 COSTS_N_INSNS (1), /* shift. */
1309 COSTS_N_INSNS (1), /* shift_reg. */
1310 COSTS_N_INSNS (1), /* arith_shift. */
1311 COSTS_N_INSNS (1), /* arith_shift_reg. */
1312 COSTS_N_INSNS (1), /* log_shift. */
1313 COSTS_N_INSNS (1), /* log_shift_reg. */
1314 COSTS_N_INSNS (1), /* extend. */
1315 COSTS_N_INSNS (1), /* extend_arith. */
1316 COSTS_N_INSNS (1), /* bfi. */
1317 COSTS_N_INSNS (1), /* bfx. */
1318 COSTS_N_INSNS (1), /* clz. */
1319 COSTS_N_INSNS (1), /* rev. */
1320 0, /* non_exec. */
1321 true /* non_exec_costs_exec. */
1322 },
1323
1324 {
1325 /* MULT SImode */
1326 {
1327 0, /* simple. */
1328 COSTS_N_INSNS (1), /* flag_setting. */
1329 COSTS_N_INSNS (1), /* extend. */
1330 COSTS_N_INSNS (1), /* add. */
1331 COSTS_N_INSNS (1), /* extend_add. */
1332 COSTS_N_INSNS (7) /* idiv. */
1333 },
1334 /* MULT DImode */
1335 {
1336 0, /* simple (N/A). */
1337 0, /* flag_setting (N/A). */
1338 COSTS_N_INSNS (1), /* extend. */
1339 0, /* add. */
1340 COSTS_N_INSNS (2), /* extend_add. */
1341 0 /* idiv (N/A). */
1342 }
1343 },
1344 /* LD/ST */
1345 {
1346 COSTS_N_INSNS (1), /* load. */
1347 COSTS_N_INSNS (1), /* load_sign_extend. */
1348 COSTS_N_INSNS (6), /* ldrd. */
1349 COSTS_N_INSNS (1), /* ldm_1st. */
1350 1, /* ldm_regs_per_insn_1st. */
1351 2, /* ldm_regs_per_insn_subsequent. */
1352 COSTS_N_INSNS (2), /* loadf. */
1353 COSTS_N_INSNS (4), /* loadd. */
1354 COSTS_N_INSNS (1), /* load_unaligned. */
1355 COSTS_N_INSNS (1), /* store. */
1356 COSTS_N_INSNS (3), /* strd. */
1357 COSTS_N_INSNS (1), /* stm_1st. */
1358 1, /* stm_regs_per_insn_1st. */
1359 2, /* stm_regs_per_insn_subsequent. */
1360 COSTS_N_INSNS (2), /* storef. */
1361 COSTS_N_INSNS (2), /* stored. */
1362 COSTS_N_INSNS (1), /* store_unaligned. */
1363 COSTS_N_INSNS (1), /* loadv. */
1364 COSTS_N_INSNS (1) /* storev. */
1365 },
1366 {
1367 /* FP SFmode */
1368 {
1369 COSTS_N_INSNS (15), /* div. */
1370 COSTS_N_INSNS (3), /* mult. */
1371 COSTS_N_INSNS (7), /* mult_addsub. */
1372 COSTS_N_INSNS (7), /* fma. */
1373 COSTS_N_INSNS (3), /* addsub. */
1374 COSTS_N_INSNS (3), /* fpconst. */
1375 COSTS_N_INSNS (3), /* neg. */
1376 COSTS_N_INSNS (3), /* compare. */
1377 COSTS_N_INSNS (3), /* widen. */
1378 COSTS_N_INSNS (3), /* narrow. */
1379 COSTS_N_INSNS (3), /* toint. */
1380 COSTS_N_INSNS (3), /* fromint. */
1381 COSTS_N_INSNS (3) /* roundint. */
1382 },
1383 /* FP DFmode */
1384 {
1385 COSTS_N_INSNS (30), /* div. */
1386 COSTS_N_INSNS (6), /* mult. */
1387 COSTS_N_INSNS (10), /* mult_addsub. */
1388 COSTS_N_INSNS (7), /* fma. */
1389 COSTS_N_INSNS (3), /* addsub. */
1390 COSTS_N_INSNS (3), /* fpconst. */
1391 COSTS_N_INSNS (3), /* neg. */
1392 COSTS_N_INSNS (3), /* compare. */
1393 COSTS_N_INSNS (3), /* widen. */
1394 COSTS_N_INSNS (3), /* narrow. */
1395 COSTS_N_INSNS (3), /* toint. */
1396 COSTS_N_INSNS (3), /* fromint. */
1397 COSTS_N_INSNS (3) /* roundint. */
1398 }
1399 },
1400 /* Vector */
1401 {
1402 COSTS_N_INSNS (1) /* alu. */
1403 }
1404 };
1405
1406
1407 const struct cpu_cost_table cortexa7_extra_costs =
1408 {
1409 /* ALU */
1410 {
1411 0, /* arith. */
1412 0, /* logical. */
1413 COSTS_N_INSNS (1), /* shift. */
1414 COSTS_N_INSNS (1), /* shift_reg. */
1415 COSTS_N_INSNS (1), /* arith_shift. */
1416 COSTS_N_INSNS (1), /* arith_shift_reg. */
1417 COSTS_N_INSNS (1), /* log_shift. */
1418 COSTS_N_INSNS (1), /* log_shift_reg. */
1419 COSTS_N_INSNS (1), /* extend. */
1420 COSTS_N_INSNS (1), /* extend_arith. */
1421 COSTS_N_INSNS (1), /* bfi. */
1422 COSTS_N_INSNS (1), /* bfx. */
1423 COSTS_N_INSNS (1), /* clz. */
1424 COSTS_N_INSNS (1), /* rev. */
1425 0, /* non_exec. */
1426 true /* non_exec_costs_exec. */
1427 },
1428
1429 {
1430 /* MULT SImode */
1431 {
1432 0, /* simple. */
1433 COSTS_N_INSNS (1), /* flag_setting. */
1434 COSTS_N_INSNS (1), /* extend. */
1435 COSTS_N_INSNS (1), /* add. */
1436 COSTS_N_INSNS (1), /* extend_add. */
1437 COSTS_N_INSNS (7) /* idiv. */
1438 },
1439 /* MULT DImode */
1440 {
1441 0, /* simple (N/A). */
1442 0, /* flag_setting (N/A). */
1443 COSTS_N_INSNS (1), /* extend. */
1444 0, /* add. */
1445 COSTS_N_INSNS (2), /* extend_add. */
1446 0 /* idiv (N/A). */
1447 }
1448 },
1449 /* LD/ST */
1450 {
1451 COSTS_N_INSNS (1), /* load. */
1452 COSTS_N_INSNS (1), /* load_sign_extend. */
1453 COSTS_N_INSNS (3), /* ldrd. */
1454 COSTS_N_INSNS (1), /* ldm_1st. */
1455 1, /* ldm_regs_per_insn_1st. */
1456 2, /* ldm_regs_per_insn_subsequent. */
1457 COSTS_N_INSNS (2), /* loadf. */
1458 COSTS_N_INSNS (2), /* loadd. */
1459 COSTS_N_INSNS (1), /* load_unaligned. */
1460 COSTS_N_INSNS (1), /* store. */
1461 COSTS_N_INSNS (3), /* strd. */
1462 COSTS_N_INSNS (1), /* stm_1st. */
1463 1, /* stm_regs_per_insn_1st. */
1464 2, /* stm_regs_per_insn_subsequent. */
1465 COSTS_N_INSNS (2), /* storef. */
1466 COSTS_N_INSNS (2), /* stored. */
1467 COSTS_N_INSNS (1), /* store_unaligned. */
1468 COSTS_N_INSNS (1), /* loadv. */
1469 COSTS_N_INSNS (1) /* storev. */
1470 },
1471 {
1472 /* FP SFmode */
1473 {
1474 COSTS_N_INSNS (15), /* div. */
1475 COSTS_N_INSNS (3), /* mult. */
1476 COSTS_N_INSNS (7), /* mult_addsub. */
1477 COSTS_N_INSNS (7), /* fma. */
1478 COSTS_N_INSNS (3), /* addsub. */
1479 COSTS_N_INSNS (3), /* fpconst. */
1480 COSTS_N_INSNS (3), /* neg. */
1481 COSTS_N_INSNS (3), /* compare. */
1482 COSTS_N_INSNS (3), /* widen. */
1483 COSTS_N_INSNS (3), /* narrow. */
1484 COSTS_N_INSNS (3), /* toint. */
1485 COSTS_N_INSNS (3), /* fromint. */
1486 COSTS_N_INSNS (3) /* roundint. */
1487 },
1488 /* FP DFmode */
1489 {
1490 COSTS_N_INSNS (30), /* div. */
1491 COSTS_N_INSNS (6), /* mult. */
1492 COSTS_N_INSNS (10), /* mult_addsub. */
1493 COSTS_N_INSNS (7), /* fma. */
1494 COSTS_N_INSNS (3), /* addsub. */
1495 COSTS_N_INSNS (3), /* fpconst. */
1496 COSTS_N_INSNS (3), /* neg. */
1497 COSTS_N_INSNS (3), /* compare. */
1498 COSTS_N_INSNS (3), /* widen. */
1499 COSTS_N_INSNS (3), /* narrow. */
1500 COSTS_N_INSNS (3), /* toint. */
1501 COSTS_N_INSNS (3), /* fromint. */
1502 COSTS_N_INSNS (3) /* roundint. */
1503 }
1504 },
1505 /* Vector */
1506 {
1507 COSTS_N_INSNS (1) /* alu. */
1508 }
1509 };
1510
1511 const struct cpu_cost_table cortexa12_extra_costs =
1512 {
1513 /* ALU */
1514 {
1515 0, /* arith. */
1516 0, /* logical. */
1517 0, /* shift. */
1518 COSTS_N_INSNS (1), /* shift_reg. */
1519 COSTS_N_INSNS (1), /* arith_shift. */
1520 COSTS_N_INSNS (1), /* arith_shift_reg. */
1521 COSTS_N_INSNS (1), /* log_shift. */
1522 COSTS_N_INSNS (1), /* log_shift_reg. */
1523 0, /* extend. */
1524 COSTS_N_INSNS (1), /* extend_arith. */
1525 0, /* bfi. */
1526 COSTS_N_INSNS (1), /* bfx. */
1527 COSTS_N_INSNS (1), /* clz. */
1528 COSTS_N_INSNS (1), /* rev. */
1529 0, /* non_exec. */
1530 true /* non_exec_costs_exec. */
1531 },
1532 /* MULT SImode */
1533 {
1534 {
1535 COSTS_N_INSNS (2), /* simple. */
1536 COSTS_N_INSNS (3), /* flag_setting. */
1537 COSTS_N_INSNS (2), /* extend. */
1538 COSTS_N_INSNS (3), /* add. */
1539 COSTS_N_INSNS (2), /* extend_add. */
1540 COSTS_N_INSNS (18) /* idiv. */
1541 },
1542 /* MULT DImode */
1543 {
1544 0, /* simple (N/A). */
1545 0, /* flag_setting (N/A). */
1546 COSTS_N_INSNS (3), /* extend. */
1547 0, /* add (N/A). */
1548 COSTS_N_INSNS (3), /* extend_add. */
1549 0 /* idiv (N/A). */
1550 }
1551 },
1552 /* LD/ST */
1553 {
1554 COSTS_N_INSNS (3), /* load. */
1555 COSTS_N_INSNS (3), /* load_sign_extend. */
1556 COSTS_N_INSNS (3), /* ldrd. */
1557 COSTS_N_INSNS (3), /* ldm_1st. */
1558 1, /* ldm_regs_per_insn_1st. */
1559 2, /* ldm_regs_per_insn_subsequent. */
1560 COSTS_N_INSNS (3), /* loadf. */
1561 COSTS_N_INSNS (3), /* loadd. */
1562 0, /* load_unaligned. */
1563 0, /* store. */
1564 0, /* strd. */
1565 0, /* stm_1st. */
1566 1, /* stm_regs_per_insn_1st. */
1567 2, /* stm_regs_per_insn_subsequent. */
1568 COSTS_N_INSNS (2), /* storef. */
1569 COSTS_N_INSNS (2), /* stored. */
1570 0, /* store_unaligned. */
1571 COSTS_N_INSNS (1), /* loadv. */
1572 COSTS_N_INSNS (1) /* storev. */
1573 },
1574 {
1575 /* FP SFmode */
1576 {
1577 COSTS_N_INSNS (17), /* div. */
1578 COSTS_N_INSNS (4), /* mult. */
1579 COSTS_N_INSNS (8), /* mult_addsub. */
1580 COSTS_N_INSNS (8), /* fma. */
1581 COSTS_N_INSNS (4), /* addsub. */
1582 COSTS_N_INSNS (2), /* fpconst. */
1583 COSTS_N_INSNS (2), /* neg. */
1584 COSTS_N_INSNS (2), /* compare. */
1585 COSTS_N_INSNS (4), /* widen. */
1586 COSTS_N_INSNS (4), /* narrow. */
1587 COSTS_N_INSNS (4), /* toint. */
1588 COSTS_N_INSNS (4), /* fromint. */
1589 COSTS_N_INSNS (4) /* roundint. */
1590 },
1591 /* FP DFmode */
1592 {
1593 COSTS_N_INSNS (31), /* div. */
1594 COSTS_N_INSNS (4), /* mult. */
1595 COSTS_N_INSNS (8), /* mult_addsub. */
1596 COSTS_N_INSNS (8), /* fma. */
1597 COSTS_N_INSNS (4), /* addsub. */
1598 COSTS_N_INSNS (2), /* fpconst. */
1599 COSTS_N_INSNS (2), /* neg. */
1600 COSTS_N_INSNS (2), /* compare. */
1601 COSTS_N_INSNS (4), /* widen. */
1602 COSTS_N_INSNS (4), /* narrow. */
1603 COSTS_N_INSNS (4), /* toint. */
1604 COSTS_N_INSNS (4), /* fromint. */
1605 COSTS_N_INSNS (4) /* roundint. */
1606 }
1607 },
1608 /* Vector */
1609 {
1610 COSTS_N_INSNS (1) /* alu. */
1611 }
1612 };
1613
1614 const struct cpu_cost_table cortexa15_extra_costs =
1615 {
1616 /* ALU */
1617 {
1618 0, /* arith. */
1619 0, /* logical. */
1620 0, /* shift. */
1621 0, /* shift_reg. */
1622 COSTS_N_INSNS (1), /* arith_shift. */
1623 COSTS_N_INSNS (1), /* arith_shift_reg. */
1624 COSTS_N_INSNS (1), /* log_shift. */
1625 COSTS_N_INSNS (1), /* log_shift_reg. */
1626 0, /* extend. */
1627 COSTS_N_INSNS (1), /* extend_arith. */
1628 COSTS_N_INSNS (1), /* bfi. */
1629 0, /* bfx. */
1630 0, /* clz. */
1631 0, /* rev. */
1632 0, /* non_exec. */
1633 true /* non_exec_costs_exec. */
1634 },
1635 /* MULT SImode */
1636 {
1637 {
1638 COSTS_N_INSNS (2), /* simple. */
1639 COSTS_N_INSNS (3), /* flag_setting. */
1640 COSTS_N_INSNS (2), /* extend. */
1641 COSTS_N_INSNS (2), /* add. */
1642 COSTS_N_INSNS (2), /* extend_add. */
1643 COSTS_N_INSNS (18) /* idiv. */
1644 },
1645 /* MULT DImode */
1646 {
1647 0, /* simple (N/A). */
1648 0, /* flag_setting (N/A). */
1649 COSTS_N_INSNS (3), /* extend. */
1650 0, /* add (N/A). */
1651 COSTS_N_INSNS (3), /* extend_add. */
1652 0 /* idiv (N/A). */
1653 }
1654 },
1655 /* LD/ST */
1656 {
1657 COSTS_N_INSNS (3), /* load. */
1658 COSTS_N_INSNS (3), /* load_sign_extend. */
1659 COSTS_N_INSNS (3), /* ldrd. */
1660 COSTS_N_INSNS (4), /* ldm_1st. */
1661 1, /* ldm_regs_per_insn_1st. */
1662 2, /* ldm_regs_per_insn_subsequent. */
1663 COSTS_N_INSNS (4), /* loadf. */
1664 COSTS_N_INSNS (4), /* loadd. */
1665 0, /* load_unaligned. */
1666 0, /* store. */
1667 0, /* strd. */
1668 COSTS_N_INSNS (1), /* stm_1st. */
1669 1, /* stm_regs_per_insn_1st. */
1670 2, /* stm_regs_per_insn_subsequent. */
1671 0, /* storef. */
1672 0, /* stored. */
1673 0, /* store_unaligned. */
1674 COSTS_N_INSNS (1), /* loadv. */
1675 COSTS_N_INSNS (1) /* storev. */
1676 },
1677 {
1678 /* FP SFmode */
1679 {
1680 COSTS_N_INSNS (17), /* div. */
1681 COSTS_N_INSNS (4), /* mult. */
1682 COSTS_N_INSNS (8), /* mult_addsub. */
1683 COSTS_N_INSNS (8), /* fma. */
1684 COSTS_N_INSNS (4), /* addsub. */
1685 COSTS_N_INSNS (2), /* fpconst. */
1686 COSTS_N_INSNS (2), /* neg. */
1687 COSTS_N_INSNS (5), /* compare. */
1688 COSTS_N_INSNS (4), /* widen. */
1689 COSTS_N_INSNS (4), /* narrow. */
1690 COSTS_N_INSNS (4), /* toint. */
1691 COSTS_N_INSNS (4), /* fromint. */
1692 COSTS_N_INSNS (4) /* roundint. */
1693 },
1694 /* FP DFmode */
1695 {
1696 COSTS_N_INSNS (31), /* div. */
1697 COSTS_N_INSNS (4), /* mult. */
1698 COSTS_N_INSNS (8), /* mult_addsub. */
1699 COSTS_N_INSNS (8), /* fma. */
1700 COSTS_N_INSNS (4), /* addsub. */
1701 COSTS_N_INSNS (2), /* fpconst. */
1702 COSTS_N_INSNS (2), /* neg. */
1703 COSTS_N_INSNS (2), /* compare. */
1704 COSTS_N_INSNS (4), /* widen. */
1705 COSTS_N_INSNS (4), /* narrow. */
1706 COSTS_N_INSNS (4), /* toint. */
1707 COSTS_N_INSNS (4), /* fromint. */
1708 COSTS_N_INSNS (4) /* roundint. */
1709 }
1710 },
1711 /* Vector */
1712 {
1713 COSTS_N_INSNS (1) /* alu. */
1714 }
1715 };
1716
1717 const struct cpu_cost_table v7m_extra_costs =
1718 {
1719 /* ALU */
1720 {
1721 0, /* arith. */
1722 0, /* logical. */
1723 0, /* shift. */
1724 0, /* shift_reg. */
1725 0, /* arith_shift. */
1726 COSTS_N_INSNS (1), /* arith_shift_reg. */
1727 0, /* log_shift. */
1728 COSTS_N_INSNS (1), /* log_shift_reg. */
1729 0, /* extend. */
1730 COSTS_N_INSNS (1), /* extend_arith. */
1731 0, /* bfi. */
1732 0, /* bfx. */
1733 0, /* clz. */
1734 0, /* rev. */
1735 COSTS_N_INSNS (1), /* non_exec. */
1736 false /* non_exec_costs_exec. */
1737 },
1738 {
1739 /* MULT SImode */
1740 {
1741 COSTS_N_INSNS (1), /* simple. */
1742 COSTS_N_INSNS (1), /* flag_setting. */
1743 COSTS_N_INSNS (2), /* extend. */
1744 COSTS_N_INSNS (1), /* add. */
1745 COSTS_N_INSNS (3), /* extend_add. */
1746 COSTS_N_INSNS (8) /* idiv. */
1747 },
1748 /* MULT DImode */
1749 {
1750 0, /* simple (N/A). */
1751 0, /* flag_setting (N/A). */
1752 COSTS_N_INSNS (2), /* extend. */
1753 0, /* add (N/A). */
1754 COSTS_N_INSNS (3), /* extend_add. */
1755 0 /* idiv (N/A). */
1756 }
1757 },
1758 /* LD/ST */
1759 {
1760 COSTS_N_INSNS (2), /* load. */
1761 0, /* load_sign_extend. */
1762 COSTS_N_INSNS (3), /* ldrd. */
1763 COSTS_N_INSNS (2), /* ldm_1st. */
1764 1, /* ldm_regs_per_insn_1st. */
1765 1, /* ldm_regs_per_insn_subsequent. */
1766 COSTS_N_INSNS (2), /* loadf. */
1767 COSTS_N_INSNS (3), /* loadd. */
1768 COSTS_N_INSNS (1), /* load_unaligned. */
1769 COSTS_N_INSNS (2), /* store. */
1770 COSTS_N_INSNS (3), /* strd. */
1771 COSTS_N_INSNS (2), /* stm_1st. */
1772 1, /* stm_regs_per_insn_1st. */
1773 1, /* stm_regs_per_insn_subsequent. */
1774 COSTS_N_INSNS (2), /* storef. */
1775 COSTS_N_INSNS (3), /* stored. */
1776 COSTS_N_INSNS (1), /* store_unaligned. */
1777 COSTS_N_INSNS (1), /* loadv. */
1778 COSTS_N_INSNS (1) /* storev. */
1779 },
1780 {
1781 /* FP SFmode */
1782 {
1783 COSTS_N_INSNS (7), /* div. */
1784 COSTS_N_INSNS (2), /* mult. */
1785 COSTS_N_INSNS (5), /* mult_addsub. */
1786 COSTS_N_INSNS (3), /* fma. */
1787 COSTS_N_INSNS (1), /* addsub. */
1788 0, /* fpconst. */
1789 0, /* neg. */
1790 0, /* compare. */
1791 0, /* widen. */
1792 0, /* narrow. */
1793 0, /* toint. */
1794 0, /* fromint. */
1795 0 /* roundint. */
1796 },
1797 /* FP DFmode */
1798 {
1799 COSTS_N_INSNS (15), /* div. */
1800 COSTS_N_INSNS (5), /* mult. */
1801 COSTS_N_INSNS (7), /* mult_addsub. */
1802 COSTS_N_INSNS (7), /* fma. */
1803 COSTS_N_INSNS (3), /* addsub. */
1804 0, /* fpconst. */
1805 0, /* neg. */
1806 0, /* compare. */
1807 0, /* widen. */
1808 0, /* narrow. */
1809 0, /* toint. */
1810 0, /* fromint. */
1811 0 /* roundint. */
1812 }
1813 },
1814 /* Vector */
1815 {
1816 COSTS_N_INSNS (1) /* alu. */
1817 }
1818 };
1819
1820 const struct addr_mode_cost_table generic_addr_mode_costs =
1821 {
1822 /* int. */
1823 {
1824 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1825 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1826 COSTS_N_INSNS (0) /* AMO_WB. */
1827 },
1828 /* float. */
1829 {
1830 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1831 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1832 COSTS_N_INSNS (0) /* AMO_WB. */
1833 },
1834 /* vector. */
1835 {
1836 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1837 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1838 COSTS_N_INSNS (0) /* AMO_WB. */
1839 }
1840 };
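/* How these tables are consumed, as an illustrative sketch only: each entry
   is an extra cost added on top of a one-instruction baseline, which is why
   cheap operations are listed as 0.  Schematically, with
   arith_uses_register_shift standing in for a real predicate:

     int cost = COSTS_N_INSNS (1);
     if (arith_uses_register_shift)
       cost += extra_cost->alu.arith_shift_reg;

   COSTS_N_INSNS (N) simply scales N by the cost GCC assigns to one fast
   instruction.  */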
1841
1842 const struct tune_params arm_slowmul_tune =
1843 {
1844 &generic_extra_costs, /* Insn extra costs. */
1845 &generic_addr_mode_costs, /* Addressing mode costs. */
1846 NULL, /* Sched adj cost. */
1847 arm_default_branch_cost,
1848 &arm_default_vec_cost,
1849 3, /* Constant limit. */
1850 5, /* Max cond insns. */
1851 8, /* Memset max inline. */
1852 1, /* Issue rate. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 tune_params::PREF_CONST_POOL_TRUE,
1855 tune_params::PREF_LDRD_FALSE,
1856 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1857 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1858 tune_params::DISPARAGE_FLAGS_NEITHER,
1859 tune_params::PREF_NEON_STRINGOPS_FALSE,
1860 tune_params::FUSE_NOTHING,
1861 tune_params::SCHED_AUTOPREF_OFF
1862 };
1863
1864 const struct tune_params arm_fastmul_tune =
1865 {
1866 &generic_extra_costs, /* Insn extra costs. */
1867 &generic_addr_mode_costs, /* Addressing mode costs. */
1868 NULL, /* Sched adj cost. */
1869 arm_default_branch_cost,
1870 &arm_default_vec_cost,
1871 1, /* Constant limit. */
1872 5, /* Max cond insns. */
1873 8, /* Memset max inline. */
1874 1, /* Issue rate. */
1875 ARM_PREFETCH_NOT_BENEFICIAL,
1876 tune_params::PREF_CONST_POOL_TRUE,
1877 tune_params::PREF_LDRD_FALSE,
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1879 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1880 tune_params::DISPARAGE_FLAGS_NEITHER,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 /* StrongARM has early execution of branches, so a sequence that is worth
1887 skipping is shorter. Set max_insns_skipped to a lower value. */
1888
1889 const struct tune_params arm_strongarm_tune =
1890 {
1891 &generic_extra_costs, /* Insn extra costs. */
1892 &generic_addr_mode_costs, /* Addressing mode costs. */
1893 NULL, /* Sched adj cost. */
1894 arm_default_branch_cost,
1895 &arm_default_vec_cost,
1896 1, /* Constant limit. */
1897 3, /* Max cond insns. */
1898 8, /* Memset max inline. */
1899 1, /* Issue rate. */
1900 ARM_PREFETCH_NOT_BENEFICIAL,
1901 tune_params::PREF_CONST_POOL_TRUE,
1902 tune_params::PREF_LDRD_FALSE,
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1905 tune_params::DISPARAGE_FLAGS_NEITHER,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1909 };
1910
1911 const struct tune_params arm_xscale_tune =
1912 {
1913 &generic_extra_costs, /* Insn extra costs. */
1914 &generic_addr_mode_costs, /* Addressing mode costs. */
1915 xscale_sched_adjust_cost,
1916 arm_default_branch_cost,
1917 &arm_default_vec_cost,
1918 2, /* Constant limit. */
1919 3, /* Max cond insns. */
1920 8, /* Memset max inline. */
1921 1, /* Issue rate. */
1922 ARM_PREFETCH_NOT_BENEFICIAL,
1923 tune_params::PREF_CONST_POOL_TRUE,
1924 tune_params::PREF_LDRD_FALSE,
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1927 tune_params::DISPARAGE_FLAGS_NEITHER,
1928 tune_params::PREF_NEON_STRINGOPS_FALSE,
1929 tune_params::FUSE_NOTHING,
1930 tune_params::SCHED_AUTOPREF_OFF
1931 };
1932
1933 const struct tune_params arm_9e_tune =
1934 {
1935 &generic_extra_costs, /* Insn extra costs. */
1936 &generic_addr_mode_costs, /* Addressing mode costs. */
1937 NULL, /* Sched adj cost. */
1938 arm_default_branch_cost,
1939 &arm_default_vec_cost,
1940 1, /* Constant limit. */
1941 5, /* Max cond insns. */
1942 8, /* Memset max inline. */
1943 1, /* Issue rate. */
1944 ARM_PREFETCH_NOT_BENEFICIAL,
1945 tune_params::PREF_CONST_POOL_TRUE,
1946 tune_params::PREF_LDRD_FALSE,
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1949 tune_params::DISPARAGE_FLAGS_NEITHER,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955 const struct tune_params arm_marvell_pj4_tune =
1956 {
1957 &generic_extra_costs, /* Insn extra costs. */
1958 &generic_addr_mode_costs, /* Addressing mode costs. */
1959 NULL, /* Sched adj cost. */
1960 arm_default_branch_cost,
1961 &arm_default_vec_cost,
1962 1, /* Constant limit. */
1963 5, /* Max cond insns. */
1964 8, /* Memset max inline. */
1965 2, /* Issue rate. */
1966 ARM_PREFETCH_NOT_BENEFICIAL,
1967 tune_params::PREF_CONST_POOL_TRUE,
1968 tune_params::PREF_LDRD_FALSE,
1969 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1970 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1971 tune_params::DISPARAGE_FLAGS_NEITHER,
1972 tune_params::PREF_NEON_STRINGOPS_FALSE,
1973 tune_params::FUSE_NOTHING,
1974 tune_params::SCHED_AUTOPREF_OFF
1975 };
1976
1977 const struct tune_params arm_v6t2_tune =
1978 {
1979 &generic_extra_costs, /* Insn extra costs. */
1980 &generic_addr_mode_costs, /* Addressing mode costs. */
1981 NULL, /* Sched adj cost. */
1982 arm_default_branch_cost,
1983 &arm_default_vec_cost,
1984 1, /* Constant limit. */
1985 5, /* Max cond insns. */
1986 8, /* Memset max inline. */
1987 1, /* Issue rate. */
1988 ARM_PREFETCH_NOT_BENEFICIAL,
1989 tune_params::PREF_CONST_POOL_FALSE,
1990 tune_params::PREF_LDRD_FALSE,
1991 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1992 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1993 tune_params::DISPARAGE_FLAGS_NEITHER,
1994 tune_params::PREF_NEON_STRINGOPS_FALSE,
1995 tune_params::FUSE_NOTHING,
1996 tune_params::SCHED_AUTOPREF_OFF
1997 };
1998
1999
2000 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2001 const struct tune_params arm_cortex_tune =
2002 {
2003 &generic_extra_costs,
2004 &generic_addr_mode_costs, /* Addressing mode costs. */
2005 NULL, /* Sched adj cost. */
2006 arm_default_branch_cost,
2007 &arm_default_vec_cost,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 tune_params::PREF_CONST_POOL_FALSE,
2014 tune_params::PREF_LDRD_FALSE,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE,
2019 tune_params::FUSE_NOTHING,
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_cortex_a8_tune =
2024 {
2025 &cortexa8_extra_costs,
2026 &generic_addr_mode_costs, /* Addressing mode costs. */
2027 NULL, /* Sched adj cost. */
2028 arm_default_branch_cost,
2029 &arm_default_vec_cost,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 2, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL,
2035 tune_params::PREF_CONST_POOL_FALSE,
2036 tune_params::PREF_LDRD_FALSE,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045 const struct tune_params arm_cortex_a7_tune =
2046 {
2047 &cortexa7_extra_costs,
2048 &generic_addr_mode_costs, /* Addressing mode costs. */
2049 NULL, /* Sched adj cost. */
2050 arm_default_branch_cost,
2051 &arm_default_vec_cost,
2052 1, /* Constant limit. */
2053 5, /* Max cond insns. */
2054 8, /* Memset max inline. */
2055 2, /* Issue rate. */
2056 ARM_PREFETCH_NOT_BENEFICIAL,
2057 tune_params::PREF_CONST_POOL_FALSE,
2058 tune_params::PREF_LDRD_FALSE,
2059 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2060 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2061 tune_params::DISPARAGE_FLAGS_NEITHER,
2062 tune_params::PREF_NEON_STRINGOPS_TRUE,
2063 tune_params::FUSE_NOTHING,
2064 tune_params::SCHED_AUTOPREF_OFF
2065 };
2066
2067 const struct tune_params arm_cortex_a15_tune =
2068 {
2069 &cortexa15_extra_costs,
2070 &generic_addr_mode_costs, /* Addressing mode costs. */
2071 NULL, /* Sched adj cost. */
2072 arm_default_branch_cost,
2073 &arm_default_vec_cost,
2074 1, /* Constant limit. */
2075 2, /* Max cond insns. */
2076 8, /* Memset max inline. */
2077 3, /* Issue rate. */
2078 ARM_PREFETCH_NOT_BENEFICIAL,
2079 tune_params::PREF_CONST_POOL_FALSE,
2080 tune_params::PREF_LDRD_TRUE,
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2082 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2083 tune_params::DISPARAGE_FLAGS_ALL,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 tune_params::FUSE_NOTHING,
2086 tune_params::SCHED_AUTOPREF_FULL
2087 };
2088
2089 const struct tune_params arm_cortex_a35_tune =
2090 {
2091 &cortexa53_extra_costs,
2092 &generic_addr_mode_costs, /* Addressing mode costs. */
2093 NULL, /* Sched adj cost. */
2094 arm_default_branch_cost,
2095 &arm_default_vec_cost,
2096 1, /* Constant limit. */
2097 5, /* Max cond insns. */
2098 8, /* Memset max inline. */
2099 1, /* Issue rate. */
2100 ARM_PREFETCH_NOT_BENEFICIAL,
2101 tune_params::PREF_CONST_POOL_FALSE,
2102 tune_params::PREF_LDRD_FALSE,
2103 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2104 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2105 tune_params::DISPARAGE_FLAGS_NEITHER,
2106 tune_params::PREF_NEON_STRINGOPS_TRUE,
2107 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2108 tune_params::SCHED_AUTOPREF_OFF
2109 };
2110
2111 const struct tune_params arm_cortex_a53_tune =
2112 {
2113 &cortexa53_extra_costs,
2114 &generic_addr_mode_costs, /* Addressing mode costs. */
2115 NULL, /* Sched adj cost. */
2116 arm_default_branch_cost,
2117 &arm_default_vec_cost,
2118 1, /* Constant limit. */
2119 5, /* Max cond insns. */
2120 8, /* Memset max inline. */
2121 2, /* Issue rate. */
2122 ARM_PREFETCH_NOT_BENEFICIAL,
2123 tune_params::PREF_CONST_POOL_FALSE,
2124 tune_params::PREF_LDRD_FALSE,
2125 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2126 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2127 tune_params::DISPARAGE_FLAGS_NEITHER,
2128 tune_params::PREF_NEON_STRINGOPS_TRUE,
2129 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2130 tune_params::SCHED_AUTOPREF_OFF
2131 };
2132
2133 const struct tune_params arm_cortex_a57_tune =
2134 {
2135 &cortexa57_extra_costs,
2136 &generic_addr_mode_costs, /* addressing mode costs */
2137 NULL, /* Sched adj cost. */
2138 arm_default_branch_cost,
2139 &arm_default_vec_cost,
2140 1, /* Constant limit. */
2141 2, /* Max cond insns. */
2142 8, /* Memset max inline. */
2143 3, /* Issue rate. */
2144 ARM_PREFETCH_NOT_BENEFICIAL,
2145 tune_params::PREF_CONST_POOL_FALSE,
2146 tune_params::PREF_LDRD_TRUE,
2147 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2148 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2149 tune_params::DISPARAGE_FLAGS_ALL,
2150 tune_params::PREF_NEON_STRINGOPS_TRUE,
2151 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2152 tune_params::SCHED_AUTOPREF_FULL
2153 };
2154
2155 const struct tune_params arm_exynosm1_tune =
2156 {
2157 &exynosm1_extra_costs,
2158 &generic_addr_mode_costs, /* Addressing mode costs. */
2159 NULL, /* Sched adj cost. */
2160 arm_default_branch_cost,
2161 &arm_default_vec_cost,
2162 1, /* Constant limit. */
2163 2, /* Max cond insns. */
2164 8, /* Memset max inline. */
2165 3, /* Issue rate. */
2166 ARM_PREFETCH_NOT_BENEFICIAL,
2167 tune_params::PREF_CONST_POOL_FALSE,
2168 tune_params::PREF_LDRD_TRUE,
2169 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2170 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2171 tune_params::DISPARAGE_FLAGS_ALL,
2172 tune_params::PREF_NEON_STRINGOPS_TRUE,
2173 tune_params::FUSE_NOTHING,
2174 tune_params::SCHED_AUTOPREF_OFF
2175 };
2176
2177 const struct tune_params arm_xgene1_tune =
2178 {
2179 &xgene1_extra_costs,
2180 &generic_addr_mode_costs, /* Addressing mode costs. */
2181 NULL, /* Sched adj cost. */
2182 arm_default_branch_cost,
2183 &arm_default_vec_cost,
2184 1, /* Constant limit. */
2185 2, /* Max cond insns. */
2186 32, /* Memset max inline. */
2187 4, /* Issue rate. */
2188 ARM_PREFETCH_NOT_BENEFICIAL,
2189 tune_params::PREF_CONST_POOL_FALSE,
2190 tune_params::PREF_LDRD_TRUE,
2191 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2192 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2193 tune_params::DISPARAGE_FLAGS_ALL,
2194 tune_params::PREF_NEON_STRINGOPS_FALSE,
2195 tune_params::FUSE_NOTHING,
2196 tune_params::SCHED_AUTOPREF_OFF
2197 };
2198
2199 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2200 less appealing. Set max_insns_skipped to a low value. */
2201
2202 const struct tune_params arm_cortex_a5_tune =
2203 {
2204 &cortexa5_extra_costs,
2205 &generic_addr_mode_costs, /* Addressing mode costs. */
2206 NULL, /* Sched adj cost. */
2207 arm_cortex_a5_branch_cost,
2208 &arm_default_vec_cost,
2209 1, /* Constant limit. */
2210 1, /* Max cond insns. */
2211 8, /* Memset max inline. */
2212 2, /* Issue rate. */
2213 ARM_PREFETCH_NOT_BENEFICIAL,
2214 tune_params::PREF_CONST_POOL_FALSE,
2215 tune_params::PREF_LDRD_FALSE,
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2218 tune_params::DISPARAGE_FLAGS_NEITHER,
2219 tune_params::PREF_NEON_STRINGOPS_TRUE,
2220 tune_params::FUSE_NOTHING,
2221 tune_params::SCHED_AUTOPREF_OFF
2222 };
2223
2224 const struct tune_params arm_cortex_a9_tune =
2225 {
2226 &cortexa9_extra_costs,
2227 &generic_addr_mode_costs, /* Addressing mode costs. */
2228 cortex_a9_sched_adjust_cost,
2229 arm_default_branch_cost,
2230 &arm_default_vec_cost,
2231 1, /* Constant limit. */
2232 5, /* Max cond insns. */
2233 8, /* Memset max inline. */
2234 2, /* Issue rate. */
2235 ARM_PREFETCH_BENEFICIAL(4,32,32),
2236 tune_params::PREF_CONST_POOL_FALSE,
2237 tune_params::PREF_LDRD_FALSE,
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2239 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2240 tune_params::DISPARAGE_FLAGS_NEITHER,
2241 tune_params::PREF_NEON_STRINGOPS_FALSE,
2242 tune_params::FUSE_NOTHING,
2243 tune_params::SCHED_AUTOPREF_OFF
2244 };
2245
2246 const struct tune_params arm_cortex_a12_tune =
2247 {
2248 &cortexa12_extra_costs,
2249 &generic_addr_mode_costs, /* Addressing mode costs. */
2250 NULL, /* Sched adj cost. */
2251 arm_default_branch_cost,
2252 &arm_default_vec_cost, /* Vectorizer costs. */
2253 1, /* Constant limit. */
2254 2, /* Max cond insns. */
2255 8, /* Memset max inline. */
2256 2, /* Issue rate. */
2257 ARM_PREFETCH_NOT_BENEFICIAL,
2258 tune_params::PREF_CONST_POOL_FALSE,
2259 tune_params::PREF_LDRD_TRUE,
2260 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2261 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2262 tune_params::DISPARAGE_FLAGS_ALL,
2263 tune_params::PREF_NEON_STRINGOPS_TRUE,
2264 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2265 tune_params::SCHED_AUTOPREF_OFF
2266 };
2267
2268 const struct tune_params arm_cortex_a73_tune =
2269 {
2270 &cortexa57_extra_costs,
2271 &generic_addr_mode_costs, /* Addressing mode costs. */
2272 NULL, /* Sched adj cost. */
2273 arm_default_branch_cost,
2274 &arm_default_vec_cost, /* Vectorizer costs. */
2275 1, /* Constant limit. */
2276 2, /* Max cond insns. */
2277 8, /* Memset max inline. */
2278 2, /* Issue rate. */
2279 ARM_PREFETCH_NOT_BENEFICIAL,
2280 tune_params::PREF_CONST_POOL_FALSE,
2281 tune_params::PREF_LDRD_TRUE,
2282 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2284 tune_params::DISPARAGE_FLAGS_ALL,
2285 tune_params::PREF_NEON_STRINGOPS_TRUE,
2286 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2287 tune_params::SCHED_AUTOPREF_FULL
2288 };
2289
2290 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
2291    single cycle to execute, so a MOVW/MOVT pair materialises a 32-bit constant
2292    in two cycles.  An LDR from the constant pool also takes two cycles, but
2293    mildly increases pipelining opportunity (consecutive loads/stores can be
2294    pipelined together, saving one cycle), and may also improve icache
2295    utilisation.  Hence we prefer the constant pool for such processors.  */
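/* For illustration, using the cycle counts noted above, loading the 32-bit
   constant 0x12345678 compares roughly as

     movw  r0, #0x5678   @ 1 cycle
     movt  r0, #0x1234   @ 1 cycle

   against

     ldr   r0, .Lconst   @ 2 cycles, PC-relative load from the literal pool

   Both forms cost two cycles in isolation; the LDR form is preferred here for
   the pipelining and icache reasons given above.  */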
2296
2297 const struct tune_params arm_v7m_tune =
2298 {
2299 &v7m_extra_costs,
2300 &generic_addr_mode_costs, /* Addressing mode costs. */
2301 NULL, /* Sched adj cost. */
2302 arm_cortex_m_branch_cost,
2303 &arm_default_vec_cost,
2304 1, /* Constant limit. */
2305 2, /* Max cond insns. */
2306 8, /* Memset max inline. */
2307 1, /* Issue rate. */
2308 ARM_PREFETCH_NOT_BENEFICIAL,
2309 tune_params::PREF_CONST_POOL_TRUE,
2310 tune_params::PREF_LDRD_FALSE,
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2313 tune_params::DISPARAGE_FLAGS_NEITHER,
2314 tune_params::PREF_NEON_STRINGOPS_FALSE,
2315 tune_params::FUSE_NOTHING,
2316 tune_params::SCHED_AUTOPREF_OFF
2317 };
2318
2319 /* Cortex-M7 tuning. */
2320
2321 const struct tune_params arm_cortex_m7_tune =
2322 {
2323 &v7m_extra_costs,
2324 &generic_addr_mode_costs, /* Addressing mode costs. */
2325 NULL, /* Sched adj cost. */
2326 arm_cortex_m7_branch_cost,
2327 &arm_default_vec_cost,
2328 0, /* Constant limit. */
2329 1, /* Max cond insns. */
2330 8, /* Memset max inline. */
2331 2, /* Issue rate. */
2332 ARM_PREFETCH_NOT_BENEFICIAL,
2333 tune_params::PREF_CONST_POOL_TRUE,
2334 tune_params::PREF_LDRD_FALSE,
2335 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2336 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2337 tune_params::DISPARAGE_FLAGS_NEITHER,
2338 tune_params::PREF_NEON_STRINGOPS_FALSE,
2339 tune_params::FUSE_NOTHING,
2340 tune_params::SCHED_AUTOPREF_OFF
2341 };
2342
2343 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2344 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2345 cortex-m23. */
2346 const struct tune_params arm_v6m_tune =
2347 {
2348 &generic_extra_costs, /* Insn extra costs. */
2349 &generic_addr_mode_costs, /* Addressing mode costs. */
2350 NULL, /* Sched adj cost. */
2351 arm_default_branch_cost,
2352 &arm_default_vec_cost, /* Vectorizer costs. */
2353 1, /* Constant limit. */
2354 5, /* Max cond insns. */
2355 8, /* Memset max inline. */
2356 1, /* Issue rate. */
2357 ARM_PREFETCH_NOT_BENEFICIAL,
2358 tune_params::PREF_CONST_POOL_FALSE,
2359 tune_params::PREF_LDRD_FALSE,
2360 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2362 tune_params::DISPARAGE_FLAGS_NEITHER,
2363 tune_params::PREF_NEON_STRINGOPS_FALSE,
2364 tune_params::FUSE_NOTHING,
2365 tune_params::SCHED_AUTOPREF_OFF
2366 };
2367
2368 const struct tune_params arm_fa726te_tune =
2369 {
2370 &generic_extra_costs, /* Insn extra costs. */
2371 &generic_addr_mode_costs, /* Addressing mode costs. */
2372 fa726te_sched_adjust_cost,
2373 arm_default_branch_cost,
2374 &arm_default_vec_cost,
2375 1, /* Constant limit. */
2376 5, /* Max cond insns. */
2377 8, /* Memset max inline. */
2378 2, /* Issue rate. */
2379 ARM_PREFETCH_NOT_BENEFICIAL,
2380 tune_params::PREF_CONST_POOL_TRUE,
2381 tune_params::PREF_LDRD_FALSE,
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2383 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2384 tune_params::DISPARAGE_FLAGS_NEITHER,
2385 tune_params::PREF_NEON_STRINGOPS_FALSE,
2386 tune_params::FUSE_NOTHING,
2387 tune_params::SCHED_AUTOPREF_OFF
2388 };
2389
2390 /* Auto-generated CPU, FPU and architecture tables. */
2391 #include "arm-cpu-data.h"
2392
2393 /* The name of the preprocessor macro to define for this architecture.  PROFILE
2394    is replaced by the architecture name (e.g. 8A) in arm_option_override (), and
2395    is therefore chosen to be long enough to hold the longest architecture name.  */
2396
2397 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2398
2399 /* Supported TLS relocations. */
2400
2401 enum tls_reloc {
2402 TLS_GD32,
2403 TLS_GD32_FDPIC,
2404 TLS_LDM32,
2405 TLS_LDM32_FDPIC,
2406 TLS_LDO32,
2407 TLS_IE32,
2408 TLS_IE32_FDPIC,
2409 TLS_LE32,
2410 TLS_DESCSEQ /* GNU scheme */
2411 };
2412
2413 /* The maximum number of insns to be used when loading a constant. */
2414 inline static int
2415 arm_constant_limit (bool size_p)
2416 {
2417 return size_p ? 1 : current_tune->constant_limit;
2418 }
2419
2420 /* Emit an insn that's a simple single-set. Both the operands must be known
2421 to be valid. */
2422 inline static rtx_insn *
2423 emit_set_insn (rtx x, rtx y)
2424 {
2425 return emit_insn (gen_rtx_SET (x, y));
2426 }
2427
2428 /* Return the number of bits set in VALUE. */
2429 static unsigned
2430 bit_count (unsigned long value)
2431 {
2432 unsigned long count = 0;
2433
2434 while (value)
2435 {
2436 count++;
2437 value &= value - 1; /* Clear the least-significant set bit. */
2438 }
2439
2440 return count;
2441 }
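/* Worked example: bit_count (0x2d) is 4.  Each "value &= value - 1" step
   clears the lowest set bit, so the value goes 0x2d -> 0x2c -> 0x28 -> 0x20
   -> 0 in four iterations.  */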
2442
2443 /* Return the number of bits set in BMAP. */
2444 static unsigned
2445 bitmap_popcount (const sbitmap bmap)
2446 {
2447 unsigned int count = 0;
2448 unsigned int n = 0;
2449 sbitmap_iterator sbi;
2450
2451 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2452 count++;
2453 return count;
2454 }
2455
2456 typedef struct
2457 {
2458 machine_mode mode;
2459 const char *name;
2460 } arm_fixed_mode_set;
2461
2462 /* A small helper for setting fixed-point library libfuncs. */
2463
2464 /* A small helper for setting fixed-point libfuncs.  */
2465 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2466 const char *funcname, const char *modename,
2467 int num_suffix)
2468 {
2469 char buffer[50];
2470
2471 if (num_suffix == 0)
2472 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2473 else
2474 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2475
2476 set_optab_libfunc (optable, mode, buffer);
2477 }
2478
2479 static void
2480 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2481 machine_mode from, const char *funcname,
2482 const char *toname, const char *fromname)
2483 {
2484 char buffer[50];
2485 const char *maybe_suffix_2 = "";
2486
2487 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2488 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2489 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2490 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2491 maybe_suffix_2 = "2";
2492
2493 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2494 maybe_suffix_2);
2495
2496 set_conv_libfunc (optable, to, from, buffer);
2497 }
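/* Illustrative examples of the names the two helpers above construct, derived
   from their sprintf formats: arm_set_fixed_optab_libfunc (ssadd_optab,
   E_SAmode, "ssadd", "sa", 3) registers "__gnu_ssaddsa3", and a signed
   SQ -> DQ "fract" conversion via arm_set_fixed_conv_libfunc gets
   "__gnu_fractsqdq2", the trailing "2" coming from the fixed-bit.h rule
   checked above (both operands fixed-point, same signedness, both fracts).  */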
2498
2499 static GTY(()) rtx speculation_barrier_libfunc;
2500
2501 /* Record that we have no arithmetic or comparison libfuncs for
2502 machine mode MODE. */
2503
2504 static void
2505 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2506 {
2507 /* Arithmetic. */
2508 set_optab_libfunc (add_optab, mode, NULL);
2509 set_optab_libfunc (sdiv_optab, mode, NULL);
2510 set_optab_libfunc (smul_optab, mode, NULL);
2511 set_optab_libfunc (neg_optab, mode, NULL);
2512 set_optab_libfunc (sub_optab, mode, NULL);
2513
2514 /* Comparisons. */
2515 set_optab_libfunc (eq_optab, mode, NULL);
2516 set_optab_libfunc (ne_optab, mode, NULL);
2517 set_optab_libfunc (lt_optab, mode, NULL);
2518 set_optab_libfunc (le_optab, mode, NULL);
2519 set_optab_libfunc (ge_optab, mode, NULL);
2520 set_optab_libfunc (gt_optab, mode, NULL);
2521 set_optab_libfunc (unord_optab, mode, NULL);
2522 }
2523
2524 /* Set up library functions unique to ARM. */
2525 static void
2526 arm_init_libfuncs (void)
2527 {
2528 machine_mode mode_iter;
2529
2530 /* For Linux, we have access to kernel support for atomic operations. */
2531 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2532 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2533
2534 /* There are no special library functions unless we are using the
2535 ARM BPABI. */
2536 if (!TARGET_BPABI)
2537 return;
2538
2539 /* The functions below are described in Section 4 of the "Run-Time
2540 ABI for the ARM architecture", Version 1.0. */
2541
2542 /* Double-precision floating-point arithmetic. Table 2. */
2543 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2544 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2545 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2546 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2547 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2548
2549 /* Double-precision comparisons. Table 3. */
2550 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2551 set_optab_libfunc (ne_optab, DFmode, NULL);
2552 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2553 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2554 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2555 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2556 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2557
2558 /* Single-precision floating-point arithmetic. Table 4. */
2559 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2560 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2561 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2562 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2563 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2564
2565 /* Single-precision comparisons. Table 5. */
2566 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2567 set_optab_libfunc (ne_optab, SFmode, NULL);
2568 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2569 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2570 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2571 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2572 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2573
2574 /* Floating-point to integer conversions. Table 6. */
2575 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2576 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2577 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2578 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2579 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2580 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2581 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2582 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2583
2584 /* Conversions between floating types. Table 7. */
2585 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2586 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2587
2588 /* Integer to floating-point conversions. Table 8. */
2589 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2590 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2591 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2592 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2593 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2594 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2595 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2596 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2597
2598 /* Long long. Table 9. */
2599 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2600 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2601 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2602 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2603 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2604 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2605 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2606 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2607
2608 /* Integer (32/32->32) division. \S 4.3.1. */
2609 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2610 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2611
2612 /* The divmod functions are designed so that they can be used for
2613 plain division, even though they return both the quotient and the
2614 remainder. The quotient is returned in the usual location (i.e.,
2615 r0 for SImode, {r0, r1} for DImode), just as would be expected
2616 for an ordinary division routine. Because the AAPCS calling
2617 conventions specify that all of { r0, r1, r2, r3 } are
2618    call-clobbered registers, there is no need to tell the compiler
2619 explicitly that those registers are clobbered by these
2620 routines. */
2621 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2622 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2623
2624 /* For SImode division the ABI provides div-without-mod routines,
2625 which are faster. */
2626 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2627 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2628
2629 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2630 divmod libcalls instead. */
2631 set_optab_libfunc (smod_optab, DImode, NULL);
2632 set_optab_libfunc (umod_optab, DImode, NULL);
2633 set_optab_libfunc (smod_optab, SImode, NULL);
2634 set_optab_libfunc (umod_optab, SImode, NULL);
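  /* To make the above concrete, an illustrative sketch of the resulting
     lowering (not a statement of the exact RTL emitted): for SImode,
     "a / b" becomes a call to __aeabi_idiv with the quotient in r0, while
     "a % b" uses the __aeabi_idivmod libcall, which returns the quotient in
     r0 and the remainder in r1, the compiler keeping only the remainder.
     This is why the mod optabs can safely be cleared above.  */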
2635
2636 /* Half-precision float operations. The compiler handles all operations
2637    with NULL libfuncs by converting to SFmode.  */
2638 switch (arm_fp16_format)
2639 {
2640 case ARM_FP16_FORMAT_IEEE:
2641 case ARM_FP16_FORMAT_ALTERNATIVE:
2642
2643 /* Conversions. */
2644 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2645 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2646 ? "__gnu_f2h_ieee"
2647 : "__gnu_f2h_alternative"));
2648 set_conv_libfunc (sext_optab, SFmode, HFmode,
2649 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2650 ? "__gnu_h2f_ieee"
2651 : "__gnu_h2f_alternative"));
2652
2653 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2654 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2655 ? "__gnu_d2h_ieee"
2656 : "__gnu_d2h_alternative"));
2657
2658 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2659 break;
2660
2661 default:
2662 break;
2663 }
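  /* A sketch of what the NULL arithmetic libfuncs above imply (illustrative
     only): with the IEEE format selected and no half-precision hardware, an
     expression such as "__fp16 c = a + b;" is evaluated by widening both
     operands with __gnu_h2f_ieee, adding in SFmode, and narrowing the result
     back with __gnu_f2h_ieee.  */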
2664
2665 /* For all possible libcalls in BFmode, record NULL. */
2666 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2667 {
2668 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2669 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2670 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2671 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2672 }
2673 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2674
2675 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2676 {
2677 const arm_fixed_mode_set fixed_arith_modes[] =
2678 {
2679 { E_QQmode, "qq" },
2680 { E_UQQmode, "uqq" },
2681 { E_HQmode, "hq" },
2682 { E_UHQmode, "uhq" },
2683 { E_SQmode, "sq" },
2684 { E_USQmode, "usq" },
2685 { E_DQmode, "dq" },
2686 { E_UDQmode, "udq" },
2687 { E_TQmode, "tq" },
2688 { E_UTQmode, "utq" },
2689 { E_HAmode, "ha" },
2690 { E_UHAmode, "uha" },
2691 { E_SAmode, "sa" },
2692 { E_USAmode, "usa" },
2693 { E_DAmode, "da" },
2694 { E_UDAmode, "uda" },
2695 { E_TAmode, "ta" },
2696 { E_UTAmode, "uta" }
2697 };
2698 const arm_fixed_mode_set fixed_conv_modes[] =
2699 {
2700 { E_QQmode, "qq" },
2701 { E_UQQmode, "uqq" },
2702 { E_HQmode, "hq" },
2703 { E_UHQmode, "uhq" },
2704 { E_SQmode, "sq" },
2705 { E_USQmode, "usq" },
2706 { E_DQmode, "dq" },
2707 { E_UDQmode, "udq" },
2708 { E_TQmode, "tq" },
2709 { E_UTQmode, "utq" },
2710 { E_HAmode, "ha" },
2711 { E_UHAmode, "uha" },
2712 { E_SAmode, "sa" },
2713 { E_USAmode, "usa" },
2714 { E_DAmode, "da" },
2715 { E_UDAmode, "uda" },
2716 { E_TAmode, "ta" },
2717 { E_UTAmode, "uta" },
2718 { E_QImode, "qi" },
2719 { E_HImode, "hi" },
2720 { E_SImode, "si" },
2721 { E_DImode, "di" },
2722 { E_TImode, "ti" },
2723 { E_SFmode, "sf" },
2724 { E_DFmode, "df" }
2725 };
2726 unsigned int i, j;
2727
2728 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2729 {
2730 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2731 "add", fixed_arith_modes[i].name, 3);
2732 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2733 "ssadd", fixed_arith_modes[i].name, 3);
2734 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2735 "usadd", fixed_arith_modes[i].name, 3);
2736 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2737 "sub", fixed_arith_modes[i].name, 3);
2738 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2739 "sssub", fixed_arith_modes[i].name, 3);
2740 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2741 "ussub", fixed_arith_modes[i].name, 3);
2742 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2743 "mul", fixed_arith_modes[i].name, 3);
2744 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2745 "ssmul", fixed_arith_modes[i].name, 3);
2746 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2747 "usmul", fixed_arith_modes[i].name, 3);
2748 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2749 "div", fixed_arith_modes[i].name, 3);
2750 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2751 "udiv", fixed_arith_modes[i].name, 3);
2752 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2753 "ssdiv", fixed_arith_modes[i].name, 3);
2754 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2755 "usdiv", fixed_arith_modes[i].name, 3);
2756 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2757 "neg", fixed_arith_modes[i].name, 2);
2758 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2759 "ssneg", fixed_arith_modes[i].name, 2);
2760 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2761 "usneg", fixed_arith_modes[i].name, 2);
2762 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2763 "ashl", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2765 "ashr", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2767 "lshr", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2769 "ssashl", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2771 "usashl", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2773 "cmp", fixed_arith_modes[i].name, 2);
2774 }
2775
2776 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2777 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2778 {
2779 if (i == j
2780 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2781 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2782 continue;
2783
2784 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2785 fixed_conv_modes[j].mode, "fract",
2786 fixed_conv_modes[i].name,
2787 fixed_conv_modes[j].name);
2788 arm_set_fixed_conv_libfunc (satfract_optab,
2789 fixed_conv_modes[i].mode,
2790 fixed_conv_modes[j].mode, "satfract",
2791 fixed_conv_modes[i].name,
2792 fixed_conv_modes[j].name);
2793 arm_set_fixed_conv_libfunc (fractuns_optab,
2794 fixed_conv_modes[i].mode,
2795 fixed_conv_modes[j].mode, "fractuns",
2796 fixed_conv_modes[i].name,
2797 fixed_conv_modes[j].name);
2798 arm_set_fixed_conv_libfunc (satfractuns_optab,
2799 fixed_conv_modes[i].mode,
2800 fixed_conv_modes[j].mode, "satfractuns",
2801 fixed_conv_modes[i].name,
2802 fixed_conv_modes[j].name);
2803 }
2804 }
2805
2806 if (TARGET_AAPCS_BASED)
2807 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2808
2809 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2810 }
2811
2812 /* On AAPCS systems, this is the "struct __va_list". */
2813 static GTY(()) tree va_list_type;
2814
2815 /* Return the type to use as __builtin_va_list. */
2816 static tree
2817 arm_build_builtin_va_list (void)
2818 {
2819 tree va_list_name;
2820 tree ap_field;
2821
2822 if (!TARGET_AAPCS_BASED)
2823 return std_build_builtin_va_list ();
2824
2825 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2826 defined as:
2827
2828 struct __va_list
2829 {
2830 void *__ap;
2831 };
2832
2833 The C Library ABI further reinforces this definition in \S
2834 4.1.
2835
2836 We must follow this definition exactly. The structure tag
2837 name is visible in C++ mangled names, and thus forms a part
2838 of the ABI. The field name may be used by people who
2839 #include <stdarg.h>. */
2840 /* Create the type. */
2841 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2842 /* Give it the required name. */
2843 va_list_name = build_decl (BUILTINS_LOCATION,
2844 TYPE_DECL,
2845 get_identifier ("__va_list"),
2846 va_list_type);
2847 DECL_ARTIFICIAL (va_list_name) = 1;
2848 TYPE_NAME (va_list_type) = va_list_name;
2849 TYPE_STUB_DECL (va_list_type) = va_list_name;
2850 /* Create the __ap field. */
2851 ap_field = build_decl (BUILTINS_LOCATION,
2852 FIELD_DECL,
2853 get_identifier ("__ap"),
2854 ptr_type_node);
2855 DECL_ARTIFICIAL (ap_field) = 1;
2856 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2857 TYPE_FIELDS (va_list_type) = ap_field;
2858 /* Compute its layout. */
2859 layout_type (va_list_type);
2860
2861 return va_list_type;
2862 }
2863
2864 /* Return an expression of type "void *" pointing to the next
2865 available argument in a variable-argument list. VALIST is the
2866 user-level va_list object, of type __builtin_va_list. */
2867 static tree
2868 arm_extract_valist_ptr (tree valist)
2869 {
2870 if (TREE_TYPE (valist) == error_mark_node)
2871 return error_mark_node;
2872
2873 /* On an AAPCS target, the pointer is stored within "struct
2874 va_list". */
2875 if (TARGET_AAPCS_BASED)
2876 {
2877 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2878 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2879 valist, ap_field, NULL_TREE);
2880 }
2881
2882 return valist;
2883 }
2884
2885 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2886 static void
2887 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2888 {
2889 valist = arm_extract_valist_ptr (valist);
2890 std_expand_builtin_va_start (valist, nextarg);
2891 }
2892
2893 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2894 static tree
2895 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2896 gimple_seq *post_p)
2897 {
2898 valist = arm_extract_valist_ptr (valist);
2899 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2900 }
2901
2902 /* Check any incompatible options that the user has specified. */
2903 static void
2904 arm_option_check_internal (struct gcc_options *opts)
2905 {
2906 int flags = opts->x_target_flags;
2907
2908 /* iWMMXt and NEON are incompatible. */
2909 if (TARGET_IWMMXT
2910 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2911 error ("iWMMXt and NEON are incompatible");
2912
2913 /* Make sure that the processor choice does not conflict with any of the
2914 other command line choices. */
2915 if (TARGET_ARM_P (flags)
2916 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2917 error ("target CPU does not support ARM mode");
2918
2919 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2920 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2921 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2922
2923 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2924 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2925
2926 /* If this target is normally configured to use APCS frames, warn if they
2927 are turned off and debugging is turned on. */
2928 if (TARGET_ARM_P (flags)
2929 && write_symbols != NO_DEBUG
2930 && !TARGET_APCS_FRAME
2931 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2932 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2933 "debugging");
2934
2935 /* iWMMXt unsupported under Thumb mode. */
2936 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2937 error ("iWMMXt unsupported under Thumb mode");
2938
2939 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2940 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2941
2942 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2943 {
2944 error ("RTP PIC is incompatible with Thumb");
2945 flag_pic = 0;
2946 }
2947
2948 if (target_pure_code || target_slow_flash_data)
2949 {
2950 const char *flag = (target_pure_code ? "-mpure-code" :
2951 "-mslow-flash-data");
2952 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2953
2954 /* We only support -mslow-flash-data on M-profile targets with
2955 MOVT. */
2956 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2957 error ("%s only supports non-pic code on M-profile targets with the "
2958 "MOVT instruction", flag);
2959
2960 /* We only support -mpure-code on M-profile targets. */
2961 if (target_pure_code && common_unsupported_modes)
2962 error ("%s only supports non-pic code on M-profile targets", flag);
2963
2964 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2965 -mword-relocations forbids relocation of MOVT/MOVW. */
2966 if (target_word_relocations)
2967 error ("%s incompatible with %<-mword-relocations%>", flag);
2968 }
2969 }
2970
2971 /* Recompute the global settings depending on target attribute options. */
2972
2973 static void
2974 arm_option_params_internal (void)
2975 {
2976 /* If we are not using the default (ARM mode) section anchor offset
2977 ranges, then set the correct ranges now. */
2978 if (TARGET_THUMB1)
2979 {
2980 /* Thumb-1 LDR instructions cannot have negative offsets.
2981 Permissible positive offset ranges are 5-bit (for byte loads),
2982 6-bit (for halfword loads), or 7-bit (for word loads).
2983 Empirical results suggest a 7-bit anchor range gives the best
2984 overall code size. */
2985 targetm.min_anchor_offset = 0;
2986 targetm.max_anchor_offset = 127;
2987 }
2988 else if (TARGET_THUMB2)
2989 {
2990 /* The minimum is set such that the total size of the block
2991 for a particular anchor is 248 + 1 + 4095 bytes, which is
2992 divisible by eight, ensuring natural spacing of anchors. */
2993 targetm.min_anchor_offset = -248;
2994 targetm.max_anchor_offset = 4095;
2995 }
2996 else
2997 {
2998 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2999 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3000 }
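  /* For reference, the Thumb-1 ranges above follow from the load encodings:
     LDRB uses its 5-bit immediate directly (byte offsets 0..31), LDRH scales
     it by 2 (0..62), and word LDR scales it by 4 (0..124), which is the 7-bit
     byte-offset range referred to above.  */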
3001
3002 /* Increase the number of conditional instructions with -Os. */
3003 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3004
3005 /* For THUMB2, we limit the conditional sequence to one IT block. */
3006 if (TARGET_THUMB2)
3007 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3008
3009 if (TARGET_THUMB1)
3010 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3011 else
3012 targetm.md_asm_adjust = arm_md_asm_adjust;
3013 }
3014
3015 /* True if -mflip-thumb should next add an attribute for the default
3016 mode, false if it should next add an attribute for the opposite mode. */
3017 static GTY(()) bool thumb_flipper;
3018
3019 /* Options after initial target override. */
3020 static GTY(()) tree init_optimize;
3021
3022 static void
3023 arm_override_options_after_change_1 (struct gcc_options *opts,
3024 struct gcc_options *opts_set)
3025 {
3026 /* -falign-functions without argument: supply one. */
3027 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3028 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3029 && opts->x_optimize_size ? "2" : "4";
3030 }
3031
3032 /* Implement targetm.override_options_after_change. */
3033
3034 static void
3035 arm_override_options_after_change (void)
3036 {
3037 arm_override_options_after_change_1 (&global_options, &global_options_set);
3038 }
3039
3040 /* Implement TARGET_OPTION_RESTORE. */
3041 static void
3042 arm_option_restore (struct gcc_options */* opts */,
3043 struct gcc_options *opts_set, struct cl_target_option *ptr)
3044 {
3045 arm_configure_build_target (&arm_active_target, ptr, opts_set, false);
3046 }
3047
3048 /* Reset options between modes that the user has specified. */
3049 static void
3050 arm_option_override_internal (struct gcc_options *opts,
3051 struct gcc_options *opts_set)
3052 {
3053 arm_override_options_after_change_1 (opts, opts_set);
3054
3055 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3056 {
3057 /* The default is to enable interworking, so this warning message would
3058 be confusing to users who have just compiled with
3059	 e.g. -march=armv4.  */
3060 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3061 opts->x_target_flags &= ~MASK_INTERWORK;
3062 }
3063
3064 if (TARGET_THUMB_P (opts->x_target_flags)
3065 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3066 {
3067 warning (0, "target CPU does not support THUMB instructions");
3068 opts->x_target_flags &= ~MASK_THUMB;
3069 }
3070
3071 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3072 {
3073 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3074 opts->x_target_flags &= ~MASK_APCS_FRAME;
3075 }
3076
3077 /* Callee super interworking implies thumb interworking. Adding
3078 this to the flags here simplifies the logic elsewhere. */
3079 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3080 opts->x_target_flags |= MASK_INTERWORK;
3081
3082   /* Need to remember the initial values so that combinations of options like
3083      -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3084 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3085
3086 if (! opts_set->x_arm_restrict_it)
3087 opts->x_arm_restrict_it = arm_arch8;
3088
3089 /* ARM execution state and M profile don't have [restrict] IT. */
3090 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3091 opts->x_arm_restrict_it = 0;
3092
3093 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3094 if (!opts_set->x_arm_restrict_it
3095 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3096 opts->x_arm_restrict_it = 0;
3097
3098 /* Enable -munaligned-access by default for
3099 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3100 i.e. Thumb2 and ARM state only.
3101 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3102      - ARMv8 architecture-based processors.
3103
3104 Disable -munaligned-access by default for
3105 - all pre-ARMv6 architecture-based processors
3106 - ARMv6-M architecture-based processors
3107 - ARMv8-M Baseline processors. */
3108
3109 if (! opts_set->x_unaligned_access)
3110 {
3111 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3112 && arm_arch6 && (arm_arch_notm || arm_arch7));
3113 }
3114 else if (opts->x_unaligned_access == 1
3115 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3116 {
3117 warning (0, "target CPU does not support unaligned accesses");
3118 opts->x_unaligned_access = 0;
3119 }
3120
3121 /* Don't warn since it's on by default in -O2. */
3122 if (TARGET_THUMB1_P (opts->x_target_flags))
3123 opts->x_flag_schedule_insns = 0;
3124 else
3125 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3126
3127 /* Disable shrink-wrap when optimizing function for size, since it tends to
3128 generate additional returns. */
3129 if (optimize_function_for_size_p (cfun)
3130 && TARGET_THUMB2_P (opts->x_target_flags))
3131 opts->x_flag_shrink_wrap = false;
3132 else
3133 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3134
3135 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3136 - epilogue_insns - does not accurately model the corresponding insns
3137 emitted in the asm file. In particular, see the comment in thumb_exit
3138 'Find out how many of the (return) argument registers we can corrupt'.
3139 As a consequence, the epilogue may clobber registers without fipa-ra
3140 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3141 TODO: Accurately model clobbers for epilogue_insns and reenable
3142 fipa-ra. */
3143 if (TARGET_THUMB1_P (opts->x_target_flags))
3144 opts->x_flag_ipa_ra = 0;
3145 else
3146 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3147
3148 /* Thumb2 inline assembly code should always use unified syntax.
3149 This will apply to ARM and Thumb1 eventually. */
3150 if (TARGET_THUMB2_P (opts->x_target_flags))
3151 opts->x_inline_asm_unified = true;
3152
3153 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3154 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3155 #endif
3156 }
3157
3158 static sbitmap isa_all_fpubits_internal;
3159 static sbitmap isa_all_fpbits;
3160 static sbitmap isa_quirkbits;
3161
3162 /* Configure a build target TARGET from the user-specified options OPTS and
3163 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3164 architecture have been specified, but the two are not identical. */
3165 void
3166 arm_configure_build_target (struct arm_build_target *target,
3167 struct cl_target_option *opts,
3168 struct gcc_options *opts_set,
3169 bool warn_compatible)
3170 {
3171 const cpu_option *arm_selected_tune = NULL;
3172 const arch_option *arm_selected_arch = NULL;
3173 const cpu_option *arm_selected_cpu = NULL;
3174 const arm_fpu_desc *arm_selected_fpu = NULL;
3175 const char *tune_opts = NULL;
3176 const char *arch_opts = NULL;
3177 const char *cpu_opts = NULL;
3178
3179 bitmap_clear (target->isa);
3180 target->core_name = NULL;
3181 target->arch_name = NULL;
3182
3183 if (opts_set->x_arm_arch_string)
3184 {
3185 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3186 "-march",
3187 opts->x_arm_arch_string);
3188 arch_opts = strchr (opts->x_arm_arch_string, '+');
3189 }
3190
3191 if (opts_set->x_arm_cpu_string)
3192 {
3193 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3194 opts->x_arm_cpu_string);
3195 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3196 arm_selected_tune = arm_selected_cpu;
3197 /* If taking the tuning from -mcpu, we don't need to rescan the
3198 options for tuning. */
3199 }
3200
3201 if (opts_set->x_arm_tune_string)
3202 {
3203 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3204 opts->x_arm_tune_string);
3205 tune_opts = strchr (opts->x_arm_tune_string, '+');
3206 }
3207
3208 if (arm_selected_arch)
3209 {
3210 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3211 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3212 arch_opts);
3213
3214 if (arm_selected_cpu)
3215 {
3216 auto_sbitmap cpu_isa (isa_num_bits);
3217 auto_sbitmap isa_delta (isa_num_bits);
3218
3219 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3220 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3221 cpu_opts);
3222 bitmap_xor (isa_delta, cpu_isa, target->isa);
3223 /* Ignore any bits that are quirk bits. */
3224 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3225 /* Ignore (for now) any bits that might be set by -mfpu. */
3226 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal);
3227
3228 /* And if the target ISA lacks floating point, ignore any
3229 extensions that depend on that. */
3230 if (!bitmap_bit_p (target->isa, isa_bit_vfpv2))
3231 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3232
3233 if (!bitmap_empty_p (isa_delta))
3234 {
3235 if (warn_compatible)
3236 warning (0, "switch %<-mcpu=%s%> conflicts "
3237 "with %<-march=%s%> switch",
3238 arm_selected_cpu->common.name,
3239 arm_selected_arch->common.name);
3240 /* -march wins for code generation.
3241 -mcpu wins for default tuning. */
3242 if (!arm_selected_tune)
3243 arm_selected_tune = arm_selected_cpu;
3244
3245 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3246 target->arch_name = arm_selected_arch->common.name;
3247 }
3248 else
3249 {
3250 /* Architecture and CPU are essentially the same.
3251 Prefer the CPU setting. */
3252 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3253 target->core_name = arm_selected_cpu->common.name;
3254 /* Copy the CPU's capabilities, so that we inherit the
3255 appropriate extensions and quirks. */
3256 bitmap_copy (target->isa, cpu_isa);
3257 }
3258 }
3259 else
3260 {
3261 /* Pick a CPU based on the architecture. */
3262 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3263 target->arch_name = arm_selected_arch->common.name;
3264 /* Note: target->core_name is left unset in this path. */
3265 }
3266 }
3267 else if (arm_selected_cpu)
3268 {
3269 target->core_name = arm_selected_cpu->common.name;
3270 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3271 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3272 cpu_opts);
3273 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3274 }
3275 /* If the user did not specify a processor or architecture, choose
3276 one for them. */
3277 else
3278 {
3279 const cpu_option *sel;
3280 auto_sbitmap sought_isa (isa_num_bits);
3281 bitmap_clear (sought_isa);
3282 auto_sbitmap default_isa (isa_num_bits);
3283
3284 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3285 TARGET_CPU_DEFAULT);
3286 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3287 gcc_assert (arm_selected_cpu->common.name);
3288
3289 /* RWE: All of the selection logic below (to the end of this
3290 'if' clause) looks somewhat suspect. It appears to be mostly
3291 there to support forcing thumb support when the default CPU
3292 does not have thumb (somewhat dubious in terms of what the
3293 user might be expecting). I think it should be removed once
3294 support for the pre-thumb era cores is removed. */
3295 sel = arm_selected_cpu;
3296 arm_initialize_isa (default_isa, sel->common.isa_bits);
3297 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3298 cpu_opts);
3299
3300 /* Now check to see if the user has specified any command line
3301 switches that require certain abilities from the cpu. */
3302
3303 if (TARGET_INTERWORK || TARGET_THUMB)
3304 bitmap_set_bit (sought_isa, isa_bit_thumb);
3305
3306 /* If there are such requirements and the default CPU does not
3307 satisfy them, we need to run over the complete list of
3308 cores looking for one that is satisfactory. */
3309 if (!bitmap_empty_p (sought_isa)
3310 && !bitmap_subset_p (sought_isa, default_isa))
3311 {
3312 auto_sbitmap candidate_isa (isa_num_bits);
3313 /* We're only interested in a CPU with at least the
3314 capabilities of the default CPU and the required
3315 additional features. */
3316 bitmap_ior (default_isa, default_isa, sought_isa);
3317
3318 /* Try to locate a CPU type that supports all of the abilities
3319 of the default CPU, plus the extra abilities requested by
3320 the user. */
3321 for (sel = all_cores; sel->common.name != NULL; sel++)
3322 {
3323 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3324 /* An exact match? */
3325 if (bitmap_equal_p (default_isa, candidate_isa))
3326 break;
3327 }
3328
3329 if (sel->common.name == NULL)
3330 {
3331 unsigned current_bit_count = isa_num_bits;
3332 const cpu_option *best_fit = NULL;
3333
3334 /* Ideally we would like to issue an error message here
3335 saying that it was not possible to find a CPU compatible
3336 with the default CPU, but which also supports the command
3337 line options specified by the programmer, and so they
3338 ought to use the -mcpu=<name> command line option to
3339 override the default CPU type.
3340
3341 If we cannot find a CPU that has exactly the
3342 characteristics of the default CPU and the given
3343 command line options we scan the array again looking
3344 for a best match. The best match must have at least
3345 the capabilities of the perfect match. */
3346 for (sel = all_cores; sel->common.name != NULL; sel++)
3347 {
3348 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3349
3350 if (bitmap_subset_p (default_isa, candidate_isa))
3351 {
3352 unsigned count;
3353
3354 bitmap_and_compl (candidate_isa, candidate_isa,
3355 default_isa);
3356 count = bitmap_popcount (candidate_isa);
3357
3358 if (count < current_bit_count)
3359 {
3360 best_fit = sel;
3361 current_bit_count = count;
3362 }
3363 }
3364
3365 gcc_assert (best_fit);
3366 sel = best_fit;
3367 }
3368 }
3369 arm_selected_cpu = sel;
3370 }
3371
3372 /* Now we know the CPU, we can finally initialize the target
3373 structure. */
3374 target->core_name = arm_selected_cpu->common.name;
3375 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3376 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3377 cpu_opts);
3378 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3379 }
3380
3381 gcc_assert (arm_selected_cpu);
3382 gcc_assert (arm_selected_arch);
3383
3384 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3385 {
3386 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3387 auto_sbitmap fpu_bits (isa_num_bits);
3388
3389 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3390 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3391 bitmap_ior (target->isa, target->isa, fpu_bits);
3392 }
3393
3394 /* There may be implied bits which we still need to enable. These are
3395 non-named features which are needed to complete other sets of features,
3396 but cannot be enabled from arm-cpus.in due to being shared between
3397 multiple fgroups. Each entry in all_implied_fbits is of the form
3398 ante -> cons, meaning that if the feature "ante" is enabled, we should
3399 implicitly enable "cons". */
3400 const struct fbit_implication *impl = all_implied_fbits;
3401 while (impl->ante)
3402 {
3403 if (bitmap_bit_p (target->isa, impl->ante))
3404 bitmap_set_bit (target->isa, impl->cons);
3405 impl++;
3406 }
3407
3408 if (!arm_selected_tune)
3409 arm_selected_tune = arm_selected_cpu;
3410 else /* Validate the features passed to -mtune. */
3411 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3412
3413 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3414
3415 /* Finish initializing the target structure. */
3416 target->arch_pp_name = arm_selected_arch->arch;
3417 target->base_arch = arm_selected_arch->base_arch;
3418 target->profile = arm_selected_arch->profile;
3419
3420 target->tune_flags = tune_data->tune_flags;
3421 target->tune = tune_data->tune;
3422 target->tune_core = tune_data->scheduler;
3423 arm_option_reconfigure_globals ();
3424 }
3425
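/* Editorial sketch, not part of the GCC sources: the -mcpu/-march
   compatibility check above, reduced to plain bitmasks instead of
   sbitmaps.  The feature names below are invented for illustration;
   the real bits come from arm-cpus.in.  Kept under "#if 0" so it is
   never compiled.  */
#if 0
#include <stdio.h>

#define FEAT_THUMB2  (1u << 0)
#define FEAT_NEON    (1u << 1)	/* Stands in for an FPU bit.  */
#define FEAT_QUIRK   (1u << 2)	/* Stands in for a quirk bit.  */
#define FEAT_CRC     (1u << 3)

/* Mirror of the delta computation: XOR the two ISA sets, discard the
   quirk and FPU bits, and report a conflict if anything remains.  */
static int
cpu_march_conflict_p (unsigned cpu_isa, unsigned arch_isa)
{
  unsigned delta = cpu_isa ^ arch_isa;	/* bitmap_xor  */
  delta &= ~FEAT_QUIRK;			/* bitmap_and_compl (quirk bits)  */
  delta &= ~FEAT_NEON;			/* bitmap_and_compl (FPU bits)  */
  return delta != 0;			/* !bitmap_empty_p  */
}

int
main (void)
{
  unsigned cpu = FEAT_THUMB2 | FEAT_NEON | FEAT_QUIRK;

  /* Differs only in FPU/quirk bits: treated as compatible.  */
  printf ("%s\n", cpu_march_conflict_p (cpu, FEAT_THUMB2)
	  ? "conflict: warn; -march wins for code generation"
	  : "compatible: prefer the CPU's capabilities");

  /* Differs in a real architecture bit: warned about.  */
  printf ("%s\n", cpu_march_conflict_p (cpu, FEAT_THUMB2 | FEAT_CRC)
	  ? "conflict: warn; -march wins for code generation"
	  : "compatible: prefer the CPU's capabilities");
  return 0;
}
#endif
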
3426 /* Fix up any incompatible options that the user has specified. */
3427 static void
3428 arm_option_override (void)
3429 {
3430 static const enum isa_feature fpu_bitlist_internal[]
3431 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3432 static const enum isa_feature fp_bitlist[]
3433 = { ISA_ALL_FP, isa_nobit };
3434 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3435 cl_target_option opts;
3436
3437 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3438 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3439
3440 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3441 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3442 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3443 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3444
3445 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3446
3447 if (!global_options_set.x_arm_fpu_index)
3448 {
3449 bool ok;
3450 int fpu_index;
3451
3452 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3453 CL_TARGET);
3454 gcc_assert (ok);
3455 arm_fpu_index = (enum fpu_type) fpu_index;
3456 }
3457
3458 cl_target_option_save (&opts, &global_options, &global_options_set);
3459 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3460 true);
3461
3462 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3463 SUBTARGET_OVERRIDE_OPTIONS;
3464 #endif
3465
3466 /* Initialize boolean versions of the architectural flags, for use
3467 in the arm.md file and for enabling feature flags. */
3468 arm_option_reconfigure_globals ();
3469
3470 arm_tune = arm_active_target.tune_core;
3471 tune_flags = arm_active_target.tune_flags;
3472 current_tune = arm_active_target.tune;
3473
3474 /* TBD: Dwarf info for apcs frame is not handled yet. */
3475 if (TARGET_APCS_FRAME)
3476 flag_shrink_wrap = false;
3477
3478 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3479 {
3480 warning (0, "%<-mapcs-stack-check%> incompatible with "
3481 "%<-mno-apcs-frame%>");
3482 target_flags |= MASK_APCS_FRAME;
3483 }
3484
3485 if (TARGET_POKE_FUNCTION_NAME)
3486 target_flags |= MASK_APCS_FRAME;
3487
3488 if (TARGET_APCS_REENT && flag_pic)
3489 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3490
3491 if (TARGET_APCS_REENT)
3492 warning (0, "APCS reentrant code not supported. Ignored");
3493
3494 /* Set up some tuning parameters. */
3495 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3496 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3497 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3498 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3499 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3500 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3501
3502 /* For arm2/3 there is no need to do any scheduling if we are doing
3503 software floating-point. */
3504 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3505 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3506
3507 /* Override the default structure alignment for AAPCS ABI. */
3508 if (!global_options_set.x_arm_structure_size_boundary)
3509 {
3510 if (TARGET_AAPCS_BASED)
3511 arm_structure_size_boundary = 8;
3512 }
3513 else
3514 {
3515 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3516
3517 if (arm_structure_size_boundary != 8
3518 && arm_structure_size_boundary != 32
3519 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3520 {
3521 if (ARM_DOUBLEWORD_ALIGN)
3522 warning (0,
3523 "structure size boundary can only be set to 8, 32 or 64");
3524 else
3525 warning (0, "structure size boundary can only be set to 8 or 32");
3526 arm_structure_size_boundary
3527 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3528 }
3529 }
3530
3531 if (TARGET_VXWORKS_RTP)
3532 {
3533 if (!global_options_set.x_arm_pic_data_is_text_relative)
3534 arm_pic_data_is_text_relative = 0;
3535 }
3536 else if (flag_pic
3537 && !arm_pic_data_is_text_relative
3538 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3539 /* When text & data segments don't have a fixed displacement, the
3540 intended use is with a single, read-only, PIC base register.
3541 Unless the user explicitly requested not to do that, set
3542 it. */
3543 target_flags |= MASK_SINGLE_PIC_BASE;
3544
3545 /* If stack checking is disabled, we can use r10 as the PIC register,
3546 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3547 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3548 {
3549 if (TARGET_VXWORKS_RTP)
3550 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3551 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3552 }
3553
3554 if (flag_pic && TARGET_VXWORKS_RTP)
3555 arm_pic_register = 9;
3556
3557 /* If in FDPIC mode then force arm_pic_register to be r9. */
3558 if (TARGET_FDPIC)
3559 {
3560 arm_pic_register = FDPIC_REGNUM;
3561 if (TARGET_THUMB1)
3562 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3563 }
3564
3565 if (arm_pic_register_string != NULL)
3566 {
3567 int pic_register = decode_reg_name (arm_pic_register_string);
3568
3569 if (!flag_pic)
3570 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3571
3572 /* Prevent the user from choosing an obviously stupid PIC register. */
3573 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3574 || pic_register == HARD_FRAME_POINTER_REGNUM
3575 || pic_register == STACK_POINTER_REGNUM
3576 || pic_register >= PC_REGNUM
3577 || (TARGET_VXWORKS_RTP
3578 && (unsigned int) pic_register != arm_pic_register))
3579 error ("unable to use %qs for PIC register", arm_pic_register_string);
3580 else
3581 arm_pic_register = pic_register;
3582 }
3583
3584 if (flag_pic)
3585 target_word_relocations = 1;
3586
3587 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3588 if (fix_cm3_ldrd == 2)
3589 {
3590 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3591 fix_cm3_ldrd = 1;
3592 else
3593 fix_cm3_ldrd = 0;
3594 }
3595
3596 /* Hot/Cold partitioning is not currently supported, since we can't
3597 handle literal pool placement in that case. */
3598 if (flag_reorder_blocks_and_partition)
3599 {
3600 inform (input_location,
3601 "%<-freorder-blocks-and-partition%> not supported "
3602 "on this architecture");
3603 flag_reorder_blocks_and_partition = 0;
3604 flag_reorder_blocks = 1;
3605 }
3606
3607 if (flag_pic)
3608 /* Hoisting PIC address calculations more aggressively provides a small,
3609 but measurable, size reduction for PIC code. Therefore, we decrease
3610 the bar for unrestricted expression hoisting to the cost of PIC address
3611 calculation, which is 2 instructions. */
3612 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3613 param_gcse_unrestricted_cost, 2);
3614
3615 /* ARM EABI defaults to strict volatile bitfields. */
3616 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3617 && abi_version_at_least(2))
3618 flag_strict_volatile_bitfields = 1;
3619
3620 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3621 have deemed it beneficial (signified by setting
3622 prefetch.num_slots to 1 or more). */
3623 if (flag_prefetch_loop_arrays < 0
3624 && HAVE_prefetch
3625 && optimize >= 3
3626 && current_tune->prefetch.num_slots > 0)
3627 flag_prefetch_loop_arrays = 1;
3628
3629 /* Set up parameters to be used in prefetching algorithm. Do not
3630 override the defaults unless we are tuning for a core we have
3631 researched values for. */
3632 if (current_tune->prefetch.num_slots > 0)
3633 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3634 param_simultaneous_prefetches,
3635 current_tune->prefetch.num_slots);
3636 if (current_tune->prefetch.l1_cache_line_size >= 0)
3637 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3638 param_l1_cache_line_size,
3639 current_tune->prefetch.l1_cache_line_size);
3640 if (current_tune->prefetch.l1_cache_size >= 0)
3641 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3642 param_l1_cache_size,
3643 current_tune->prefetch.l1_cache_size);
3644
3645 /* Look through ready list and all of queue for instructions
3646 relevant for L2 auto-prefetcher. */
3647 int sched_autopref_queue_depth;
3648
3649 switch (current_tune->sched_autopref)
3650 {
3651 case tune_params::SCHED_AUTOPREF_OFF:
3652 sched_autopref_queue_depth = -1;
3653 break;
3654
3655 case tune_params::SCHED_AUTOPREF_RANK:
3656 sched_autopref_queue_depth = 0;
3657 break;
3658
3659 case tune_params::SCHED_AUTOPREF_FULL:
3660 sched_autopref_queue_depth = max_insn_queue_index + 1;
3661 break;
3662
3663 default:
3664 gcc_unreachable ();
3665 }
3666
3667 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3668 param_sched_autopref_queue_depth,
3669 sched_autopref_queue_depth);
3670
3671 /* Currently, for slow flash data, we just disable literal pools. We also
3672 disable it for pure-code. */
3673 if (target_slow_flash_data || target_pure_code)
3674 arm_disable_literal_pool = true;
3675
3676 /* Disable scheduling fusion by default if the processor is not armv7 or
3677 does not prefer ldrd/strd. */
3678 if (flag_schedule_fusion == 2
3679 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3680 flag_schedule_fusion = 0;
3681
3682 /* Need to remember initial options before they are overridden. */
3683 init_optimize = build_optimization_node (&global_options,
3684 &global_options_set);
3685
3686 arm_options_perform_arch_sanity_checks ();
3687 arm_option_override_internal (&global_options, &global_options_set);
3688 arm_option_check_internal (&global_options);
3689 arm_option_params_internal ();
3690
3691 /* Create the default target_options structure. */
3692 target_option_default_node = target_option_current_node
3693 = build_target_option_node (&global_options, &global_options_set);
3694
3695 /* Register global variables with the garbage collector. */
3696 arm_add_gc_roots ();
3697
3698 /* Record the initial mode for testing. */
3699 thumb_flipper = TARGET_THUMB;
3700 }
3701
3702
3703 /* Reconfigure global status flags from the active_target.isa. */
3704 void
3705 arm_option_reconfigure_globals (void)
3706 {
3707 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3708 arm_base_arch = arm_active_target.base_arch;
3709
3710 /* Initialize boolean versions of the architectural flags, for use
3711 in the arm.md file. */
3712 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3713 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3714 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3715 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3716 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3717 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3718 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3719 arm_arch6m = arm_arch6 && !arm_arch_notm;
3720 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3721 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3722 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3723 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3724 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3725 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3726 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3727 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3728 isa_bit_armv8_1m_main);
3729 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3730 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3731 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3732 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3733 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3734 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3735 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3736 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3737 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3738 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3739 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3740 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3741
3742 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3743 if (arm_fp16_inst)
3744 {
3745 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3746 error ("selected fp16 options are incompatible");
3747 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3748 }
3749
3750 arm_arch_cde = 0;
3751 arm_arch_cde_coproc = 0;
3752 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3753 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3754 isa_bit_cdecp6, isa_bit_cdecp7};
3755 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3756 {
3757 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3758 if (cde_bit)
3759 {
3760 arm_arch_cde |= cde_bit;
3761 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3762 }
3763 }
3764
3765 /* And finally, set up some quirks. */
3766 arm_arch_no_volatile_ce
3767 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3768 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3769 isa_bit_quirk_armv6kz);
3770
3771 /* Use the cp15 method if it is available. */
3772 if (target_thread_pointer == TP_AUTO)
3773 {
3774 if (arm_arch6k && !TARGET_THUMB1)
3775 target_thread_pointer = TP_CP15;
3776 else
3777 target_thread_pointer = TP_SOFT;
3778 }
3779 }
3780
3781 /* Perform some validation between the desired architecture and the rest of the
3782 options. */
3783 void
3784 arm_options_perform_arch_sanity_checks (void)
3785 {
3786 /* V5T code we generate is completely interworking capable, so we turn off
3787 TARGET_INTERWORK here to avoid many tests later on. */
3788
3789 /* XXX However, we must pass the right pre-processor defines to CPP
3790 or GLD can get confused. This is a hack. */
3791 if (TARGET_INTERWORK)
3792 arm_cpp_interwork = 1;
3793
3794 if (arm_arch5t)
3795 target_flags &= ~MASK_INTERWORK;
3796
3797 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3798 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3799
3800 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3801 error ("iwmmxt abi requires an iwmmxt capable cpu");
3802
3803 /* BPABI targets use linker tricks to allow interworking on cores
3804 without thumb support. */
3805 if (TARGET_INTERWORK
3806 && !TARGET_BPABI
3807 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3808 {
3809 warning (0, "target CPU does not support interworking" );
3810 target_flags &= ~MASK_INTERWORK;
3811 }
3812
3813 /* If soft-float is specified then don't use FPU. */
3814 if (TARGET_SOFT_FLOAT)
3815 arm_fpu_attr = FPU_NONE;
3816 else
3817 arm_fpu_attr = FPU_VFP;
3818
3819 if (TARGET_AAPCS_BASED)
3820 {
3821 if (TARGET_CALLER_INTERWORKING)
3822 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3823 else
3824 if (TARGET_CALLEE_INTERWORKING)
3825 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3826 }
3827
3828 /* __fp16 support currently assumes the core has ldrh. */
3829 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3830 sorry ("__fp16 and no ldrh");
3831
3832 if (use_cmse && !arm_arch_cmse)
3833 error ("target CPU does not support ARMv8-M Security Extensions");
3834
3835 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3836 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3837 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3838 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3839
3840
3841 if (TARGET_AAPCS_BASED)
3842 {
3843 if (arm_abi == ARM_ABI_IWMMXT)
3844 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3845 else if (TARGET_HARD_FLOAT_ABI)
3846 {
3847 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3848 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3849 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3850 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3851 }
3852 else
3853 arm_pcs_default = ARM_PCS_AAPCS;
3854 }
3855 else
3856 {
3857 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3858 sorry ("%<-mfloat-abi=hard%> and VFP");
3859
3860 if (arm_abi == ARM_ABI_APCS)
3861 arm_pcs_default = ARM_PCS_APCS;
3862 else
3863 arm_pcs_default = ARM_PCS_ATPCS;
3864 }
3865 }
3866
3867 /* Test whether a local function descriptor is canonical, i.e.,
3868 whether we can use GOTOFFFUNCDESC to compute the address of the
3869 function. */
3870 static bool
3871 arm_fdpic_local_funcdesc_p (rtx fnx)
3872 {
3873 tree fn;
3874 enum symbol_visibility vis;
3875 bool ret;
3876
3877 if (!TARGET_FDPIC)
3878 return true;
3879
3880 if (! SYMBOL_REF_LOCAL_P (fnx))
3881 return false;
3882
3883 fn = SYMBOL_REF_DECL (fnx);
3884
3885 if (! fn)
3886 return false;
3887
3888 vis = DECL_VISIBILITY (fn);
3889
3890 if (vis == VISIBILITY_PROTECTED)
3891 /* Private function descriptors for protected functions are not
3892 canonical. Temporarily change the visibility to global so that
3893 we can ensure uniqueness of funcdesc pointers. */
3894 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3895
3896 ret = default_binds_local_p_1 (fn, flag_pic);
3897
3898 DECL_VISIBILITY (fn) = vis;
3899
3900 return ret;
3901 }
3902
3903 static void
3904 arm_add_gc_roots (void)
3905 {
3906 gcc_obstack_init(&minipool_obstack);
3907 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3908 }
3909 \f
3910 /* A table of known ARM exception types.
3911 For use with the interrupt function attribute. */
3912
3913 typedef struct
3914 {
3915 const char *const arg;
3916 const unsigned long return_value;
3917 }
3918 isr_attribute_arg;
3919
3920 static const isr_attribute_arg isr_attribute_args [] =
3921 {
3922 { "IRQ", ARM_FT_ISR },
3923 { "irq", ARM_FT_ISR },
3924 { "FIQ", ARM_FT_FIQ },
3925 { "fiq", ARM_FT_FIQ },
3926 { "ABORT", ARM_FT_ISR },
3927 { "abort", ARM_FT_ISR },
3928 { "UNDEF", ARM_FT_EXCEPTION },
3929 { "undef", ARM_FT_EXCEPTION },
3930 { "SWI", ARM_FT_EXCEPTION },
3931 { "swi", ARM_FT_EXCEPTION },
3932 { NULL, ARM_FT_NORMAL }
3933 };
3934
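/* Editorial example, not part of the GCC sources: how the strings in
   the table above reach arm_isr_value below from user code.  The
   handler name is invented; "IRQ" selects ARM_FT_ISR.  Kept under
   "#if 0" so it is never compiled.  */
#if 0
void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

void
uart_handler (void)
{
  /* The prologue/epilogue emitted for this function follow the
     ARM_FT_ISR rules, e.g. saving call-clobbered registers and
     using an exception-return sequence.  */
}
#endif
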
3935 /* Returns the (interrupt) function type of the current
3936 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3937
3938 static unsigned long
3939 arm_isr_value (tree argument)
3940 {
3941 const isr_attribute_arg * ptr;
3942 const char * arg;
3943
3944 if (!arm_arch_notm)
3945 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3946
3947 /* No argument - default to IRQ. */
3948 if (argument == NULL_TREE)
3949 return ARM_FT_ISR;
3950
3951 /* Get the value of the argument. */
3952 if (TREE_VALUE (argument) == NULL_TREE
3953 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3954 return ARM_FT_UNKNOWN;
3955
3956 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3957
3958 /* Check it against the list of known arguments. */
3959 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3960 if (streq (arg, ptr->arg))
3961 return ptr->return_value;
3962
3963 /* An unrecognized interrupt type. */
3964 return ARM_FT_UNKNOWN;
3965 }
3966
3967 /* Computes the type of the current function. */
3968
3969 static unsigned long
3970 arm_compute_func_type (void)
3971 {
3972 unsigned long type = ARM_FT_UNKNOWN;
3973 tree a;
3974 tree attr;
3975
3976 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3977
3978 /* Decide if the current function is volatile. Such functions
3979 never return, and many memory cycles can be saved by not storing
3980 register values that will never be needed again. This optimization
3981 was added to speed up context switching in a kernel application. */
3982 if (optimize > 0
3983 && (TREE_NOTHROW (current_function_decl)
3984 || !(flag_unwind_tables
3985 || (flag_exceptions
3986 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3987 && TREE_THIS_VOLATILE (current_function_decl))
3988 type |= ARM_FT_VOLATILE;
3989
3990 if (cfun->static_chain_decl != NULL)
3991 type |= ARM_FT_NESTED;
3992
3993 attr = DECL_ATTRIBUTES (current_function_decl);
3994
3995 a = lookup_attribute ("naked", attr);
3996 if (a != NULL_TREE)
3997 type |= ARM_FT_NAKED;
3998
3999 a = lookup_attribute ("isr", attr);
4000 if (a == NULL_TREE)
4001 a = lookup_attribute ("interrupt", attr);
4002
4003 if (a == NULL_TREE)
4004 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4005 else
4006 type |= arm_isr_value (TREE_VALUE (a));
4007
4008 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4009 type |= ARM_FT_CMSE_ENTRY;
4010
4011 return type;
4012 }
4013
4014 /* Returns the type of the current function. */
4015
4016 unsigned long
4017 arm_current_func_type (void)
4018 {
4019 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4020 cfun->machine->func_type = arm_compute_func_type ();
4021
4022 return cfun->machine->func_type;
4023 }
4024
4025 bool
4026 arm_allocate_stack_slots_for_args (void)
4027 {
4028 /* Naked functions should not allocate stack slots for arguments. */
4029 return !IS_NAKED (arm_current_func_type ());
4030 }
4031
4032 static bool
4033 arm_warn_func_return (tree decl)
4034 {
4035 /* Naked functions are implemented entirely in assembly, including the
4036 return sequence, so suppress warnings about this. */
4037 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4038 }
4039
4040 \f
4041 /* Output assembler code for a block containing the constant parts
4042 of a trampoline, leaving space for the variable parts.
4043
4044 On the ARM (if r8 is the static chain regnum, and remembering that
4045 referencing pc adds an offset of 8), the trampoline looks like:
4046 ldr r8, [pc, #0]
4047 ldr pc, [pc]
4048 .word static chain value
4049 .word function's address
4050 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4051
4052 In FDPIC mode, the trampoline looks like:
4053 .word trampoline address
4054 .word trampoline GOT address
4055 ldr r12, [pc, #8] ; #4 for Arm mode
4056 ldr r9, [pc, #8] ; #4 for Arm mode
4057 ldr pc, [pc, #8] ; #4 for Arm mode
4058 .word static chain value
4059 .word GOT address
4060 .word function's address
4061 */
4062
4063 static void
4064 arm_asm_trampoline_template (FILE *f)
4065 {
4066 fprintf (f, "\t.syntax unified\n");
4067
4068 if (TARGET_FDPIC)
4069 {
4070 /* The first two words are a function descriptor pointing to the
4071 trampoline code just below. */
4072 if (TARGET_ARM)
4073 fprintf (f, "\t.arm\n");
4074 else if (TARGET_THUMB2)
4075 fprintf (f, "\t.thumb\n");
4076 else
4077 /* Only ARM and Thumb-2 are supported. */
4078 gcc_unreachable ();
4079
4080 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4081 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4082 /* Trampoline code which sets the static chain register but also
4083 the PIC register before jumping into the real code. */
4084 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4085 STATIC_CHAIN_REGNUM, PC_REGNUM,
4086 TARGET_THUMB2 ? 8 : 4);
4087 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4088 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4089 TARGET_THUMB2 ? 8 : 4);
4090 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4091 PC_REGNUM, PC_REGNUM,
4092 TARGET_THUMB2 ? 8 : 4);
4093 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4094 }
4095 else if (TARGET_ARM)
4096 {
4097 fprintf (f, "\t.arm\n");
4098 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4099 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4100 }
4101 else if (TARGET_THUMB2)
4102 {
4103 fprintf (f, "\t.thumb\n");
4104 /* The Thumb-2 trampoline is similar to the arm implementation.
4105 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4106 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4107 STATIC_CHAIN_REGNUM, PC_REGNUM);
4108 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4109 }
4110 else
4111 {
4112 ASM_OUTPUT_ALIGN (f, 2);
4113 fprintf (f, "\t.code\t16\n");
4114 fprintf (f, ".Ltrampoline_start:\n");
4115 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4116 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4117 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4118 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4119 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4120 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4121 }
4122 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4123 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4124 }
4125
4126 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4127
4128 static void
4129 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4130 {
4131 rtx fnaddr, mem, a_tramp;
4132
4133 emit_block_move (m_tramp, assemble_trampoline_template (),
4134 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4135
4136 if (TARGET_FDPIC)
4137 {
4138 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4139 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4140 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4141 /* The function start address is at offset 8, but in Thumb mode
4142 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4143 below. */
4144 rtx trampoline_code_start
4145 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4146
4147 /* Write initial funcdesc which points to the trampoline. */
4148 mem = adjust_address (m_tramp, SImode, 0);
4149 emit_move_insn (mem, trampoline_code_start);
4150 mem = adjust_address (m_tramp, SImode, 4);
4151 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4152 /* Set up the static chain. */
4153 mem = adjust_address (m_tramp, SImode, 20);
4154 emit_move_insn (mem, chain_value);
4155 /* GOT + real function entry point. */
4156 mem = adjust_address (m_tramp, SImode, 24);
4157 emit_move_insn (mem, gotaddr);
4158 mem = adjust_address (m_tramp, SImode, 28);
4159 emit_move_insn (mem, fnaddr);
4160 }
4161 else
4162 {
4163 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4164 emit_move_insn (mem, chain_value);
4165
4166 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4167 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4168 emit_move_insn (mem, fnaddr);
4169 }
4170
4171 a_tramp = XEXP (m_tramp, 0);
4172 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4173 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
4174 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4175 }
4176
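/* Editorial sketch, not part of the GCC sources: the Arm/Thumb-2
   (TARGET_32BIT) non-FDPIC trampoline built above, viewed as a struct
   so that the offsets 8 and 12 written by arm_trampoline_init are easy
   to see.  The struct and field names are invented for illustration;
   r8 is the static chain register, as in the layout comment further
   up.  Kept under "#if 0" so it is never compiled.  */
#if 0
struct arm_trampoline_image
{
  unsigned int load_static_chain;   /* offset 0:  ldr r8, [pc, #0]  */
  unsigned int load_pc;             /* offset 4:  ldr pc, [pc]      */
  unsigned int static_chain_value;  /* offset 8:  filled in at init */
  unsigned int function_address;    /* offset 12: filled in at init */
};
#endif
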
4177 /* Thumb trampolines should be entered in thumb mode, so set
4178 the bottom bit of the address. */
4179
4180 static rtx
4181 arm_trampoline_adjust_address (rtx addr)
4182 {
4183 /* For FDPIC don't fix trampoline address since it's a function
4184 descriptor and not a function address. */
4185 if (TARGET_THUMB && !TARGET_FDPIC)
4186 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4187 NULL, 0, OPTAB_LIB_WIDEN);
4188 return addr;
4189 }
4190 \f
4191 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4192 includes call-clobbered registers too. If this is a leaf function
4193 we can just examine the registers used by the RTL, but otherwise we
4194 have to assume that whatever function is called might clobber
4195 anything, and so we have to save all the call-clobbered registers
4196 as well. */
4197 static inline bool reg_needs_saving_p (unsigned reg)
4198 {
4199 unsigned long func_type = arm_current_func_type ();
4200
4201 if (IS_INTERRUPT (func_type))
4202 if (df_regs_ever_live_p (reg)
4203 /* Save call-clobbered core registers. */
4204 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4205 return true;
4206 else
4207 return false;
4208 else
4209 if (!df_regs_ever_live_p (reg)
4210 || call_used_or_fixed_reg_p (reg))
4211 return false;
4212 else
4213 return true;
4214 }
4215
4216 /* Return 1 if it is possible to return using a single instruction.
4217 If SIBLING is non-null, this is a test for a return before a sibling
4218 call. SIBLING is the call insn, so we can examine its register usage. */
4219
4220 int
4221 use_return_insn (int iscond, rtx sibling)
4222 {
4223 int regno;
4224 unsigned int func_type;
4225 unsigned long saved_int_regs;
4226 unsigned HOST_WIDE_INT stack_adjust;
4227 arm_stack_offsets *offsets;
4228
4229 /* Never use a return instruction before reload has run. */
4230 if (!reload_completed)
4231 return 0;
4232
4233 func_type = arm_current_func_type ();
4234
4235 /* Naked, volatile and stack alignment functions need special
4236 consideration. */
4237 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4238 return 0;
4239
4240 /* So do interrupt functions that use the frame pointer and Thumb
4241 interrupt functions. */
4242 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4243 return 0;
4244
4245 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4246 && !optimize_function_for_size_p (cfun))
4247 return 0;
4248
4249 offsets = arm_get_frame_offsets ();
4250 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4251
4252 /* As do variadic functions. */
4253 if (crtl->args.pretend_args_size
4254 || cfun->machine->uses_anonymous_args
4255 /* Or if the function calls __builtin_eh_return () */
4256 || crtl->calls_eh_return
4257 /* Or if the function calls alloca */
4258 || cfun->calls_alloca
4259 /* Or if there is a stack adjustment. However, if the stack pointer
4260 is saved on the stack, we can use a pre-incrementing stack load. */
4261 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4262 && stack_adjust == 4))
4263 /* Or if the static chain register was saved above the frame, under the
4264 assumption that the stack pointer isn't saved on the stack. */
4265 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4266 && arm_compute_static_chain_stack_bytes() != 0))
4267 return 0;
4268
4269 saved_int_regs = offsets->saved_regs_mask;
4270
4271 /* Unfortunately, the insn
4272
4273 ldmib sp, {..., sp, ...}
4274
4275 triggers a bug on most SA-110 based devices, such that the stack
4276 pointer won't be correctly restored if the instruction takes a
4277 page fault. We work around this problem by popping r3 along with
4278 the other registers, since that is never slower than executing
4279 another instruction.
4280
4281 We test for !arm_arch5t here, because code for any architecture
4282 less than this could potentially be run on one of the buggy
4283 chips. */
4284 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4285 {
4286 /* Validate that r3 is a call-clobbered register (always true in
4287 the default abi) ... */
4288 if (!call_used_or_fixed_reg_p (3))
4289 return 0;
4290
4291 /* ... that it isn't being used for a return value ... */
4292 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4293 return 0;
4294
4295 /* ... or for a tail-call argument ... */
4296 if (sibling)
4297 {
4298 gcc_assert (CALL_P (sibling));
4299
4300 if (find_regno_fusage (sibling, USE, 3))
4301 return 0;
4302 }
4303
4304 /* ... and that there are no call-saved registers in r0-r2
4305 (always true in the default ABI). */
4306 if (saved_int_regs & 0x7)
4307 return 0;
4308 }
4309
4310 /* Can't be done if interworking with Thumb, and any registers have been
4311 stacked. */
4312 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4313 return 0;
4314
4315 /* On StrongARM, conditional returns are expensive if they aren't
4316 taken and multiple registers have been stacked. */
4317 if (iscond && arm_tune_strongarm)
4318 {
4319 /* Conditional return when just the LR is stored is a simple
4320 conditional-load instruction, that's not expensive. */
4321 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4322 return 0;
4323
4324 if (flag_pic
4325 && arm_pic_register != INVALID_REGNUM
4326 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4327 return 0;
4328 }
4329
4330 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4331 several instructions if anything needs to be popped. Armv8.1-M Mainline
4332 also needs several instructions to save and restore FP context. */
4333 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4334 return 0;
4335
4336 /* If there are saved registers but the LR isn't saved, then we need
4337 two instructions for the return. */
4338 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4339 return 0;
4340
4341 /* Can't be done if any of the VFP regs are pushed,
4342 since this also requires an insn. */
4343 if (TARGET_VFP_BASE)
4344 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4345 if (reg_needs_saving_p (regno))
4346 return 0;
4347
4348 if (TARGET_REALLY_IWMMXT)
4349 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4350 if (reg_needs_saving_p (regno))
4351 return 0;
4352
4353 return 1;
4354 }
4355
4356 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4357 shrink-wrapping if possible. This is the case if we need to emit a
4358 prologue, which we can test by looking at the offsets. */
4359 bool
4360 use_simple_return_p (void)
4361 {
4362 arm_stack_offsets *offsets;
4363
4364 /* Note this function can be called before or after reload. */
4365 if (!reload_completed)
4366 arm_compute_frame_layout ();
4367
4368 offsets = arm_get_frame_offsets ();
4369 return offsets->outgoing_args != 0;
4370 }
4371
4372 /* Return TRUE if int I is a valid immediate ARM constant. */
4373
4374 int
4375 const_ok_for_arm (HOST_WIDE_INT i)
4376 {
4377 int lowbit;
4378
4379 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4380 be all zero, or all one. */
4381 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4382 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4383 != ((~(unsigned HOST_WIDE_INT) 0)
4384 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4385 return FALSE;
4386
4387 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4388
4389 /* Fast return for 0 and small values. We must do this for zero, since
4390 the code below can't handle that one case. */
4391 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4392 return TRUE;
4393
4394 /* Get the number of trailing zeros. */
4395 lowbit = ffs((int) i) - 1;
4396
4397 /* Only even shifts are allowed in ARM mode so round down to the
4398 nearest even number. */
4399 if (TARGET_ARM)
4400 lowbit &= ~1;
4401
4402 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4403 return TRUE;
4404
4405 if (TARGET_ARM)
4406 {
4407 /* Allow rotated constants in ARM mode. */
4408 if (lowbit <= 4
4409 && ((i & ~0xc000003f) == 0
4410 || (i & ~0xf000000f) == 0
4411 || (i & ~0xfc000003) == 0))
4412 return TRUE;
4413 }
4414 else if (TARGET_THUMB2)
4415 {
4416 HOST_WIDE_INT v;
4417
4418 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4419 v = i & 0xff;
4420 v |= v << 16;
4421 if (i == v || i == (v | (v << 8)))
4422 return TRUE;
4423
4424 /* Allow repeated pattern 0xXY00XY00. */
4425 v = i & 0xff00;
4426 v |= v << 16;
4427 if (i == v)
4428 return TRUE;
4429 }
4430 else if (TARGET_HAVE_MOVT)
4431 {
4432 /* Thumb-1 Targets with MOVT. */
4433 if (i > 0xffff)
4434 return FALSE;
4435 else
4436 return TRUE;
4437 }
4438
4439 return FALSE;
4440 }
4441
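/* Editorial sketch, not part of the GCC sources: a brute-force check
   equivalent to the ARM-mode rule above - a constant is valid if some
   even left-rotation of it fits in 8 bits.  Worked examples:
   0xf000000f (rotating left by 4 gives 0xff) is valid, while
   0x00000101 never fits and needs more than one instruction in ARM
   mode.  The Thumb-2 replicated patterns and MOVT cases are not
   covered here.  Kept under "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

static unsigned int
rol32 (unsigned int x, int n)
{
  return n == 0 ? x : (x << n) | (x >> (32 - n));
}

static int
arm_rotated_imm8_p (unsigned int x)
{
  for (int r = 0; r < 32; r += 2)
    if (rol32 (x, r) <= 0xff)
      return 1;
  return 0;
}

int
main (void)
{
  printf ("%d %d %d\n",
	  arm_rotated_imm8_p (0x0000ab00),   /* 1: 0xab << 8  */
	  arm_rotated_imm8_p (0xf000000f),   /* 1: 0xff rotated right by 4  */
	  arm_rotated_imm8_p (0x00000101));  /* 0: the set bits span 9 bits  */
  return 0;
}
#endif
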
4442 /* Return true if I is a valid constant for the operation CODE. */
4443 int
4444 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4445 {
4446 if (const_ok_for_arm (i))
4447 return 1;
4448
4449 switch (code)
4450 {
4451 case SET:
4452 /* See if we can use movw. */
4453 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4454 return 1;
4455 else
4456 /* Otherwise, try mvn. */
4457 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4458
4459 case PLUS:
4460 /* See if we can use addw or subw. */
4461 if (TARGET_THUMB2
4462 && ((i & 0xfffff000) == 0
4463 || ((-i) & 0xfffff000) == 0))
4464 return 1;
4465 /* Fall through. */
4466 case COMPARE:
4467 case EQ:
4468 case NE:
4469 case GT:
4470 case LE:
4471 case LT:
4472 case GE:
4473 case GEU:
4474 case LTU:
4475 case GTU:
4476 case LEU:
4477 case UNORDERED:
4478 case ORDERED:
4479 case UNEQ:
4480 case UNGE:
4481 case UNLT:
4482 case UNGT:
4483 case UNLE:
4484 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4485
4486 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4487 case XOR:
4488 return 0;
4489
4490 case IOR:
4491 if (TARGET_THUMB2)
4492 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4493 return 0;
4494
4495 case AND:
4496 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4497
4498 default:
4499 gcc_unreachable ();
4500 }
4501 }
4502
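/* Editorial example, not part of the GCC sources: the point of the
   negated/inverted checks above.  -200 is not a valid ARM immediate,
   but const_ok_for_arm (200) holds, so a comparison against -200 can
   be emitted as "cmn r0, #200" instead of "cmp r0, #-200"; similarly,
   a SET of 0xffffff00 can use "mvn r0, #0xff".  */
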
4503 /* Return true if I is a valid DImode constant for the operation CODE. */
4504 int
4505 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4506 {
4507 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4508 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4509 rtx hi = GEN_INT (hi_val);
4510 rtx lo = GEN_INT (lo_val);
4511
4512 if (TARGET_THUMB1)
4513 return 0;
4514
4515 switch (code)
4516 {
4517 case AND:
4518 case IOR:
4519 case XOR:
4520 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4521 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4522 case PLUS:
4523 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4524
4525 default:
4526 return 0;
4527 }
4528 }
4529
4530 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
4531 Avoid generating useless code when one of the bytes is zero. */
4532 void
4533 thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
4534 {
4535 bool mov_done_p = false;
4536 int i;
4537
4538 /* Emit upper 3 bytes if needed. */
4539 for (i = 0; i < 3; i++)
4540 {
4541 int byte = (op1 >> (8 * (3 - i))) & 0xff;
4542
4543 if (byte)
4544 {
4545 emit_set_insn (op0, mov_done_p
4546 ? gen_rtx_PLUS (SImode, op0, GEN_INT (byte))
4547 : GEN_INT (byte));
4548 mov_done_p = true;
4549 }
4550
4551 if (mov_done_p)
4552 emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
4553 }
4554
4555 /* Emit lower byte if needed. */
4556 if (!mov_done_p)
4557 emit_set_insn (op0, GEN_INT (op1 & 0xff));
4558 else if (op1 & 0xff)
4559 emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
4560 }
4561
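/* Editorial sketch, not part of the GCC sources: a standalone
   simulation of the byte-at-a-time strategy above, printing the
   pseudo-assembly it would choose for a sample constant.  The register
   name and output format are invented for illustration.  Kept under
   "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

static void
simulate_thumb1_const (unsigned int val)
{
  int mov_done = 0;
  for (int i = 0; i < 3; i++)
    {
      unsigned int byte = (val >> (8 * (3 - i))) & 0xff;
      if (byte)
	{
	  printf (mov_done ? "adds r0, #%#x\n" : "movs r0, #%#x\n", byte);
	  mov_done = 1;
	}
      if (mov_done)
	printf ("lsls r0, #8\n");
    }
  if (!mov_done)
    printf ("movs r0, #%#x\n", val & 0xff);
  else if (val & 0xff)
    printf ("adds r0, #%#x\n", val & 0xff);
}

int
main (void)
{
  /* 0x12003400 comes out as: movs, lsls, lsls, adds, lsls.  */
  simulate_thumb1_const (0x12003400);
  return 0;
}
#endif
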
4562 /* Emit a sequence of insns to handle a large constant.
4563 CODE is the code of the operation required, it can be any of SET, PLUS,
4564 IOR, AND, XOR, MINUS;
4565 MODE is the mode in which the operation is being performed;
4566 VAL is the integer to operate on;
4567 SOURCE is the other operand (a register, or a null-pointer for SET);
4568 SUBTARGETS means it is safe to create scratch registers if that will
4569 either produce a simpler sequence, or we will want to cse the values.
4570 Return value is the number of insns emitted. */
4571
4572 /* ??? Tweak this for thumb2. */
4573 int
4574 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4575 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4576 {
4577 rtx cond;
4578
4579 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4580 cond = COND_EXEC_TEST (PATTERN (insn));
4581 else
4582 cond = NULL_RTX;
4583
4584 if (subtargets || code == SET
4585 || (REG_P (target) && REG_P (source)
4586 && REGNO (target) != REGNO (source)))
4587 {
4588 /* After arm_reorg has been called, we can't fix up expensive
4589 constants by pushing them into memory so we must synthesize
4590 them in-line, regardless of the cost. This is only likely to
4591 be more costly on chips that have load delay slots and we are
4592 compiling without running the scheduler (so no splitting
4593 occurred before the final instruction emission).
4594
4595 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4596 */
4597 if (!cfun->machine->after_arm_reorg
4598 && !cond
4599 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4600 1, 0)
4601 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4602 + (code != SET))))
4603 {
4604 if (code == SET)
4605 {
4606 /* Currently SET is the only monadic value for CODE; all
4607 the rest are dyadic. */
4608 if (TARGET_USE_MOVT)
4609 arm_emit_movpair (target, GEN_INT (val));
4610 else
4611 emit_set_insn (target, GEN_INT (val));
4612
4613 return 1;
4614 }
4615 else
4616 {
4617 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4618
4619 if (TARGET_USE_MOVT)
4620 arm_emit_movpair (temp, GEN_INT (val));
4621 else
4622 emit_set_insn (temp, GEN_INT (val));
4623
4624 /* For MINUS, the value is subtracted from, since we never
4625 have subtraction of a constant. */
4626 if (code == MINUS)
4627 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4628 else
4629 emit_set_insn (target,
4630 gen_rtx_fmt_ee (code, mode, source, temp));
4631 return 2;
4632 }
4633 }
4634 }
4635
4636 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4637 1);
4638 }
4639
4640 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4641 ARM/THUMB2 immediates and add up to VAL.
4642 The function return value gives the number of insns required. */
4643 static int
4644 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4645 struct four_ints *return_sequence)
4646 {
4647 int best_consecutive_zeros = 0;
4648 int i;
4649 int best_start = 0;
4650 int insns1, insns2;
4651 struct four_ints tmp_sequence;
4652
4653 /* If we aren't targeting ARM, the best place to start is always at
4654 the bottom, otherwise look more closely. */
4655 if (TARGET_ARM)
4656 {
4657 for (i = 0; i < 32; i += 2)
4658 {
4659 int consecutive_zeros = 0;
4660
4661 if (!(val & (3 << i)))
4662 {
4663 while ((i < 32) && !(val & (3 << i)))
4664 {
4665 consecutive_zeros += 2;
4666 i += 2;
4667 }
4668 if (consecutive_zeros > best_consecutive_zeros)
4669 {
4670 best_consecutive_zeros = consecutive_zeros;
4671 best_start = i - consecutive_zeros;
4672 }
4673 i -= 2;
4674 }
4675 }
4676 }
4677
4678 /* So long as it won't require any more insns to do so, it's
4679 desirable to emit a small constant (in bits 0...9) in the last
4680 insn. This way there is more chance that it can be combined with
4681 a later addressing insn to form a pre-indexed load or store
4682 operation. Consider:
4683
4684 *((volatile int *)0xe0000100) = 1;
4685 *((volatile int *)0xe0000110) = 2;
4686
4687 We want this to wind up as:
4688
4689 mov rA, #0xe0000000
4690 mov rB, #1
4691 str rB, [rA, #0x100]
4692 mov rB, #2
4693 str rB, [rA, #0x110]
4694
4695 rather than having to synthesize both large constants from scratch.
4696
4697 Therefore, we calculate how many insns would be required to emit
4698 the constant starting from `best_start', and also starting from
4699 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4700 yield a shorter sequence, we may as well use zero. */
4701 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4702 if (best_start != 0
4703 && ((HOST_WIDE_INT_1U << best_start) < val))
4704 {
4705 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4706 if (insns2 <= insns1)
4707 {
4708 *return_sequence = tmp_sequence;
4709 insns1 = insns2;
4710 }
4711 }
4712
4713 return insns1;
4714 }
4715
4716 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4717 static int
4718 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4719 struct four_ints *return_sequence, int i)
4720 {
4721 int remainder = val & 0xffffffff;
4722 int insns = 0;
4723
4724 /* Try and find a way of doing the job in either two or three
4725 instructions.
4726
4727 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4728 location. We start at position I. This may be the MSB, or
4729 optimal_immediate_sequence may have positioned it at the largest block
4730 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4731 wrapping around to the top of the word when we drop off the bottom.
4732 In the worst case this code should produce no more than four insns.
4733
4734 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4735 constants, shifted to any arbitrary location. We should always start
4736 at the MSB. */
4737 do
4738 {
4739 int end;
4740 unsigned int b1, b2, b3, b4;
4741 unsigned HOST_WIDE_INT result;
4742 int loc;
4743
4744 gcc_assert (insns < 4);
4745
4746 if (i <= 0)
4747 i += 32;
4748
4749 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4750 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4751 {
4752 loc = i;
4753 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4754 /* We can use addw/subw for the last 12 bits. */
4755 result = remainder;
4756 else
4757 {
4758 /* Use an 8-bit shifted/rotated immediate. */
4759 end = i - 8;
4760 if (end < 0)
4761 end += 32;
4762 result = remainder & ((0x0ff << end)
4763 | ((i < end) ? (0xff >> (32 - end))
4764 : 0));
4765 i -= 8;
4766 }
4767 }
4768 else
4769 {
4770 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4771 arbitrary shifts. */
4772 i -= TARGET_ARM ? 2 : 1;
4773 continue;
4774 }
4775
4776 /* Next, see if we can do a better job with a thumb2 replicated
4777 constant.
4778
4779 We do it this way around to catch the cases like 0x01F001E0 where
4780 two 8-bit immediates would work, but a replicated constant would
4781 make it worse.
4782
4783 TODO: 16-bit constants that don't clear all the bits, but still win.
4784 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4785 if (TARGET_THUMB2)
4786 {
4787 b1 = (remainder & 0xff000000) >> 24;
4788 b2 = (remainder & 0x00ff0000) >> 16;
4789 b3 = (remainder & 0x0000ff00) >> 8;
4790 b4 = remainder & 0xff;
4791
4792 if (loc > 24)
4793 {
4794 /* The 8-bit immediate already found clears b1 (and maybe b2),
4795 but must leave b3 and b4 alone. */
4796
4797 /* First try to find a 32-bit replicated constant that clears
4798 almost everything. We can assume that we can't do it in one,
4799 or else we wouldn't be here. */
4800 unsigned int tmp = b1 & b2 & b3 & b4;
4801 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4802 + (tmp << 24);
4803 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4804 + (tmp == b3) + (tmp == b4);
4805 if (tmp
4806 && (matching_bytes >= 3
4807 || (matching_bytes == 2
4808 && const_ok_for_op (remainder & ~tmp2, code))))
4809 {
4810 /* At least 3 of the bytes match, and the fourth has at
4811 least as many bits set, or two of the bytes match
4812 and it will only require one more insn to finish. */
4813 result = tmp2;
4814 i = tmp != b1 ? 32
4815 : tmp != b2 ? 24
4816 : tmp != b3 ? 16
4817 : 8;
4818 }
4819
4820 /* Second, try to find a 16-bit replicated constant that can
4821 leave three of the bytes clear. If b2 or b4 is already
4822 zero, then we can. If the 8-bit immediate from above would not
4823 clear b2 anyway, then we still win. */
4824 else if (b1 == b3 && (!b2 || !b4
4825 || (remainder & 0x00ff0000 & ~result)))
4826 {
4827 result = remainder & 0xff00ff00;
4828 i = 24;
4829 }
4830 }
4831 else if (loc > 16)
4832 {
4833 /* The 8-bit immediate already found clears b2 (and maybe b3)
4834 and we don't get here unless b1 is already clear, but it will
4835 leave b4 unchanged. */
4836
4837 /* If we can clear b2 and b4 at once, then we win, since the
4838 8-bits couldn't possibly reach that far. */
4839 if (b2 == b4)
4840 {
4841 result = remainder & 0x00ff00ff;
4842 i = 16;
4843 }
4844 }
4845 }
4846
4847 return_sequence->i[insns++] = result;
4848 remainder &= ~result;
4849
4850 if (code == SET || code == MINUS)
4851 code = PLUS;
4852 }
4853 while (remainder);
4854
4855 return insns;
4856 }
4857
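/* Editorial sketch, not part of the GCC sources: a much-simplified,
   bottom-up version of the chunking idea above - peel off 8-bit chunks
   aligned to even bit positions until nothing is left.  It ignores
   best_start, wrap-around and the Thumb-2 replicated-constant
   refinements; __builtin_ctz is a GCC/Clang builtin.  Kept under
   "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int remainder = 0xfff000ff;	/* Arbitrary sample value.  */
  int insns = 0;

  while (remainder)
    {
      /* Lowest set bit, rounded down to an even position.  */
      int low = __builtin_ctz (remainder) & ~1;
      /* Everything the 8-bit chunk at that position can cover.  */
      unsigned int chunk = remainder & (0xffu << low);
      printf ("chunk %d: %#x\n", insns, chunk);
      remainder &= ~chunk;
      insns++;
    }
  printf ("%d insns\n", insns);	/* 3 for this sample.  */
  return 0;
}
#endif
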
4858 /* Emit an instruction with the indicated PATTERN. If COND is
4859 non-NULL, conditionalize the execution of the instruction on COND
4860 being true. */
4861
4862 static void
4863 emit_constant_insn (rtx cond, rtx pattern)
4864 {
4865 if (cond)
4866 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4867 emit_insn (pattern);
4868 }
4869
4870 /* As above, but extra parameter GENERATE which, if clear, suppresses
4871 RTL generation. */
4872
4873 static int
4874 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4875 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4876 int subtargets, int generate)
4877 {
4878 int can_invert = 0;
4879 int can_negate = 0;
4880 int final_invert = 0;
4881 int i;
4882 int set_sign_bit_copies = 0;
4883 int clear_sign_bit_copies = 0;
4884 int clear_zero_bit_copies = 0;
4885 int set_zero_bit_copies = 0;
4886 int insns = 0, neg_insns, inv_insns;
4887 unsigned HOST_WIDE_INT temp1, temp2;
4888 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4889 struct four_ints *immediates;
4890 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4891
4892 /* Find out which operations are safe for a given CODE. Also do a quick
4893 check for degenerate cases; these can occur when DImode operations
4894 are split. */
4895 switch (code)
4896 {
4897 case SET:
4898 can_invert = 1;
4899 break;
4900
4901 case PLUS:
4902 can_negate = 1;
4903 break;
4904
4905 case IOR:
4906 if (remainder == 0xffffffff)
4907 {
4908 if (generate)
4909 emit_constant_insn (cond,
4910 gen_rtx_SET (target,
4911 GEN_INT (ARM_SIGN_EXTEND (val))));
4912 return 1;
4913 }
4914
4915 if (remainder == 0)
4916 {
4917 if (reload_completed && rtx_equal_p (target, source))
4918 return 0;
4919
4920 if (generate)
4921 emit_constant_insn (cond, gen_rtx_SET (target, source));
4922 return 1;
4923 }
4924 break;
4925
4926 case AND:
4927 if (remainder == 0)
4928 {
4929 if (generate)
4930 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4931 return 1;
4932 }
4933 if (remainder == 0xffffffff)
4934 {
4935 if (reload_completed && rtx_equal_p (target, source))
4936 return 0;
4937 if (generate)
4938 emit_constant_insn (cond, gen_rtx_SET (target, source));
4939 return 1;
4940 }
4941 can_invert = 1;
4942 break;
4943
4944 case XOR:
4945 if (remainder == 0)
4946 {
4947 if (reload_completed && rtx_equal_p (target, source))
4948 return 0;
4949 if (generate)
4950 emit_constant_insn (cond, gen_rtx_SET (target, source));
4951 return 1;
4952 }
4953
4954 if (remainder == 0xffffffff)
4955 {
4956 if (generate)
4957 emit_constant_insn (cond,
4958 gen_rtx_SET (target,
4959 gen_rtx_NOT (mode, source)));
4960 return 1;
4961 }
4962 final_invert = 1;
4963 break;
4964
4965 case MINUS:
4966 /* We treat MINUS as (val - source), since (source - val) is always
4967 passed as (source + (-val)). */
4968 if (remainder == 0)
4969 {
4970 if (generate)
4971 emit_constant_insn (cond,
4972 gen_rtx_SET (target,
4973 gen_rtx_NEG (mode, source)));
4974 return 1;
4975 }
4976 if (const_ok_for_arm (val))
4977 {
4978 if (generate)
4979 emit_constant_insn (cond,
4980 gen_rtx_SET (target,
4981 gen_rtx_MINUS (mode, GEN_INT (val),
4982 source)));
4983 return 1;
4984 }
4985
4986 break;
4987
4988 default:
4989 gcc_unreachable ();
4990 }
4991
4992 /* If we can do it in one insn get out quickly. */
4993 if (const_ok_for_op (val, code))
4994 {
4995 if (generate)
4996 emit_constant_insn (cond,
4997 gen_rtx_SET (target,
4998 (source
4999 ? gen_rtx_fmt_ee (code, mode, source,
5000 GEN_INT (val))
5001 : GEN_INT (val))));
5002 return 1;
5003 }
5004
5005 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5006 insn. */
5007 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5008 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5009 {
5010 if (generate)
5011 {
5012 if (mode == SImode && i == 16)
5013 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5014 smaller insn. */
5015 emit_constant_insn (cond,
5016 gen_zero_extendhisi2
5017 (target, gen_lowpart (HImode, source)));
5018 else
5019 /* Extz only supports SImode, but we can coerce the operands
5020 into that mode. */
5021 emit_constant_insn (cond,
5022 gen_extzv_t2 (gen_lowpart (SImode, target),
5023 gen_lowpart (SImode, source),
5024 GEN_INT (i), const0_rtx));
5025 }
5026
5027 return 1;
5028 }
5029
5030 /* Calculate a few attributes that may be useful for specific
5031 optimizations. */
5032 /* Count number of leading zeros. */
5033 for (i = 31; i >= 0; i--)
5034 {
5035 if ((remainder & (1 << i)) == 0)
5036 clear_sign_bit_copies++;
5037 else
5038 break;
5039 }
5040
5041 /* Count number of leading 1's. */
5042 for (i = 31; i >= 0; i--)
5043 {
5044 if ((remainder & (1 << i)) != 0)
5045 set_sign_bit_copies++;
5046 else
5047 break;
5048 }
5049
5050 /* Count number of trailing zeros. */
5051 for (i = 0; i <= 31; i++)
5052 {
5053 if ((remainder & (1 << i)) == 0)
5054 clear_zero_bit_copies++;
5055 else
5056 break;
5057 }
5058
5059 /* Count number of trailing 1's. */
5060 for (i = 0; i <= 31; i++)
5061 {
5062 if ((remainder & (1 << i)) != 0)
5063 set_zero_bit_copies++;
5064 else
5065 break;
5066 }
5067
5068 switch (code)
5069 {
5070 case SET:
5071 /* See if we can do this by sign_extending a constant that is known
5072 to be negative. This is a good way of doing it, since the shift
5073 may well merge into a subsequent insn. */
5074 if (set_sign_bit_copies > 1)
5075 {
5076 if (const_ok_for_arm
5077 (temp1 = ARM_SIGN_EXTEND (remainder
5078 << (set_sign_bit_copies - 1))))
5079 {
5080 if (generate)
5081 {
5082 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5083 emit_constant_insn (cond,
5084 gen_rtx_SET (new_src, GEN_INT (temp1)));
5085 emit_constant_insn (cond,
5086 gen_ashrsi3 (target, new_src,
5087 GEN_INT (set_sign_bit_copies - 1)));
5088 }
5089 return 2;
5090 }
5091 /* For an inverted constant, we will need to set the low bits;
5092 these will be shifted out of harm's way. */
5093 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5094 if (const_ok_for_arm (~temp1))
5095 {
5096 if (generate)
5097 {
5098 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5099 emit_constant_insn (cond,
5100 gen_rtx_SET (new_src, GEN_INT (temp1)));
5101 emit_constant_insn (cond,
5102 gen_ashrsi3 (target, new_src,
5103 GEN_INT (set_sign_bit_copies - 1)));
5104 }
5105 return 2;
5106 }
5107 }
5108
5109 /* See if we can calculate the value as the difference between two
5110 valid immediates. */
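/* For instance (illustrative), 0x00fffff0 is not itself a valid
   immediate, but it is the difference 0x01000000 - 0x10 of two values
   that are, so it can be built with a MOV followed by a SUB.  */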
5111 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5112 {
5113 int topshift = clear_sign_bit_copies & ~1;
5114
5115 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5116 & (0xff000000 >> topshift));
5117
5118 /* If temp1 is zero, then that means the 9 most significant
5119 bits of remainder were 1 and we've caused it to overflow.
5120 When topshift is 0 we don't need to do anything since we
5121 can borrow from 'bit 32'. */
5122 if (temp1 == 0 && topshift != 0)
5123 temp1 = 0x80000000 >> (topshift - 1);
5124
5125 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5126
5127 if (const_ok_for_arm (temp2))
5128 {
5129 if (generate)
5130 {
5131 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5132 emit_constant_insn (cond,
5133 gen_rtx_SET (new_src, GEN_INT (temp1)));
5134 emit_constant_insn (cond,
5135 gen_addsi3 (target, new_src,
5136 GEN_INT (-temp2)));
5137 }
5138
5139 return 2;
5140 }
5141 }
5142
5143 /* See if we can generate this by setting the bottom (or the top)
5144 16 bits, and then shifting these into the other half of the
5145 word. We only look for the simplest cases; to do more would cost
5146 too much. Be careful, however, not to generate this when the
5147 alternative would take fewer insns. */
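/* E.g. (illustrative) 0x01230123 can be built by first constructing
   0x123 and then ORing that value with itself shifted left by 16.  */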
5148 if (val & 0xffff0000)
5149 {
5150 temp1 = remainder & 0xffff0000;
5151 temp2 = remainder & 0x0000ffff;
5152
5153 /* Overlaps outside this range are best done using other methods. */
5154 for (i = 9; i < 24; i++)
5155 {
5156 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5157 && !const_ok_for_arm (temp2))
5158 {
5159 rtx new_src = (subtargets
5160 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5161 : target);
5162 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5163 source, subtargets, generate);
5164 source = new_src;
5165 if (generate)
5166 emit_constant_insn
5167 (cond,
5168 gen_rtx_SET
5169 (target,
5170 gen_rtx_IOR (mode,
5171 gen_rtx_ASHIFT (mode, source,
5172 GEN_INT (i)),
5173 source)));
5174 return insns + 1;
5175 }
5176 }
5177
5178 /* Don't duplicate cases already considered. */
5179 for (i = 17; i < 24; i++)
5180 {
5181 if (((temp1 | (temp1 >> i)) == remainder)
5182 && !const_ok_for_arm (temp1))
5183 {
5184 rtx new_src = (subtargets
5185 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5186 : target);
5187 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5188 source, subtargets, generate);
5189 source = new_src;
5190 if (generate)
5191 emit_constant_insn
5192 (cond,
5193 gen_rtx_SET (target,
5194 gen_rtx_IOR
5195 (mode,
5196 gen_rtx_LSHIFTRT (mode, source,
5197 GEN_INT (i)),
5198 source)));
5199 return insns + 1;
5200 }
5201 }
5202 }
5203 break;
5204
5205 case IOR:
5206 case XOR:
5207 /* If we have IOR or XOR, and the constant can be loaded in a
5208 single instruction, and we can find a temporary to put it in,
5209 then this can be done in two instructions instead of 3-4. */
5210 if (subtargets
5211 /* TARGET can't be NULL if SUBTARGETS is 0. */
5212 || (reload_completed && !reg_mentioned_p (target, source)))
5213 {
5214 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5215 {
5216 if (generate)
5217 {
5218 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5219
5220 emit_constant_insn (cond,
5221 gen_rtx_SET (sub, GEN_INT (val)));
5222 emit_constant_insn (cond,
5223 gen_rtx_SET (target,
5224 gen_rtx_fmt_ee (code, mode,
5225 source, sub)));
5226 }
5227 return 2;
5228 }
5229 }
5230
5231 if (code == XOR)
5232 break;
5233
5234 /* Convert
5235 x = y | constant (where the constant consists of set_sign_bit_copies
5236 leading 1s followed by 0s, e.g. 0xfff00000) to
5237 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5238
5239 This can be done in 2 instructions by using shifts with mov or mvn.
5240 e.g. for
5241 x = x | 0xfff00000;
5242 we generate:
5243 mvn r0, r0, asl #12
5244 mvn r0, r0, lsr #12 */
5245 if (set_sign_bit_copies > 8
5246 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5247 {
5248 if (generate)
5249 {
5250 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5251 rtx shift = GEN_INT (set_sign_bit_copies);
5252
5253 emit_constant_insn
5254 (cond,
5255 gen_rtx_SET (sub,
5256 gen_rtx_NOT (mode,
5257 gen_rtx_ASHIFT (mode,
5258 source,
5259 shift))));
5260 emit_constant_insn
5261 (cond,
5262 gen_rtx_SET (target,
5263 gen_rtx_NOT (mode,
5264 gen_rtx_LSHIFTRT (mode, sub,
5265 shift))));
5266 }
5267 return 2;
5268 }
5269
5270 /* Convert
5271 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5272 to
5273 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5274
5275 For example, for
5276 r0 = r0 | 0xfff
5277 we generate:
5278 mvn r0, r0, lsr #12
5279 mvn r0, r0, asl #12 */
5280 if (set_zero_bit_copies > 8
5281 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5282 {
5283 if (generate)
5284 {
5285 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5286 rtx shift = GEN_INT (set_zero_bit_copies);
5287
5288 emit_constant_insn
5289 (cond,
5290 gen_rtx_SET (sub,
5291 gen_rtx_NOT (mode,
5292 gen_rtx_LSHIFTRT (mode,
5293 source,
5294 shift))));
5295 emit_constant_insn
5296 (cond,
5297 gen_rtx_SET (target,
5298 gen_rtx_NOT (mode,
5299 gen_rtx_ASHIFT (mode, sub,
5300 shift))));
5301 }
5302 return 2;
5303 }
5304
5305 /* This will never be reached for Thumb2 because orn is a valid
5306 instruction. This is for Thumb1 and the 32-bit ARM case.
5307
5308 x = y | constant (such that ~constant is a valid constant)
5309 Transform this to
5310 x = ~(~y & ~constant).
5311 */
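/* E.g. (illustrative) for x = y | 0xffff00ff, whose complement
   0x0000ff00 is a valid immediate, we can emit
   mvn x, y
   and x, x, #0xff00
   mvn x, x
   which computes ~(~y & 0xff00), i.e. y | 0xffff00ff.  */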
5312 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5313 {
5314 if (generate)
5315 {
5316 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5317 emit_constant_insn (cond,
5318 gen_rtx_SET (sub,
5319 gen_rtx_NOT (mode, source)));
5320 source = sub;
5321 if (subtargets)
5322 sub = gen_reg_rtx (mode);
5323 emit_constant_insn (cond,
5324 gen_rtx_SET (sub,
5325 gen_rtx_AND (mode, source,
5326 GEN_INT (temp1))));
5327 emit_constant_insn (cond,
5328 gen_rtx_SET (target,
5329 gen_rtx_NOT (mode, sub)));
5330 }
5331 return 3;
5332 }
5333 break;
5334
5335 case AND:
5336 /* See if two shifts will do 2 or more insns' worth of work. */
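/* E.g. (illustrative) on a core without UBFX, x &= 0x1ff cannot use a
   single immediate, but shifting left by 23 and then logically right
   by 23 clears the top 23 bits in two instructions.  */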
5337 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5338 {
5339 HOST_WIDE_INT shift_mask = ((0xffffffff
5340 << (32 - clear_sign_bit_copies))
5341 & 0xffffffff);
5342
5343 if ((remainder | shift_mask) != 0xffffffff)
5344 {
5345 HOST_WIDE_INT new_val
5346 = ARM_SIGN_EXTEND (remainder | shift_mask);
5347
5348 if (generate)
5349 {
5350 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5351 insns = arm_gen_constant (AND, SImode, cond, new_val,
5352 new_src, source, subtargets, 1);
5353 source = new_src;
5354 }
5355 else
5356 {
5357 rtx targ = subtargets ? NULL_RTX : target;
5358 insns = arm_gen_constant (AND, mode, cond, new_val,
5359 targ, source, subtargets, 0);
5360 }
5361 }
5362
5363 if (generate)
5364 {
5365 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5366 rtx shift = GEN_INT (clear_sign_bit_copies);
5367
5368 emit_insn (gen_ashlsi3 (new_src, source, shift));
5369 emit_insn (gen_lshrsi3 (target, new_src, shift));
5370 }
5371
5372 return insns + 2;
5373 }
5374
5375 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5376 {
5377 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5378
5379 if ((remainder | shift_mask) != 0xffffffff)
5380 {
5381 HOST_WIDE_INT new_val
5382 = ARM_SIGN_EXTEND (remainder | shift_mask);
5383 if (generate)
5384 {
5385 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5386
5387 insns = arm_gen_constant (AND, mode, cond, new_val,
5388 new_src, source, subtargets, 1);
5389 source = new_src;
5390 }
5391 else
5392 {
5393 rtx targ = subtargets ? NULL_RTX : target;
5394
5395 insns = arm_gen_constant (AND, mode, cond, new_val,
5396 targ, source, subtargets, 0);
5397 }
5398 }
5399
5400 if (generate)
5401 {
5402 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5403 rtx shift = GEN_INT (clear_zero_bit_copies);
5404
5405 emit_insn (gen_lshrsi3 (new_src, source, shift));
5406 emit_insn (gen_ashlsi3 (target, new_src, shift));
5407 }
5408
5409 return insns + 2;
5410 }
5411
5412 break;
5413
5414 default:
5415 break;
5416 }
5417
5418 /* Calculate what the instruction sequences would be if we generated it
5419 normally, negated, or inverted. */
5420 if (code == AND)
5421 /* AND cannot be split into multiple insns, so invert and use BIC. */
5422 insns = 99;
5423 else
5424 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5425
5426 if (can_negate)
5427 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5428 &neg_immediates);
5429 else
5430 neg_insns = 99;
5431
5432 if (can_invert || final_invert)
5433 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5434 &inv_immediates);
5435 else
5436 inv_insns = 99;
5437
5438 immediates = &pos_immediates;
5439
5440 /* Is the negated immediate sequence more efficient? */
5441 if (neg_insns < insns && neg_insns <= inv_insns)
5442 {
5443 insns = neg_insns;
5444 immediates = &neg_immediates;
5445 }
5446 else
5447 can_negate = 0;
5448
5449 /* Is the inverted immediate sequence more efficient?
5450 We must allow for an extra NOT instruction for XOR operations, although
5451 there is some chance that the final 'mvn' will get optimized later. */
5452 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5453 {
5454 insns = inv_insns;
5455 immediates = &inv_immediates;
5456 }
5457 else
5458 {
5459 can_invert = 0;
5460 final_invert = 0;
5461 }
5462
5463 /* Now output the chosen sequence as instructions. */
5464 if (generate)
5465 {
5466 for (i = 0; i < insns; i++)
5467 {
5468 rtx new_src, temp1_rtx;
5469
5470 temp1 = immediates->i[i];
5471
5472 if (code == SET || code == MINUS)
5473 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5474 else if ((final_invert || i < (insns - 1)) && subtargets)
5475 new_src = gen_reg_rtx (mode);
5476 else
5477 new_src = target;
5478
5479 if (can_invert)
5480 temp1 = ~temp1;
5481 else if (can_negate)
5482 temp1 = -temp1;
5483
5484 temp1 = trunc_int_for_mode (temp1, mode);
5485 temp1_rtx = GEN_INT (temp1);
5486
5487 if (code == SET)
5488 ;
5489 else if (code == MINUS)
5490 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5491 else
5492 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5493
5494 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5495 source = new_src;
5496
5497 if (code == SET)
5498 {
5499 can_negate = can_invert;
5500 can_invert = 0;
5501 code = PLUS;
5502 }
5503 else if (code == MINUS)
5504 code = PLUS;
5505 }
5506 }
5507
5508 if (final_invert)
5509 {
5510 if (generate)
5511 emit_constant_insn (cond, gen_rtx_SET (target,
5512 gen_rtx_NOT (mode, source)));
5513 insns++;
5514 }
5515
5516 return insns;
5517 }
5518
5519 /* Return TRUE if op is a constant where both the low and high words are
5520 suitable for RSB/RSC instructions. This is never true for Thumb, since
5521 we do not have RSC in that case. */
5522 static bool
5523 arm_const_double_prefer_rsbs_rsc (rtx op)
5524 {
5525 /* Thumb lacks RSC, so we never prefer that sequence. */
5526 if (TARGET_THUMB || !CONST_INT_P (op))
5527 return false;
5528 HOST_WIDE_INT hi, lo;
5529 lo = UINTVAL (op) & 0xffffffffULL;
5530 hi = UINTVAL (op) >> 32;
5531 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5532 }
5533
5534 /* Canonicalize a comparison so that we are more likely to recognize it.
5535 This can be done for a few constant compares, where we can make the
5536 immediate value easier to load. */
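/* For example (illustrative), (x <= 0x1fff) cannot load 0x1fff as an
   immediate, but it is equivalent to (x < 0x2000), and 0x2000 is a
   valid immediate.  */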
5537
5538 static void
5539 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5540 bool op0_preserve_value)
5541 {
5542 machine_mode mode;
5543 unsigned HOST_WIDE_INT i, maxval;
5544
5545 mode = GET_MODE (*op0);
5546 if (mode == VOIDmode)
5547 mode = GET_MODE (*op1);
5548
5549 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5550
5551 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5552 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5553 either reversed or (for constant OP1) adjusted to GE/LT.
5554 Similarly for GTU/LEU in Thumb mode. */
5555 if (mode == DImode)
5556 {
5557
5558 if (*code == GT || *code == LE
5559 || *code == GTU || *code == LEU)
5560 {
5561 /* Missing comparison. First try to use an available
5562 comparison. */
5563 if (CONST_INT_P (*op1))
5564 {
5565 i = INTVAL (*op1);
5566 switch (*code)
5567 {
5568 case GT:
5569 case LE:
5570 if (i != maxval)
5571 {
5572 /* Try to convert to GE/LT, unless that would be more
5573 expensive. */
5574 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5575 && arm_const_double_prefer_rsbs_rsc (*op1))
5576 return;
5577 *op1 = GEN_INT (i + 1);
5578 *code = *code == GT ? GE : LT;
5579 return;
5580 }
5581 break;
5582
5583 case GTU:
5584 case LEU:
5585 if (i != ~((unsigned HOST_WIDE_INT) 0))
5586 {
5587 /* Try to convert to GEU/LTU, unless that would
5588 be more expensive. */
5589 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5590 && arm_const_double_prefer_rsbs_rsc (*op1))
5591 return;
5592 *op1 = GEN_INT (i + 1);
5593 *code = *code == GTU ? GEU : LTU;
5594 return;
5595 }
5596 break;
5597
5598 default:
5599 gcc_unreachable ();
5600 }
5601 }
5602
5603 if (!op0_preserve_value)
5604 {
5605 std::swap (*op0, *op1);
5606 *code = (int)swap_condition ((enum rtx_code)*code);
5607 }
5608 }
5609 return;
5610 }
5611
5612 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5613 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5614 to facilitate possible combining with a cmp into 'ands'. */
5615 if (mode == SImode
5616 && GET_CODE (*op0) == ZERO_EXTEND
5617 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5618 && GET_MODE (XEXP (*op0, 0)) == QImode
5619 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5620 && subreg_lowpart_p (XEXP (*op0, 0))
5621 && *op1 == const0_rtx)
5622 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5623 GEN_INT (255));
5624
5625 /* Comparisons smaller than DImode. Only adjust comparisons against
5626 an out-of-range constant. */
5627 if (!CONST_INT_P (*op1)
5628 || const_ok_for_arm (INTVAL (*op1))
5629 || const_ok_for_arm (- INTVAL (*op1)))
5630 return;
5631
5632 i = INTVAL (*op1);
5633
5634 switch (*code)
5635 {
5636 case EQ:
5637 case NE:
5638 return;
5639
5640 case GT:
5641 case LE:
5642 if (i != maxval
5643 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5644 {
5645 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5646 *code = *code == GT ? GE : LT;
5647 return;
5648 }
5649 break;
5650
5651 case GE:
5652 case LT:
5653 if (i != ~maxval
5654 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5655 {
5656 *op1 = GEN_INT (i - 1);
5657 *code = *code == GE ? GT : LE;
5658 return;
5659 }
5660 break;
5661
5662 case GTU:
5663 case LEU:
5664 if (i != ~((unsigned HOST_WIDE_INT) 0)
5665 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5666 {
5667 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5668 *code = *code == GTU ? GEU : LTU;
5669 return;
5670 }
5671 break;
5672
5673 case GEU:
5674 case LTU:
5675 if (i != 0
5676 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5677 {
5678 *op1 = GEN_INT (i - 1);
5679 *code = *code == GEU ? GTU : LEU;
5680 return;
5681 }
5682 break;
5683
5684 default:
5685 gcc_unreachable ();
5686 }
5687 }
5688
5689
5690 /* Define how to find the value returned by a function. */
5691
5692 static rtx
5693 arm_function_value(const_tree type, const_tree func,
5694 bool outgoing ATTRIBUTE_UNUSED)
5695 {
5696 machine_mode mode;
5697 int unsignedp ATTRIBUTE_UNUSED;
5698 rtx r ATTRIBUTE_UNUSED;
5699
5700 mode = TYPE_MODE (type);
5701
5702 if (TARGET_AAPCS_BASED)
5703 return aapcs_allocate_return_reg (mode, type, func);
5704
5705 /* Promote integer types. */
5706 if (INTEGRAL_TYPE_P (type))
5707 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5708
5709 /* Promote small structs returned in a register to full-word size
5710 for big-endian AAPCS. */
5711 if (arm_return_in_msb (type))
5712 {
5713 HOST_WIDE_INT size = int_size_in_bytes (type);
5714 if (size % UNITS_PER_WORD != 0)
5715 {
5716 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5717 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5718 }
5719 }
5720
5721 return arm_libcall_value_1 (mode);
5722 }
5723
5724 /* libcall hashtable helpers. */
5725
5726 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5727 {
5728 static inline hashval_t hash (const rtx_def *);
5729 static inline bool equal (const rtx_def *, const rtx_def *);
5730 static inline void remove (rtx_def *);
5731 };
5732
5733 inline bool
5734 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5735 {
5736 return rtx_equal_p (p1, p2);
5737 }
5738
5739 inline hashval_t
5740 libcall_hasher::hash (const rtx_def *p1)
5741 {
5742 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5743 }
5744
5745 typedef hash_table<libcall_hasher> libcall_table_type;
5746
5747 static void
5748 add_libcall (libcall_table_type *htab, rtx libcall)
5749 {
5750 *htab->find_slot (libcall, INSERT) = libcall;
5751 }
5752
5753 static bool
5754 arm_libcall_uses_aapcs_base (const_rtx libcall)
5755 {
5756 static bool init_done = false;
5757 static libcall_table_type *libcall_htab = NULL;
5758
5759 if (!init_done)
5760 {
5761 init_done = true;
5762
5763 libcall_htab = new libcall_table_type (31);
5764 add_libcall (libcall_htab,
5765 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5766 add_libcall (libcall_htab,
5767 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5768 add_libcall (libcall_htab,
5769 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5770 add_libcall (libcall_htab,
5771 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5772
5773 add_libcall (libcall_htab,
5774 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5775 add_libcall (libcall_htab,
5776 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5777 add_libcall (libcall_htab,
5778 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5779 add_libcall (libcall_htab,
5780 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5781
5782 add_libcall (libcall_htab,
5783 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5784 add_libcall (libcall_htab,
5785 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5786 add_libcall (libcall_htab,
5787 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5788 add_libcall (libcall_htab,
5789 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5790 add_libcall (libcall_htab,
5791 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5792 add_libcall (libcall_htab,
5793 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5794 add_libcall (libcall_htab,
5795 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5796 add_libcall (libcall_htab,
5797 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5798
5799 /* Values from double-precision helper functions are returned in core
5800 registers if the selected core only supports single-precision
5801 arithmetic, even if we are using the hard-float ABI. The same is
5802 true for single-precision helpers except in the case of MVE, because in
5803 MVE we will be using the hard-float ABI on a CPU which doesn't support
5804 single-precision operations in hardware. In MVE the following check
5805 enables use of emulation for the single-precision arithmetic
5806 operations. */
5807 if (TARGET_HAVE_MVE)
5808 {
5809 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5810 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5811 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5812 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5813 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5814 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5815 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5816 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5817 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5818 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5819 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5820 }
5821 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5822 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5823 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5824 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5825 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5826 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5827 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5828 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5829 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5830 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5831 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5832 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5833 SFmode));
5834 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5835 DFmode));
5836 add_libcall (libcall_htab,
5837 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5838 }
5839
5840 return libcall && libcall_htab->find (libcall) != NULL;
5841 }
5842
5843 static rtx
5844 arm_libcall_value_1 (machine_mode mode)
5845 {
5846 if (TARGET_AAPCS_BASED)
5847 return aapcs_libcall_value (mode);
5848 else if (TARGET_IWMMXT_ABI
5849 && arm_vector_mode_supported_p (mode))
5850 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5851 else
5852 return gen_rtx_REG (mode, ARG_REGISTER (1));
5853 }
5854
5855 /* Define how to find the value returned by a library function
5856 assuming the value has mode MODE. */
5857
5858 static rtx
5859 arm_libcall_value (machine_mode mode, const_rtx libcall)
5860 {
5861 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5862 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5863 {
5864 /* The following libcalls return their result in integer registers,
5865 even though they return a floating point value. */
5866 if (arm_libcall_uses_aapcs_base (libcall))
5867 return gen_rtx_REG (mode, ARG_REGISTER(1));
5868
5869 }
5870
5871 return arm_libcall_value_1 (mode);
5872 }
5873
5874 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5875
5876 static bool
5877 arm_function_value_regno_p (const unsigned int regno)
5878 {
5879 if (regno == ARG_REGISTER (1)
5880 || (TARGET_32BIT
5881 && TARGET_AAPCS_BASED
5882 && TARGET_HARD_FLOAT
5883 && regno == FIRST_VFP_REGNUM)
5884 || (TARGET_IWMMXT_ABI
5885 && regno == FIRST_IWMMXT_REGNUM))
5886 return true;
5887
5888 return false;
5889 }
5890
5891 /* Determine the amount of memory needed to store the possible return
5892 registers of an untyped call. */
5893 int
5894 arm_apply_result_size (void)
5895 {
5896 int size = 16;
5897
5898 if (TARGET_32BIT)
5899 {
5900 if (TARGET_HARD_FLOAT_ABI)
5901 size += 32;
5902 if (TARGET_IWMMXT_ABI)
5903 size += 8;
5904 }
5905
5906 return size;
5907 }
5908
5909 /* Decide whether TYPE should be returned in memory (true)
5910 or in a register (false). FNTYPE is the type of the function making
5911 the call. */
5912 static bool
5913 arm_return_in_memory (const_tree type, const_tree fntype)
5914 {
5915 HOST_WIDE_INT size;
5916
5917 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5918
5919 if (TARGET_AAPCS_BASED)
5920 {
5921 /* Simple, non-aggregate types (i.e. not including vectors and
5922 complex) are always returned in a register (or registers).
5923 We don't care about which register here, so we can short-cut
5924 some of the detail. */
5925 if (!AGGREGATE_TYPE_P (type)
5926 && TREE_CODE (type) != VECTOR_TYPE
5927 && TREE_CODE (type) != COMPLEX_TYPE)
5928 return false;
5929
5930 /* Any return value that is no larger than one word can be
5931 returned in r0. */
5932 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5933 return false;
5934
5935 /* Check any available co-processors to see if they accept the
5936 type as a register candidate (VFP, for example, can return
5937 some aggregates in consecutive registers). These aren't
5938 available if the call is variadic. */
5939 if (aapcs_select_return_coproc (type, fntype) >= 0)
5940 return false;
5941
5942 /* Vector values should be returned using ARM registers, not
5943 memory (unless they're over 16 bytes, which will break since
5944 we only have four call-clobbered registers to play with). */
5945 if (TREE_CODE (type) == VECTOR_TYPE)
5946 return (size < 0 || size > (4 * UNITS_PER_WORD));
5947
5948 /* The rest go in memory. */
5949 return true;
5950 }
5951
5952 if (TREE_CODE (type) == VECTOR_TYPE)
5953 return (size < 0 || size > (4 * UNITS_PER_WORD));
5954
5955 if (!AGGREGATE_TYPE_P (type)
5956 && (TREE_CODE (type) != VECTOR_TYPE))
5957 /* All simple types are returned in registers. */
5958 return false;
5959
5960 if (arm_abi != ARM_ABI_APCS)
5961 {
5962 /* ATPCS and later return aggregate types in memory only if they are
5963 larger than a word (or are variable size). */
5964 return (size < 0 || size > UNITS_PER_WORD);
5965 }
5966
5967 /* For the arm-wince targets we choose to be compatible with Microsoft's
5968 ARM and Thumb compilers, which always return aggregates in memory. */
5969 #ifndef ARM_WINCE
5970 /* All structures/unions bigger than one word are returned in memory.
5971 Also catch the case where int_size_in_bytes returns -1. In this case
5972 the aggregate is either huge or of variable size, and in either case
5973 we will want to return it via memory and not in a register. */
5974 if (size < 0 || size > UNITS_PER_WORD)
5975 return true;
5976
5977 if (TREE_CODE (type) == RECORD_TYPE)
5978 {
5979 tree field;
5980
5981 /* For a struct the APCS says that we only return in a register
5982 if the type is 'integer like' and every addressable element
5983 has an offset of zero. For practical purposes this means
5984 that the structure can have at most one non bit-field element
5985 and that this element must be the first one in the structure. */
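/* As an illustration (hypothetical types): under these APCS rules
   struct a { int x; };             is returned in a register, while
   struct b { short x; short y; };  is returned in memory, because it
   has a second addressable (non-bit-field) member, even though it
   fits in one word.  */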
5986
5987 /* Find the first field, ignoring non FIELD_DECL things which will
5988 have been created by C++. */
5989 /* NOTE: This code is deprecated and has not been updated to handle
5990 DECL_FIELD_ABI_IGNORED. */
5991 for (field = TYPE_FIELDS (type);
5992 field && TREE_CODE (field) != FIELD_DECL;
5993 field = DECL_CHAIN (field))
5994 continue;
5995
5996 if (field == NULL)
5997 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5998
5999 /* Check that the first field is valid for returning in a register. */
6000
6001 /* ... Floats are not allowed. */
6002 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6003 return true;
6004
6005 /* ... Aggregates that are not themselves valid for returning in
6006 a register are not allowed. */
6007 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6008 return true;
6009
6010 /* Now check the remaining fields, if any. Only bitfields are allowed,
6011 since they are not addressable. */
6012 for (field = DECL_CHAIN (field);
6013 field;
6014 field = DECL_CHAIN (field))
6015 {
6016 if (TREE_CODE (field) != FIELD_DECL)
6017 continue;
6018
6019 if (!DECL_BIT_FIELD_TYPE (field))
6020 return true;
6021 }
6022
6023 return false;
6024 }
6025
6026 if (TREE_CODE (type) == UNION_TYPE)
6027 {
6028 tree field;
6029
6030 /* Unions can be returned in registers if every element is
6031 integral, or can be returned in an integer register. */
6032 for (field = TYPE_FIELDS (type);
6033 field;
6034 field = DECL_CHAIN (field))
6035 {
6036 if (TREE_CODE (field) != FIELD_DECL)
6037 continue;
6038
6039 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6040 return true;
6041
6042 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6043 return true;
6044 }
6045
6046 return false;
6047 }
6048 #endif /* not ARM_WINCE */
6049
6050 /* Return all other types in memory. */
6051 return true;
6052 }
6053
6054 const struct pcs_attribute_arg
6055 {
6056 const char *arg;
6057 enum arm_pcs value;
6058 } pcs_attribute_args[] =
6059 {
6060 {"aapcs", ARM_PCS_AAPCS},
6061 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6062 #if 0
6063 /* We could recognize these, but changes would be needed elsewhere
6064 * to implement them. */
6065 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6066 {"atpcs", ARM_PCS_ATPCS},
6067 {"apcs", ARM_PCS_APCS},
6068 #endif
6069 {NULL, ARM_PCS_UNKNOWN}
6070 };
6071
6072 static enum arm_pcs
6073 arm_pcs_from_attribute (tree attr)
6074 {
6075 const struct pcs_attribute_arg *ptr;
6076 const char *arg;
6077
6078 /* Get the value of the argument. */
6079 if (TREE_VALUE (attr) == NULL_TREE
6080 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6081 return ARM_PCS_UNKNOWN;
6082
6083 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6084
6085 /* Check it against the list of known arguments. */
6086 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6087 if (streq (arg, ptr->arg))
6088 return ptr->value;
6089
6090 /* An unrecognized PCS variant. */
6091 return ARM_PCS_UNKNOWN;
6092 }
6093
6094 /* Get the PCS variant to use for this call. TYPE is the function's type
6095 specification, DECL is the specific declaration. DECL may be null if
6096 the call could be indirect or if this is a library call. */
6097 static enum arm_pcs
6098 arm_get_pcs_model (const_tree type, const_tree decl)
6099 {
6100 bool user_convention = false;
6101 enum arm_pcs user_pcs = arm_pcs_default;
6102 tree attr;
6103
6104 gcc_assert (type);
6105
6106 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6107 if (attr)
6108 {
6109 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6110 user_convention = true;
6111 }
6112
6113 if (TARGET_AAPCS_BASED)
6114 {
6115 /* Detect varargs functions. These always use the base rules
6116 (no argument is ever a candidate for a co-processor
6117 register). */
6118 bool base_rules = stdarg_p (type);
6119
6120 if (user_convention)
6121 {
6122 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6123 sorry ("non-AAPCS derived PCS variant");
6124 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6125 error ("variadic functions must use the base AAPCS variant");
6126 }
6127
6128 if (base_rules)
6129 return ARM_PCS_AAPCS;
6130 else if (user_convention)
6131 return user_pcs;
6132 else if (decl && flag_unit_at_a_time)
6133 {
6134 /* Local functions never leak outside this compilation unit,
6135 so we are free to use whatever conventions are
6136 appropriate. */
6137 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6138 cgraph_node *local_info_node
6139 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6140 if (local_info_node && local_info_node->local)
6141 return ARM_PCS_AAPCS_LOCAL;
6142 }
6143 }
6144 else if (user_convention && user_pcs != arm_pcs_default)
6145 sorry ("PCS variant");
6146
6147 /* For everything else we use the target's default. */
6148 return arm_pcs_default;
6149 }
6150
6151
6152 static void
6153 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6154 const_tree fntype ATTRIBUTE_UNUSED,
6155 rtx libcall ATTRIBUTE_UNUSED,
6156 const_tree fndecl ATTRIBUTE_UNUSED)
6157 {
6158 /* Record the unallocated VFP registers. */
6159 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6160 pcum->aapcs_vfp_reg_alloc = 0;
6161 }
6162
6163 /* Bitmasks that indicate whether earlier versions of GCC would have
6164 taken a different path through the ABI logic. This should result in
6165 a -Wpsabi warning if the earlier path led to a different ABI decision.
6166
6167 WARN_PSABI_EMPTY_CXX17_BASE
6168 Indicates that the type includes an artificial empty C++17 base field
6169 that, prior to GCC 10.1, would prevent the type from being treated as
6170 a HFA or HVA. See PR94711 for details.
6171
6172 WARN_PSABI_NO_UNIQUE_ADDRESS
6173 Indicates that the type includes an empty [[no_unique_address]] field
6174 that, prior to GCC 10.1, would prevent the type from being treated as
6175 a HFA or HVA. */
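/* For instance (hypothetical C++ type): with
   struct S { [[no_unique_address]] Empty e; double d[4]; };
   the empty member is ignored and S is a homogeneous aggregate of four
   doubles, whereas releases before GCC 10.1 did not treat it as one;
   aapcs_vfp_sub_candidate records this via the flags below.  */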
6176 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6177 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6178
6179 /* Walk down the type tree of TYPE counting consecutive base elements.
6180 If *MODEP is VOIDmode, then set it to the first valid floating point
6181 type. If a non-floating point type is found, or if a floating point
6182 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6183 otherwise return the count in the sub-tree.
6184
6185 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6186 function has changed its behavior relative to earlier versions of GCC.
6187 Normally the argument should be nonnull and point to a zero-initialized
6188 variable. The function then records whether the ABI decision might
6189 be affected by a known fix to the ABI logic, setting the associated
6190 WARN_PSABI_* bits if so.
6191
6192 When the argument is instead a null pointer, the function tries to
6193 simulate the behavior of GCC before all such ABI fixes were made.
6194 This is useful to check whether the function returns something
6195 different after the ABI fixes. */
6196 static int
6197 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6198 unsigned int *warn_psabi_flags)
6199 {
6200 machine_mode mode;
6201 HOST_WIDE_INT size;
6202
6203 switch (TREE_CODE (type))
6204 {
6205 case REAL_TYPE:
6206 mode = TYPE_MODE (type);
6207 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6208 return -1;
6209
6210 if (*modep == VOIDmode)
6211 *modep = mode;
6212
6213 if (*modep == mode)
6214 return 1;
6215
6216 break;
6217
6218 case COMPLEX_TYPE:
6219 mode = TYPE_MODE (TREE_TYPE (type));
6220 if (mode != DFmode && mode != SFmode)
6221 return -1;
6222
6223 if (*modep == VOIDmode)
6224 *modep = mode;
6225
6226 if (*modep == mode)
6227 return 2;
6228
6229 break;
6230
6231 case VECTOR_TYPE:
6232 /* Use V2SImode and V4SImode as representatives of all 64-bit
6233 and 128-bit vector types, whether or not those modes are
6234 supported with the present options. */
6235 size = int_size_in_bytes (type);
6236 switch (size)
6237 {
6238 case 8:
6239 mode = V2SImode;
6240 break;
6241 case 16:
6242 mode = V4SImode;
6243 break;
6244 default:
6245 return -1;
6246 }
6247
6248 if (*modep == VOIDmode)
6249 *modep = mode;
6250
6251 /* Vector modes are considered to be opaque: two vectors are
6252 equivalent for the purposes of being homogeneous aggregates
6253 if they are the same size. */
6254 if (*modep == mode)
6255 return 1;
6256
6257 break;
6258
6259 case ARRAY_TYPE:
6260 {
6261 int count;
6262 tree index = TYPE_DOMAIN (type);
6263
6264 /* Can't handle incomplete types nor sizes that are not
6265 fixed. */
6266 if (!COMPLETE_TYPE_P (type)
6267 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6268 return -1;
6269
6270 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6271 warn_psabi_flags);
6272 if (count == -1
6273 || !index
6274 || !TYPE_MAX_VALUE (index)
6275 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6276 || !TYPE_MIN_VALUE (index)
6277 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6278 || count < 0)
6279 return -1;
6280
6281 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6282 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6283
6284 /* There must be no padding. */
6285 if (wi::to_wide (TYPE_SIZE (type))
6286 != count * GET_MODE_BITSIZE (*modep))
6287 return -1;
6288
6289 return count;
6290 }
6291
6292 case RECORD_TYPE:
6293 {
6294 int count = 0;
6295 int sub_count;
6296 tree field;
6297
6298 /* Can't handle incomplete types nor sizes that are not
6299 fixed. */
6300 if (!COMPLETE_TYPE_P (type)
6301 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6302 return -1;
6303
6304 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6305 {
6306 if (TREE_CODE (field) != FIELD_DECL)
6307 continue;
6308
6309 if (DECL_FIELD_ABI_IGNORED (field))
6310 {
6311 /* See whether this is something that earlier versions of
6312 GCC failed to ignore. */
6313 unsigned int flag;
6314 if (lookup_attribute ("no_unique_address",
6315 DECL_ATTRIBUTES (field)))
6316 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6317 else if (cxx17_empty_base_field_p (field))
6318 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6319 else
6320 /* No compatibility problem. */
6321 continue;
6322
6323 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6324 if (warn_psabi_flags)
6325 {
6326 *warn_psabi_flags |= flag;
6327 continue;
6328 }
6329 }
6330
6331 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6332 warn_psabi_flags);
6333 if (sub_count < 0)
6334 return -1;
6335 count += sub_count;
6336 }
6337
6338 /* There must be no padding. */
6339 if (wi::to_wide (TYPE_SIZE (type))
6340 != count * GET_MODE_BITSIZE (*modep))
6341 return -1;
6342
6343 return count;
6344 }
6345
6346 case UNION_TYPE:
6347 case QUAL_UNION_TYPE:
6348 {
6349 /* These aren't very interesting except in a degenerate case. */
6350 int count = 0;
6351 int sub_count;
6352 tree field;
6353
6354 /* Can't handle incomplete types nor sizes that are not
6355 fixed. */
6356 if (!COMPLETE_TYPE_P (type)
6357 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6358 return -1;
6359
6360 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6361 {
6362 if (TREE_CODE (field) != FIELD_DECL)
6363 continue;
6364
6365 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6366 warn_psabi_flags);
6367 if (sub_count < 0)
6368 return -1;
6369 count = count > sub_count ? count : sub_count;
6370 }
6371
6372 /* There must be no padding. */
6373 if (wi::to_wide (TYPE_SIZE (type))
6374 != count * GET_MODE_BITSIZE (*modep))
6375 return -1;
6376
6377 return count;
6378 }
6379
6380 default:
6381 break;
6382 }
6383
6384 return -1;
6385 }
6386
6387 /* Return true if PCS_VARIANT should use VFP registers. */
6388 static bool
6389 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6390 {
6391 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6392 {
6393 static bool seen_thumb1_vfp = false;
6394
6395 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6396 {
6397 sorry ("Thumb-1 hard-float VFP ABI");
6398 /* sorry() is not immediately fatal, so only display this once. */
6399 seen_thumb1_vfp = true;
6400 }
6401
6402 return true;
6403 }
6404
6405 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6406 return false;
6407
6408 return (TARGET_32BIT && TARGET_HARD_FLOAT
6409 && (TARGET_VFP_DOUBLE || !is_double));
6410 }
6411
6412 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6413 suitable for passing or returning in VFP registers for the PCS
6414 variant selected. If it is, then *BASE_MODE is updated to contain
6415 a machine mode describing each element of the argument's type and
6416 *COUNT to hold the number of such elements. */
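/* For example (illustrative), when the VFP PCS variant applies, a C
   struct { float a, b, c; } is a homogeneous aggregate of three SFmode
   elements, so *BASE_MODE is set to SFmode and *COUNT to 3.  */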
6417 static bool
6418 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6419 machine_mode mode, const_tree type,
6420 machine_mode *base_mode, int *count)
6421 {
6422 machine_mode new_mode = VOIDmode;
6423
6424 /* If we have the type information, prefer that to working things
6425 out from the mode. */
6426 if (type)
6427 {
6428 unsigned int warn_psabi_flags = 0;
6429 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6430 &warn_psabi_flags);
6431 if (ag_count > 0 && ag_count <= 4)
6432 {
6433 static unsigned last_reported_type_uid;
6434 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6435 int alt;
6436 if (warn_psabi
6437 && warn_psabi_flags
6438 && uid != last_reported_type_uid
6439 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6440 != ag_count))
6441 {
6442 const char *url
6443 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6444 gcc_assert (alt == -1);
6445 last_reported_type_uid = uid;
6446 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6447 qualification. */
6448 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6449 inform (input_location, "parameter passing for argument of "
6450 "type %qT with %<[[no_unique_address]]%> members "
6451 "changed %{in GCC 10.1%}",
6452 TYPE_MAIN_VARIANT (type), url);
6453 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6454 inform (input_location, "parameter passing for argument of "
6455 "type %qT when C++17 is enabled changed to match "
6456 "C++14 %{in GCC 10.1%}",
6457 TYPE_MAIN_VARIANT (type), url);
6458 }
6459 *count = ag_count;
6460 }
6461 else
6462 return false;
6463 }
6464 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6465 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6466 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6467 {
6468 *count = 1;
6469 new_mode = mode;
6470 }
6471 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6472 {
6473 *count = 2;
6474 new_mode = (mode == DCmode ? DFmode : SFmode);
6475 }
6476 else
6477 return false;
6478
6479
6480 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6481 return false;
6482
6483 *base_mode = new_mode;
6484
6485 if (TARGET_GENERAL_REGS_ONLY)
6486 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6487 type);
6488
6489 return true;
6490 }
6491
6492 static bool
6493 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6494 machine_mode mode, const_tree type)
6495 {
6496 int count ATTRIBUTE_UNUSED;
6497 machine_mode ag_mode ATTRIBUTE_UNUSED;
6498
6499 if (!use_vfp_abi (pcs_variant, false))
6500 return false;
6501 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6502 &ag_mode, &count);
6503 }
6504
6505 static bool
6506 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6507 const_tree type)
6508 {
6509 if (!use_vfp_abi (pcum->pcs_variant, false))
6510 return false;
6511
6512 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6513 &pcum->aapcs_vfp_rmode,
6514 &pcum->aapcs_vfp_rcount);
6515 }
6516
6517 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6518 for the behaviour of this function. */
6519
6520 static bool
6521 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6522 const_tree type ATTRIBUTE_UNUSED)
6523 {
6524 int rmode_size
6525 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6526 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6527 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6528 int regno;
6529
6530 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6531 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6532 {
6533 pcum->aapcs_vfp_reg_alloc = mask << regno;
6534 if (mode == BLKmode
6535 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6536 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6537 {
6538 int i;
6539 int rcount = pcum->aapcs_vfp_rcount;
6540 int rshift = shift;
6541 machine_mode rmode = pcum->aapcs_vfp_rmode;
6542 rtx par;
6543 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6544 {
6545 /* Avoid using unsupported vector modes. */
6546 if (rmode == V2SImode)
6547 rmode = DImode;
6548 else if (rmode == V4SImode)
6549 {
6550 rmode = DImode;
6551 rcount *= 2;
6552 rshift /= 2;
6553 }
6554 }
6555 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6556 for (i = 0; i < rcount; i++)
6557 {
6558 rtx tmp = gen_rtx_REG (rmode,
6559 FIRST_VFP_REGNUM + regno + i * rshift);
6560 tmp = gen_rtx_EXPR_LIST
6561 (VOIDmode, tmp,
6562 GEN_INT (i * GET_MODE_SIZE (rmode)));
6563 XVECEXP (par, 0, i) = tmp;
6564 }
6565
6566 pcum->aapcs_reg = par;
6567 }
6568 else
6569 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6570 return true;
6571 }
6572 return false;
6573 }
6574
6575 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6576 comment there for the behaviour of this function. */
6577
6578 static rtx
6579 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6580 machine_mode mode,
6581 const_tree type ATTRIBUTE_UNUSED)
6582 {
6583 if (!use_vfp_abi (pcs_variant, false))
6584 return NULL;
6585
6586 if (mode == BLKmode
6587 || (GET_MODE_CLASS (mode) == MODE_INT
6588 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6589 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6590 {
6591 int count;
6592 machine_mode ag_mode;
6593 int i;
6594 rtx par;
6595 int shift;
6596
6597 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6598 &ag_mode, &count);
6599
6600 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6601 {
6602 if (ag_mode == V2SImode)
6603 ag_mode = DImode;
6604 else if (ag_mode == V4SImode)
6605 {
6606 ag_mode = DImode;
6607 count *= 2;
6608 }
6609 }
6610 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6611 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6612 for (i = 0; i < count; i++)
6613 {
6614 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6615 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6616 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6617 XVECEXP (par, 0, i) = tmp;
6618 }
6619
6620 return par;
6621 }
6622
6623 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6624 }
6625
6626 static void
6627 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6628 machine_mode mode ATTRIBUTE_UNUSED,
6629 const_tree type ATTRIBUTE_UNUSED)
6630 {
6631 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6632 pcum->aapcs_vfp_reg_alloc = 0;
6633 return;
6634 }
6635
6636 #define AAPCS_CP(X) \
6637 { \
6638 aapcs_ ## X ## _cum_init, \
6639 aapcs_ ## X ## _is_call_candidate, \
6640 aapcs_ ## X ## _allocate, \
6641 aapcs_ ## X ## _is_return_candidate, \
6642 aapcs_ ## X ## _allocate_return_reg, \
6643 aapcs_ ## X ## _advance \
6644 }
6645
6646 /* Table of co-processors that can be used to pass arguments in
6647 registers. Ideally no argument should be a candidate for more than
6648 one co-processor table entry, but the table is processed in order
6649 and stops after the first match. If that entry then fails to put
6650 the argument into a co-processor register, the argument will go on
6651 the stack. */
6652 static struct
6653 {
6654 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6655 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6656
6657 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6658 BLKmode) is a candidate for this co-processor's registers; this
6659 function should ignore any position-dependent state in
6660 CUMULATIVE_ARGS and only use call-type dependent information. */
6661 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6662
6663 /* Return true if the argument does get a co-processor register; it
6664 should set aapcs_reg to an RTX of the register allocated as is
6665 required for a return from FUNCTION_ARG. */
6666 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6667
6668 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6669 be returned in this co-processor's registers. */
6670 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6671
6672 /* Allocate and return an RTX element to hold the return type of a call. This
6673 routine must not fail and will only be called if is_return_candidate
6674 returned true with the same parameters. */
6675 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6676
6677 /* Finish processing this argument and prepare to start processing
6678 the next one. */
6679 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6680 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6681 {
6682 AAPCS_CP(vfp)
6683 };
6684
6685 #undef AAPCS_CP
6686
6687 static int
6688 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6689 const_tree type)
6690 {
6691 int i;
6692
6693 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6694 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6695 return i;
6696
6697 return -1;
6698 }
6699
6700 static int
6701 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6702 {
6703 /* We aren't passed a decl, so we can't check that a call is local.
6704 However, it isn't clear that that would be a win anyway, since it
6705 might limit some tail-calling opportunities. */
6706 enum arm_pcs pcs_variant;
6707
6708 if (fntype)
6709 {
6710 const_tree fndecl = NULL_TREE;
6711
6712 if (TREE_CODE (fntype) == FUNCTION_DECL)
6713 {
6714 fndecl = fntype;
6715 fntype = TREE_TYPE (fntype);
6716 }
6717
6718 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6719 }
6720 else
6721 pcs_variant = arm_pcs_default;
6722
6723 if (pcs_variant != ARM_PCS_AAPCS)
6724 {
6725 int i;
6726
6727 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6728 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6729 TYPE_MODE (type),
6730 type))
6731 return i;
6732 }
6733 return -1;
6734 }
6735
6736 static rtx
6737 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6738 const_tree fntype)
6739 {
6740 /* We aren't passed a decl, so we can't check that a call is local.
6741 However, it isn't clear that that would be a win anyway, since it
6742 might limit some tail-calling opportunities. */
6743 enum arm_pcs pcs_variant;
6744 int unsignedp ATTRIBUTE_UNUSED;
6745
6746 if (fntype)
6747 {
6748 const_tree fndecl = NULL_TREE;
6749
6750 if (TREE_CODE (fntype) == FUNCTION_DECL)
6751 {
6752 fndecl = fntype;
6753 fntype = TREE_TYPE (fntype);
6754 }
6755
6756 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6757 }
6758 else
6759 pcs_variant = arm_pcs_default;
6760
6761 /* Promote integer types. */
6762 if (type && INTEGRAL_TYPE_P (type))
6763 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6764
6765 if (pcs_variant != ARM_PCS_AAPCS)
6766 {
6767 int i;
6768
6769 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6770 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6771 type))
6772 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6773 mode, type);
6774 }
6775
6776 /* Promote small structs returned in a register to full-word size
6777 for big-endian AAPCS. */
6778 if (type && arm_return_in_msb (type))
6779 {
6780 HOST_WIDE_INT size = int_size_in_bytes (type);
6781 if (size % UNITS_PER_WORD != 0)
6782 {
6783 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6784 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6785 }
6786 }
6787
6788 return gen_rtx_REG (mode, R0_REGNUM);
6789 }
6790
6791 static rtx
6792 aapcs_libcall_value (machine_mode mode)
6793 {
6794 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6795 && GET_MODE_SIZE (mode) <= 4)
6796 mode = SImode;
6797
6798 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6799 }
6800
6801 /* Lay out a function argument using the AAPCS rules. The rule
6802 numbers referred to here are those in the AAPCS. */
6803 static void
6804 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6805 const_tree type, bool named)
6806 {
6807 int nregs, nregs2;
6808 int ncrn;
6809
6810 /* We only need to do this once per argument. */
6811 if (pcum->aapcs_arg_processed)
6812 return;
6813
6814 pcum->aapcs_arg_processed = true;
6815
6816 /* Special case: if named is false then we are handling an incoming
6817 anonymous argument which is on the stack. */
6818 if (!named)
6819 return;
6820
6821 /* Is this a potential co-processor register candidate? */
6822 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6823 {
6824 int slot = aapcs_select_call_coproc (pcum, mode, type);
6825 pcum->aapcs_cprc_slot = slot;
6826
6827 /* We don't have to apply any of the rules from part B of the
6828 preparation phase, these are handled elsewhere in the
6829 compiler. */
6830
6831 if (slot >= 0)
6832 {
6833 /* A Co-processor register candidate goes either in its own
6834 class of registers or on the stack. */
6835 if (!pcum->aapcs_cprc_failed[slot])
6836 {
6837 /* C1.cp - Try to allocate the argument to co-processor
6838 registers. */
6839 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6840 return;
6841
6842 /* C2.cp - Put the argument on the stack and note that we
6843 can't assign any more candidates in this slot. We also
6844 need to note that we have allocated stack space, so that
6845 we won't later try to split a non-cprc candidate between
6846 core registers and the stack. */
6847 pcum->aapcs_cprc_failed[slot] = true;
6848 pcum->can_split = false;
6849 }
6850
6851 /* We didn't get a register, so this argument goes on the
6852 stack. */
6853 gcc_assert (pcum->can_split == false);
6854 return;
6855 }
6856 }
6857
6858 /* C3 - For double-word aligned arguments, round the NCRN up to the
6859 next even number. */
6860 ncrn = pcum->aapcs_ncrn;
6861 if (ncrn & 1)
6862 {
6863 int res = arm_needs_doubleword_align (mode, type);
6864 /* Only warn during RTL expansion of call stmts, otherwise we would
6865 warn e.g. during gimplification even on functions that will always
6866 be inlined, and we'd warn multiple times.  Don't warn when
6867 called in expand_function_start either, as we warn instead in
6868 arm_function_arg_boundary in that case. */
6869 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6870 inform (input_location, "parameter passing for argument of type "
6871 "%qT changed in GCC 7.1", type);
6872 else if (res > 0)
6873 ncrn++;
6874 }
6875
6876 nregs = ARM_NUM_REGS2(mode, type);
6877
6878 /* Sigh, this test should really assert that nregs > 0, but a GCC
6879 extension allows empty structs and then gives them zero size; it
6880 then allows such a structure to be passed by value. For some of
6881 the code below we have to pretend that such an argument has
6882 non-zero size so that we 'locate' it correctly either in
6883 registers or on the stack. */
6884 gcc_assert (nregs >= 0);
6885
6886 nregs2 = nregs ? nregs : 1;
6887
6888 /* C4 - Argument fits entirely in core registers. */
6889 if (ncrn + nregs2 <= NUM_ARG_REGS)
6890 {
6891 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6892 pcum->aapcs_next_ncrn = ncrn + nregs;
6893 return;
6894 }
6895
6896 /* C5 - Some core registers left and there are no arguments already
6897 on the stack: split this argument between the remaining core
6898 registers and the stack. */
6899 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6900 {
6901 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6902 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6903 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6904 return;
6905 }
6906
6907 /* C6 - NCRN is set to 4. */
6908 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6909
6910 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6911 return;
6912 }
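
/* Illustrative sketch (not part of the original sources): under the
   base (integer) variant of the AAPCS, a prototype such as

     void f (int a, double b, int c);

   is laid out by the rules above as follows: "a" goes in r0 (C4);
   "b" needs doubleword alignment, so the NCRN is rounded up from 1
   to 2 (C3) and "b" occupies r2/r3 (C4); for "c" no core registers
   remain, so the NCRN is set to 4 and "c" goes on the stack
   (C6, C7/C8).  With the VFP variant, "b" would instead be a
   co-processor register candidate.  */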
6913
6914 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6915 for a call to a function whose data type is FNTYPE.
6916 For a library call, FNTYPE is NULL. */
6917 void
6918 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6919 rtx libname,
6920 tree fndecl ATTRIBUTE_UNUSED)
6921 {
6922 /* Long call handling. */
6923 if (fntype)
6924 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6925 else
6926 pcum->pcs_variant = arm_pcs_default;
6927
6928 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6929 {
6930 if (arm_libcall_uses_aapcs_base (libname))
6931 pcum->pcs_variant = ARM_PCS_AAPCS;
6932
6933 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6934 pcum->aapcs_reg = NULL_RTX;
6935 pcum->aapcs_partial = 0;
6936 pcum->aapcs_arg_processed = false;
6937 pcum->aapcs_cprc_slot = -1;
6938 pcum->can_split = true;
6939
6940 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6941 {
6942 int i;
6943
6944 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6945 {
6946 pcum->aapcs_cprc_failed[i] = false;
6947 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6948 }
6949 }
6950 return;
6951 }
6952
6953 /* Legacy ABIs */
6954
6955 /* On the ARM, the offset starts at 0. */
6956 pcum->nregs = 0;
6957 pcum->iwmmxt_nregs = 0;
6958 pcum->can_split = true;
6959
6960 /* Varargs vectors are treated the same as long long.
6961 named_count avoids having to change the way arm handles 'named' */
6962 pcum->named_count = 0;
6963 pcum->nargs = 0;
6964
6965 if (TARGET_REALLY_IWMMXT && fntype)
6966 {
6967 tree fn_arg;
6968
6969 for (fn_arg = TYPE_ARG_TYPES (fntype);
6970 fn_arg;
6971 fn_arg = TREE_CHAIN (fn_arg))
6972 pcum->named_count += 1;
6973
6974 if (! pcum->named_count)
6975 pcum->named_count = INT_MAX;
6976 }
6977 }
6978
6979 /* Return 2 if double word alignment is required for argument passing,
6980 but wasn't required before the fix for PR88469.
6981 Return 1 if double word alignment is required for argument passing.
6982 Return -1 if double word alignment used to be required for argument
6983 passing before the PR77728 ABI fix, but is not required anymore.
6984 Return 0 if double word alignment is not required and wasn't required
6985 before either. */
6986 static int
6987 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6988 {
6989 if (!type)
6990 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6991
6992 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6993 if (!AGGREGATE_TYPE_P (type))
6994 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6995
6996 /* Array types: Use member alignment of element type. */
6997 if (TREE_CODE (type) == ARRAY_TYPE)
6998 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6999
7000 int ret = 0;
7001 int ret2 = 0;
7002 /* Record/aggregate types: Use greatest member alignment of any member.
7003
7004 Note that we explicitly consider zero-sized fields here, even though
7005 they don't map to AAPCS machine types. For example, in:
7006
7007 struct __attribute__((aligned(8))) empty {};
7008
7009 struct s {
7010 [[no_unique_address]] empty e;
7011 int x;
7012 };
7013
7014 "s" contains only one Fundamental Data Type (the int field)
7015 but gains 8-byte alignment and size thanks to "e". */
7016 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7017 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7018 {
7019 if (TREE_CODE (field) == FIELD_DECL)
7020 return 1;
7021 else
7022 /* Before PR77728 fix, we were incorrectly considering also
7023 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7024 Make sure we can warn about that with -Wpsabi. */
7025 ret = -1;
7026 }
7027 else if (TREE_CODE (field) == FIELD_DECL
7028 && DECL_BIT_FIELD_TYPE (field)
7029 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7030 ret2 = 1;
7031
7032 if (ret2)
7033 return 2;
7034
7035 return ret;
7036 }
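
/* Illustrative example (not part of the original sources): a struct
   with a doubleword-aligned member, e.g.

     struct pair { long long ll; int i; };

   has a FIELD_DECL whose alignment (64 bits) exceeds PARM_BOUNDARY
   (32 bits), so this function returns 1 and the argument is passed
   in an even-numbered register pair or at a doubleword-aligned stack
   slot.  */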
7037
7038
7039 /* Determine where to put an argument to a function.
7040 Value is zero to push the argument on the stack,
7041 or a hard register in which to store the argument.
7042
7043 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7044 the preceding args and about the function being called.
7045 ARG is a description of the argument.
7046
7047 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7048 other arguments are passed on the stack. If (NAMED == 0) (which happens
7049 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7050 defined), say it is passed on the stack (function_prologue will
7051 indeed make it be passed on the stack if necessary).  */
7052
7053 static rtx
7054 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7055 {
7056 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7057 int nregs;
7058
7059 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7060 a call insn (op3 of a call_value insn). */
7061 if (arg.end_marker_p ())
7062 return const0_rtx;
7063
7064 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7065 {
7066 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7067 return pcum->aapcs_reg;
7068 }
7069
7070 /* Varargs vectors are treated the same as long long.
7071 named_count avoids having to change the way arm handles 'named' */
7072 if (TARGET_IWMMXT_ABI
7073 && arm_vector_mode_supported_p (arg.mode)
7074 && pcum->named_count > pcum->nargs + 1)
7075 {
7076 if (pcum->iwmmxt_nregs <= 9)
7077 return gen_rtx_REG (arg.mode,
7078 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7079 else
7080 {
7081 pcum->can_split = false;
7082 return NULL_RTX;
7083 }
7084 }
7085
7086 /* Put doubleword aligned quantities in even register pairs. */
7087 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7088 {
7089 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7090 if (res < 0 && warn_psabi)
7091 inform (input_location, "parameter passing for argument of type "
7092 "%qT changed in GCC 7.1", arg.type);
7093 else if (res > 0)
7094 {
7095 pcum->nregs++;
7096 if (res > 1 && warn_psabi)
7097 inform (input_location, "parameter passing for argument of type "
7098 "%qT changed in GCC 9.1", arg.type);
7099 }
7100 }
7101
7102 /* Only allow splitting an arg between regs and memory if all preceding
7103 args were allocated to regs. For args passed by reference we only count
7104 the reference pointer. */
7105 if (pcum->can_split)
7106 nregs = 1;
7107 else
7108 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7109
7110 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7111 return NULL_RTX;
7112
7113 return gen_rtx_REG (arg.mode, pcum->nregs);
7114 }
7115
7116 static unsigned int
7117 arm_function_arg_boundary (machine_mode mode, const_tree type)
7118 {
7119 if (!ARM_DOUBLEWORD_ALIGN)
7120 return PARM_BOUNDARY;
7121
7122 int res = arm_needs_doubleword_align (mode, type);
7123 if (res < 0 && warn_psabi)
7124 inform (input_location, "parameter passing for argument of type %qT "
7125 "changed in GCC 7.1", type);
7126 if (res > 1 && warn_psabi)
7127 inform (input_location, "parameter passing for argument of type "
7128 "%qT changed in GCC 9.1", type);
7129
7130 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7131 }
7132
7133 static int
7134 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7135 {
7136 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7137 int nregs = pcum->nregs;
7138
7139 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7140 {
7141 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7142 return pcum->aapcs_partial;
7143 }
7144
7145 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7146 return 0;
7147
7148 if (NUM_ARG_REGS > nregs
7149 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7150 && pcum->can_split)
7151 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7152
7153 return 0;
7154 }
7155
7156 /* Update the data in PCUM to advance over argument ARG. */
7157
7158 static void
7159 arm_function_arg_advance (cumulative_args_t pcum_v,
7160 const function_arg_info &arg)
7161 {
7162 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7163
7164 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7165 {
7166 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7167
7168 if (pcum->aapcs_cprc_slot >= 0)
7169 {
7170 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7171 arg.type);
7172 pcum->aapcs_cprc_slot = -1;
7173 }
7174
7175 /* Generic stuff. */
7176 pcum->aapcs_arg_processed = false;
7177 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7178 pcum->aapcs_reg = NULL_RTX;
7179 pcum->aapcs_partial = 0;
7180 }
7181 else
7182 {
7183 pcum->nargs += 1;
7184 if (arm_vector_mode_supported_p (arg.mode)
7185 && pcum->named_count > pcum->nargs
7186 && TARGET_IWMMXT_ABI)
7187 pcum->iwmmxt_nregs += 1;
7188 else
7189 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7190 }
7191 }
7192
7193 /* Variable sized types are passed by reference. This is a GCC
7194 extension to the ARM ABI. */
7195
7196 static bool
7197 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7198 {
7199 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7200 }
7201 \f
7202 /* Encode the current state of the #pragma [no_]long_calls. */
7203 typedef enum
7204 {
7205 OFF, /* No #pragma [no_]long_calls is in effect. */
7206 LONG, /* #pragma long_calls is in effect. */
7207 SHORT /* #pragma no_long_calls is in effect. */
7208 } arm_pragma_enum;
7209
7210 static arm_pragma_enum arm_pragma_long_calls = OFF;
7211
7212 void
7213 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7214 {
7215 arm_pragma_long_calls = LONG;
7216 }
7217
7218 void
7219 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7220 {
7221 arm_pragma_long_calls = SHORT;
7222 }
7223
7224 void
7225 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7226 {
7227 arm_pragma_long_calls = OFF;
7228 }
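
/* Illustrative usage (not part of the original sources): the pragma
   state toggles as the source is parsed, e.g.

     #pragma long_calls
     void far_helper (void);     - declared while the state is LONG
     #pragma no_long_calls
     void near_helper (void);    - declared while the state is SHORT
     #pragma long_calls_off
     void plain_helper (void);   - declared while the state is OFF

   arm_set_default_type_attributes, later in this file, attaches
   "long_call" or "short_call" to the function types declared inside
   the corresponding regions.  */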
7229 \f
7230 /* Handle an attribute requiring a FUNCTION_DECL;
7231 arguments as in struct attribute_spec.handler. */
7232 static tree
7233 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7234 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7235 {
7236 if (TREE_CODE (*node) != FUNCTION_DECL)
7237 {
7238 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7239 name);
7240 *no_add_attrs = true;
7241 }
7242
7243 return NULL_TREE;
7244 }
7245
7246 /* Handle an "interrupt" or "isr" attribute;
7247 arguments as in struct attribute_spec.handler. */
7248 static tree
7249 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7250 bool *no_add_attrs)
7251 {
7252 if (DECL_P (*node))
7253 {
7254 if (TREE_CODE (*node) != FUNCTION_DECL)
7255 {
7256 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7257 name);
7258 *no_add_attrs = true;
7259 }
7260 else if (TARGET_VFP_BASE)
7261 {
7262 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7263 name);
7264 }
7265 /* FIXME: the argument, if any, is checked for type attributes;
7266 should it be checked for decl ones?  */
7267 }
7268 else
7269 {
7270 if (TREE_CODE (*node) == FUNCTION_TYPE
7271 || TREE_CODE (*node) == METHOD_TYPE)
7272 {
7273 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7274 {
7275 warning (OPT_Wattributes, "%qE attribute ignored",
7276 name);
7277 *no_add_attrs = true;
7278 }
7279 }
7280 else if (TREE_CODE (*node) == POINTER_TYPE
7281 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7282 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7283 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7284 {
7285 *node = build_variant_type_copy (*node);
7286 TREE_TYPE (*node) = build_type_attribute_variant
7287 (TREE_TYPE (*node),
7288 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7289 *no_add_attrs = true;
7290 }
7291 else
7292 {
7293 /* Possibly pass this attribute on from the type to a decl. */
7294 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7295 | (int) ATTR_FLAG_FUNCTION_NEXT
7296 | (int) ATTR_FLAG_ARRAY_NEXT))
7297 {
7298 *no_add_attrs = true;
7299 return tree_cons (name, args, NULL_TREE);
7300 }
7301 else
7302 {
7303 warning (OPT_Wattributes, "%qE attribute ignored",
7304 name);
7305 }
7306 }
7307 }
7308
7309 return NULL_TREE;
7310 }
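
/* Illustrative usage (not part of the original sources): the handler
   above accepts declarations such as

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   where the optional string argument is interpreted by
   arm_isr_value; an unrecognized argument yields ARM_FT_UNKNOWN and
   the attribute is ignored with a warning, as implemented above.  */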
7311
7312 /* Handle a "pcs" attribute; arguments as in struct
7313 attribute_spec.handler. */
7314 static tree
7315 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7316 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7317 {
7318 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7319 {
7320 warning (OPT_Wattributes, "%qE attribute ignored", name);
7321 *no_add_attrs = true;
7322 }
7323 return NULL_TREE;
7324 }
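
/* Illustrative usage (not part of the original sources):

     double madd (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   "aapcs" and "aapcs-vfp" are the values accepted by
   arm_pcs_from_attribute; any other argument is rejected above and
   the attribute is ignored.  */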
7325
7326 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7327 /* Handle the "notshared" attribute. This attribute is another way of
7328 requesting hidden visibility. ARM's compiler supports
7329 "__declspec(notshared)"; we support the same thing via an
7330 attribute. */
7331
7332 static tree
7333 arm_handle_notshared_attribute (tree *node,
7334 tree name ATTRIBUTE_UNUSED,
7335 tree args ATTRIBUTE_UNUSED,
7336 int flags ATTRIBUTE_UNUSED,
7337 bool *no_add_attrs)
7338 {
7339 tree decl = TYPE_NAME (*node);
7340
7341 if (decl)
7342 {
7343 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7344 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7345 *no_add_attrs = false;
7346 }
7347 return NULL_TREE;
7348 }
7349 #endif
7350
7351 /* This function returns true if a function with declaration FNDECL and type
7352 FNTYPE uses the stack to pass arguments or return variables and false
7353 otherwise. This is used for functions with the attributes
7354 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7355 diagnostic messages if the stack is used. NAME is the name of the attribute
7356 used. */
7357
7358 static bool
7359 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7360 {
7361 function_args_iterator args_iter;
7362 CUMULATIVE_ARGS args_so_far_v;
7363 cumulative_args_t args_so_far;
7364 bool first_param = true;
7365 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7366
7367 /* Error out if any argument is passed on the stack. */
7368 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7369 args_so_far = pack_cumulative_args (&args_so_far_v);
7370 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7371 {
7372 rtx arg_rtx;
7373
7374 prev_arg_type = arg_type;
7375 if (VOID_TYPE_P (arg_type))
7376 continue;
7377
7378 function_arg_info arg (arg_type, /*named=*/true);
7379 if (!first_param)
7380 /* ??? We should advance after processing the argument and pass
7381 the argument we're advancing past. */
7382 arm_function_arg_advance (args_so_far, arg);
7383 arg_rtx = arm_function_arg (args_so_far, arg);
7384 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7385 {
7386 error ("%qE attribute not available to functions with arguments "
7387 "passed on the stack", name);
7388 return true;
7389 }
7390 first_param = false;
7391 }
7392
7393 /* Error out for variadic functions since we cannot control how many
7394 arguments will be passed and thus the stack could be used.  stdarg_p () is
7395 not used for the check, to avoid walking the argument list twice.  */
7396 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7397 {
7398 error ("%qE attribute not available to functions with variable number "
7399 "of arguments", name);
7400 return true;
7401 }
7402
7403 /* Error out if return value is passed on the stack. */
7404 ret_type = TREE_TYPE (fntype);
7405 if (arm_return_in_memory (ret_type, fntype))
7406 {
7407 error ("%qE attribute not available to functions that return value on "
7408 "the stack", name);
7409 return true;
7410 }
7411 return false;
7412 }
7413
7414 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7415 function will check whether the attribute is allowed here and will add the
7416 attribute to the function declaration tree or otherwise issue a warning. */
7417
7418 static tree
7419 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7420 tree /* args */,
7421 int /* flags */,
7422 bool *no_add_attrs)
7423 {
7424 tree fndecl;
7425
7426 if (!use_cmse)
7427 {
7428 *no_add_attrs = true;
7429 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7430 "option.", name);
7431 return NULL_TREE;
7432 }
7433
7434 /* Ignore attribute for function types. */
7435 if (TREE_CODE (*node) != FUNCTION_DECL)
7436 {
7437 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7438 name);
7439 *no_add_attrs = true;
7440 return NULL_TREE;
7441 }
7442
7443 fndecl = *node;
7444
7445 /* Warn for static linkage functions. */
7446 if (!TREE_PUBLIC (fndecl))
7447 {
7448 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7449 "with static linkage", name);
7450 *no_add_attrs = true;
7451 return NULL_TREE;
7452 }
7453
7454 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7455 TREE_TYPE (fndecl));
7456 return NULL_TREE;
7457 }
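
/* Illustrative usage (not part of the original sources), assuming
   the translation unit is compiled with -mcmse on an ARMv8-M target:

     int __attribute__ ((cmse_nonsecure_entry)) secure_status (void)
     {
       return 0;
     }

   All arguments and the return value must fit in registers; a
   signature that needs the stack (for example one taking five int
   parameters) is rejected by cmse_func_args_or_return_in_stack
   above.  */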
7458
7459
7460 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7461 function will check whether the attribute is allowed here and will add the
7462 attribute to the function type tree or otherwise issue a diagnostic. The
7463 reason we check this at declaration time is to only allow the use of the
7464 attribute with declarations of function pointers and not function
7465 declarations. This function checks NODE is of the expected type and issues
7466 diagnostics otherwise using NAME. If it is not of the expected type
7467 *NO_ADD_ATTRS will be set to true. */
7468
7469 static tree
7470 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7471 tree /* args */,
7472 int /* flags */,
7473 bool *no_add_attrs)
7474 {
7475 tree decl = NULL_TREE, fntype = NULL_TREE;
7476 tree type;
7477
7478 if (!use_cmse)
7479 {
7480 *no_add_attrs = true;
7481 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7482 "option.", name);
7483 return NULL_TREE;
7484 }
7485
7486 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7487 {
7488 decl = *node;
7489 fntype = TREE_TYPE (decl);
7490 }
7491
7492 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7493 fntype = TREE_TYPE (fntype);
7494
7495 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7496 {
7497 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7498 "function pointer", name);
7499 *no_add_attrs = true;
7500 return NULL_TREE;
7501 }
7502
7503 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7504
7505 if (*no_add_attrs)
7506 return NULL_TREE;
7507
7508 /* Prevent trees from being shared among function types with and without
7509 the cmse_nonsecure_call attribute.  */
7510 type = TREE_TYPE (decl);
7511
7512 type = build_distinct_type_copy (type);
7513 TREE_TYPE (decl) = type;
7514 fntype = type;
7515
7516 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7517 {
7518 type = fntype;
7519 fntype = TREE_TYPE (fntype);
7520 fntype = build_distinct_type_copy (fntype);
7521 TREE_TYPE (type) = fntype;
7522 }
7523
7524 /* Construct a type attribute and add it to the function type. */
7525 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7526 TYPE_ATTRIBUTES (fntype));
7527 TYPE_ATTRIBUTES (fntype) = attrs;
7528 return NULL_TREE;
7529 }
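
/* Illustrative usage (not part of the original sources), again
   assuming -mcmse: the attribute applies to the base type of a
   function pointer, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (int);
     ns_cb_t *callback;

   whereas placing it directly on a function declaration is diagnosed
   by the check above.  */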
7530
7531 /* Return 0 if the attributes for two types are incompatible, 1 if they
7532 are compatible, and 2 if they are nearly compatible (which causes a
7533 warning to be generated). */
7534 static int
7535 arm_comp_type_attributes (const_tree type1, const_tree type2)
7536 {
7537 int l1, l2, s1, s2;
7538
7539 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7540 TYPE_ATTRIBUTES (type1));
7541 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7542 TYPE_ATTRIBUTES (type2));
7543 if (bool (attrs1) != bool (attrs2))
7544 return 0;
7545 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7546 return 0;
7547
7548 /* Check for mismatch of non-default calling convention. */
7549 if (TREE_CODE (type1) != FUNCTION_TYPE)
7550 return 1;
7551
7552 /* Check for mismatched call attributes. */
7553 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7554 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7555 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7556 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7557
7558 /* Only bother to check if an attribute is defined. */
7559 if (l1 | l2 | s1 | s2)
7560 {
7561 /* If one type has an attribute, the other must have the same attribute. */
7562 if ((l1 != l2) || (s1 != s2))
7563 return 0;
7564
7565 /* Disallow mixed attributes. */
7566 if ((l1 & s2) || (l2 & s1))
7567 return 0;
7568 }
7569
7570 /* Check for mismatched ISR attribute. */
7571 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7572 if (! l1)
7573 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7574 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7575 if (! l2)
7576 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7577 if (l1 != l2)
7578 return 0;
7579
7580 l1 = lookup_attribute ("cmse_nonsecure_call",
7581 TYPE_ATTRIBUTES (type1)) != NULL;
7582 l2 = lookup_attribute ("cmse_nonsecure_call",
7583 TYPE_ATTRIBUTES (type2)) != NULL;
7584
7585 if (l1 != l2)
7586 return 0;
7587
7588 return 1;
7589 }
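
/* Illustrative example (not part of the original sources): because
   mismatched call attributes make this function return 0, the two
   pointer types involved in

     void far_fn (void) __attribute__ ((long_call));
     void (*plain_ptr) (void) = far_fn;

   are treated as having incompatible attributes, while two pointers
   that both carry "long_call" remain compatible.  */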
7590
7591 /* Assigns default attributes to newly defined type. This is used to
7592 set short_call/long_call attributes for function types of
7593 functions defined inside corresponding #pragma scopes. */
7594 static void
7595 arm_set_default_type_attributes (tree type)
7596 {
7597 /* Add __attribute__ ((long_call)) to all functions, when
7598 inside #pragma long_calls or __attribute__ ((short_call)),
7599 when inside #pragma no_long_calls. */
7600 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7601 {
7602 tree type_attr_list, attr_name;
7603 type_attr_list = TYPE_ATTRIBUTES (type);
7604
7605 if (arm_pragma_long_calls == LONG)
7606 attr_name = get_identifier ("long_call");
7607 else if (arm_pragma_long_calls == SHORT)
7608 attr_name = get_identifier ("short_call");
7609 else
7610 return;
7611
7612 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7613 TYPE_ATTRIBUTES (type) = type_attr_list;
7614 }
7615 }
7616 \f
7617 /* Return true if DECL is known to be linked into section SECTION. */
7618
7619 static bool
7620 arm_function_in_section_p (tree decl, section *section)
7621 {
7622 /* We can only be certain about the prevailing symbol definition. */
7623 if (!decl_binds_to_current_def_p (decl))
7624 return false;
7625
7626 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7627 if (!DECL_SECTION_NAME (decl))
7628 {
7629 /* Make sure that we will not create a unique section for DECL. */
7630 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7631 return false;
7632 }
7633
7634 return function_section (decl) == section;
7635 }
7636
7637 /* Return nonzero if a 32-bit "long_call" should be generated for
7638 a call from the current function to DECL. We generate a long_call
7639 if the function:
7640
7641 a. has an __attribute__ ((long_call))
7642 or b. is within the scope of a #pragma long_calls
7643 or c. the -mlong-calls command line switch has been specified
7644
7645 However we do not generate a long call if the function:
7646
7647 d. has an __attribute__ ((short_call))
7648 or e. is inside the scope of a #pragma no_long_calls
7649 or f. is defined in the same section as the current function. */
7650
7651 bool
7652 arm_is_long_call_p (tree decl)
7653 {
7654 tree attrs;
7655
7656 if (!decl)
7657 return TARGET_LONG_CALLS;
7658
7659 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7660 if (lookup_attribute ("short_call", attrs))
7661 return false;
7662
7663 /* For "f", be conservative, and only cater for cases in which the
7664 whole of the current function is placed in the same section. */
7665 if (!flag_reorder_blocks_and_partition
7666 && TREE_CODE (decl) == FUNCTION_DECL
7667 && arm_function_in_section_p (decl, current_function_section ()))
7668 return false;
7669
7670 if (lookup_attribute ("long_call", attrs))
7671 return true;
7672
7673 return TARGET_LONG_CALLS;
7674 }
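
/* Illustrative example (not part of the original sources): with
   -mlong-calls in effect, a call to

     void near_fn (void) __attribute__ ((short_call));

   is still emitted as a normal BL (rule d above), while a call to a
   function without any attribute becomes a long call (rule c).
   Conversely, without -mlong-calls,

     void far_fn (void) __attribute__ ((long_call));

   forces a long call (rule a).  */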
7675
7676 /* Return nonzero if it is ok to make a tail-call to DECL. */
7677 static bool
7678 arm_function_ok_for_sibcall (tree decl, tree exp)
7679 {
7680 unsigned long func_type;
7681
7682 if (cfun->machine->sibcall_blocked)
7683 return false;
7684
7685 if (TARGET_FDPIC)
7686 {
7687 /* In FDPIC, never tailcall something for which we have no decl:
7688 the target function could be in a different module, requiring
7689 a different FDPIC register value. */
7690 if (decl == NULL)
7691 return false;
7692 }
7693
7694 /* Never tailcall something if we are generating code for Thumb-1. */
7695 if (TARGET_THUMB1)
7696 return false;
7697
7698 /* The PIC register is live on entry to VxWorks PLT entries, so we
7699 must make the call before restoring the PIC register. */
7700 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7701 return false;
7702
7703 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7704 may be used both as target of the call and base register for restoring
7705 the VFP registers */
7706 if (TARGET_APCS_FRAME && TARGET_ARM
7707 && TARGET_HARD_FLOAT
7708 && decl && arm_is_long_call_p (decl))
7709 return false;
7710
7711 /* If we are interworking and the function is not declared static
7712 then we can't tail-call it unless we know that it exists in this
7713 compilation unit (since it might be a Thumb routine). */
7714 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7715 && !TREE_ASM_WRITTEN (decl))
7716 return false;
7717
7718 func_type = arm_current_func_type ();
7719 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7720 if (IS_INTERRUPT (func_type))
7721 return false;
7722
7723 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7724 generated for entry functions themselves. */
7725 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7726 return false;
7727
7728 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7729 this would complicate matters for later code generation. */
7730 if (TREE_CODE (exp) == CALL_EXPR)
7731 {
7732 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7733 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7734 return false;
7735 }
7736
7737 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7738 {
7739 /* Check that the return value locations are the same. For
7740 example that we aren't returning a value from the sibling in
7741 a VFP register but then need to transfer it to a core
7742 register. */
7743 rtx a, b;
7744 tree decl_or_type = decl;
7745
7746 /* If it is an indirect function pointer, get the function type. */
7747 if (!decl)
7748 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7749
7750 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7751 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7752 cfun->decl, false);
7753 if (!rtx_equal_p (a, b))
7754 return false;
7755 }
7756
7757 /* Never tailcall if function may be called with a misaligned SP. */
7758 if (IS_STACKALIGN (func_type))
7759 return false;
7760
7761 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7762 references should become a NOP. Don't convert such calls into
7763 sibling calls. */
7764 if (TARGET_AAPCS_BASED
7765 && arm_abi == ARM_ABI_AAPCS
7766 && decl
7767 && DECL_WEAK (decl))
7768 return false;
7769
7770 /* We cannot do a tailcall for an indirect call by descriptor if all the
7771 argument registers are used because the only register left to load the
7772 address is IP and it will already contain the static chain. */
7773 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7774 {
7775 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7776 CUMULATIVE_ARGS cum;
7777 cumulative_args_t cum_v;
7778
7779 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7780 cum_v = pack_cumulative_args (&cum);
7781
7782 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7783 {
7784 tree type = TREE_VALUE (t);
7785 if (!VOID_TYPE_P (type))
7786 {
7787 function_arg_info arg (type, /*named=*/true);
7788 arm_function_arg_advance (cum_v, arg);
7789 }
7790 }
7791
7792 function_arg_info arg (integer_type_node, /*named=*/true);
7793 if (!arm_function_arg (cum_v, arg))
7794 return false;
7795 }
7796
7797 /* Everything else is ok. */
7798 return true;
7799 }
7800
7801 \f
7802 /* Addressing mode support functions. */
7803
7804 /* Return nonzero if X is a legitimate immediate operand when compiling
7805 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7806 int
7807 legitimate_pic_operand_p (rtx x)
7808 {
7809 if (GET_CODE (x) == SYMBOL_REF
7810 || (GET_CODE (x) == CONST
7811 && GET_CODE (XEXP (x, 0)) == PLUS
7812 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7813 return 0;
7814
7815 return 1;
7816 }
7817
7818 /* Record that the current function needs a PIC register. If PIC_REG is null,
7819 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7820 both cases cfun->machine->pic_reg is initialized if we have not already done
7821 so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
7822 the PIC register is reloaded at the current position in the instruction stream
7823 regardless of whether it was loaded before.  Otherwise, it is only loaded if
7824 that has not already been done (crtl->uses_pic_offset_table is null).  Note that
7825 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7826 is only supported iff COMPUTE_NOW is false. */
7827
7828 static void
7829 require_pic_register (rtx pic_reg, bool compute_now)
7830 {
7831 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7832
7833 /* A lot of the logic here is made obscure by the fact that this
7834 routine gets called as part of the rtx cost estimation process.
7835 We don't want those calls to affect any assumptions about the real
7836 function; and further, we can't call entry_of_function() until we
7837 start the real expansion process. */
7838 if (!crtl->uses_pic_offset_table || compute_now)
7839 {
7840 gcc_assert (can_create_pseudo_p ()
7841 || (pic_reg != NULL_RTX
7842 && REG_P (pic_reg)
7843 && GET_MODE (pic_reg) == Pmode));
7844 if (arm_pic_register != INVALID_REGNUM
7845 && !compute_now
7846 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7847 {
7848 if (!cfun->machine->pic_reg)
7849 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7850
7851 /* Play games to avoid marking the function as needing pic
7852 if we are being called as part of the cost-estimation
7853 process. */
7854 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7855 crtl->uses_pic_offset_table = 1;
7856 }
7857 else
7858 {
7859 rtx_insn *seq, *insn;
7860
7861 if (pic_reg == NULL_RTX)
7862 pic_reg = gen_reg_rtx (Pmode);
7863 if (!cfun->machine->pic_reg)
7864 cfun->machine->pic_reg = pic_reg;
7865
7866 /* Play games to avoid marking the function as needing pic
7867 if we are being called as part of the cost-estimation
7868 process. */
7869 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7870 {
7871 crtl->uses_pic_offset_table = 1;
7872 start_sequence ();
7873
7874 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7875 && arm_pic_register > LAST_LO_REGNUM
7876 && !compute_now)
7877 emit_move_insn (cfun->machine->pic_reg,
7878 gen_rtx_REG (Pmode, arm_pic_register));
7879 else
7880 arm_load_pic_register (0UL, pic_reg);
7881
7882 seq = get_insns ();
7883 end_sequence ();
7884
7885 for (insn = seq; insn; insn = NEXT_INSN (insn))
7886 if (INSN_P (insn))
7887 INSN_LOCATION (insn) = prologue_location;
7888
7889 /* We can be called during expansion of PHI nodes, where
7890 we can't yet emit instructions directly in the final
7891 insn stream. Queue the insns on the entry edge, they will
7892 be committed after everything else is expanded. */
7893 if (currently_expanding_to_rtl)
7894 insert_insn_on_edge (seq,
7895 single_succ_edge
7896 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7897 else
7898 emit_insn (seq);
7899 }
7900 }
7901 }
7902 }
7903
7904 /* Generate insns to calculate the address of ORIG in pic mode. */
7905 static rtx_insn *
7906 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7907 {
7908 rtx pat;
7909 rtx mem;
7910
7911 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7912
7913 /* Make the MEM as close to a constant as possible. */
7914 mem = SET_SRC (pat);
7915 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7916 MEM_READONLY_P (mem) = 1;
7917 MEM_NOTRAP_P (mem) = 1;
7918
7919 return emit_insn (pat);
7920 }
7921
7922 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7923 created to hold the result of the load. If not NULL, PIC_REG indicates
7924 which register to use as PIC register, otherwise it is decided by register
7925 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7926 location in the instruction stream, regardless of whether it was loaded
7927 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7928 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7929
7930 Returns the register REG into which the PIC load is performed. */
7931
7932 rtx
7933 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7934 bool compute_now)
7935 {
7936 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7937
7938 if (GET_CODE (orig) == SYMBOL_REF
7939 || GET_CODE (orig) == LABEL_REF)
7940 {
7941 if (reg == 0)
7942 {
7943 gcc_assert (can_create_pseudo_p ());
7944 reg = gen_reg_rtx (Pmode);
7945 }
7946
7947 /* VxWorks does not impose a fixed gap between segments; the run-time
7948 gap can be different from the object-file gap. We therefore can't
7949 use GOTOFF unless we are absolutely sure that the symbol is in the
7950 same segment as the GOT. Unfortunately, the flexibility of linker
7951 scripts means that we can't be sure of that in general, so assume
7952 that GOTOFF is never valid on VxWorks. */
7953 /* References to weak symbols cannot be resolved locally: they
7954 may be overridden by a non-weak definition at link time. */
7955 rtx_insn *insn;
7956 if ((GET_CODE (orig) == LABEL_REF
7957 || (GET_CODE (orig) == SYMBOL_REF
7958 && SYMBOL_REF_LOCAL_P (orig)
7959 && (SYMBOL_REF_DECL (orig)
7960 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7961 && (!SYMBOL_REF_FUNCTION_P (orig)
7962 || arm_fdpic_local_funcdesc_p (orig))))
7963 && NEED_GOT_RELOC
7964 && arm_pic_data_is_text_relative)
7965 insn = arm_pic_static_addr (orig, reg);
7966 else
7967 {
7968 /* If this function doesn't have a pic register, create one now. */
7969 require_pic_register (pic_reg, compute_now);
7970
7971 if (pic_reg == NULL_RTX)
7972 pic_reg = cfun->machine->pic_reg;
7973
7974 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7975 }
7976
7977 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7978 by loop. */
7979 set_unique_reg_note (insn, REG_EQUAL, orig);
7980
7981 return reg;
7982 }
7983 else if (GET_CODE (orig) == CONST)
7984 {
7985 rtx base, offset;
7986
7987 if (GET_CODE (XEXP (orig, 0)) == PLUS
7988 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7989 return orig;
7990
7991 /* Handle the case where we have: const (UNSPEC_TLS). */
7992 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7993 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7994 return orig;
7995
7996 /* Handle the case where we have:
7997 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7998 CONST_INT. */
7999 if (GET_CODE (XEXP (orig, 0)) == PLUS
8000 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8001 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8002 {
8003 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8004 return orig;
8005 }
8006
8007 if (reg == 0)
8008 {
8009 gcc_assert (can_create_pseudo_p ());
8010 reg = gen_reg_rtx (Pmode);
8011 }
8012
8013 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8014
8015 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8016 pic_reg, compute_now);
8017 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8018 base == reg ? 0 : reg, pic_reg,
8019 compute_now);
8020
8021 if (CONST_INT_P (offset))
8022 {
8023 /* The base register doesn't really matter, we only want to
8024 test the index for the appropriate mode. */
8025 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8026 {
8027 gcc_assert (can_create_pseudo_p ());
8028 offset = force_reg (Pmode, offset);
8029 }
8030
8031 if (CONST_INT_P (offset))
8032 return plus_constant (Pmode, base, INTVAL (offset));
8033 }
8034
8035 if (GET_MODE_SIZE (mode) > 4
8036 && (GET_MODE_CLASS (mode) == MODE_INT
8037 || TARGET_SOFT_FLOAT))
8038 {
8039 emit_insn (gen_addsi3 (reg, base, offset));
8040 return reg;
8041 }
8042
8043 return gen_rtx_PLUS (Pmode, base, offset);
8044 }
8045
8046 return orig;
8047 }
8048
8049
8050 /* Whether a register is callee saved or not. This is necessary because high
8051 registers are marked as caller saved when optimizing for size on Thumb-1
8052 targets despite being callee saved in order to avoid using them. */
8053 #define callee_saved_reg_p(reg) \
8054 (!call_used_or_fixed_reg_p (reg) \
8055 || (TARGET_THUMB1 && optimize_size \
8056 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8057
8058 /* Return a mask for the call-clobbered low registers that are unused
8059 at the end of the prologue. */
8060 static unsigned long
8061 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8062 {
8063 unsigned long mask = 0;
8064 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8065
8066 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8067 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8068 mask |= 1 << (reg - FIRST_LO_REGNUM);
8069 return mask;
8070 }
8071
8072 /* Similarly for the start of the epilogue. */
8073 static unsigned long
8074 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8075 {
8076 unsigned long mask = 0;
8077 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8078
8079 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8080 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8081 mask |= 1 << (reg - FIRST_LO_REGNUM);
8082 return mask;
8083 }
8084
8085 /* Find a spare register to use during the prologue of a function.  */
8086
8087 static int
8088 thumb_find_work_register (unsigned long pushed_regs_mask)
8089 {
8090 int reg;
8091
8092 unsigned long unused_regs
8093 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8094
8095 /* Check the argument registers first as these are call-used. The
8096 register allocation order means that sometimes r3 might be used
8097 but earlier argument registers might not, so check them all. */
8098 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8099 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8100 return reg;
8101
8102 /* Otherwise look for a call-saved register that is going to be pushed. */
8103 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8104 if (pushed_regs_mask & (1 << reg))
8105 return reg;
8106
8107 if (TARGET_THUMB2)
8108 {
8109 /* Thumb-2 can use high regs. */
8110 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8111 if (pushed_regs_mask & (1 << reg))
8112 return reg;
8113 }
8114 /* Something went wrong - thumb_compute_save_reg_mask()
8115 should have arranged for a suitable register to be pushed. */
8116 gcc_unreachable ();
8117 }
8118
8119 static GTY(()) int pic_labelno;
8120
8121 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8122 low register. */
8123
8124 void
8125 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8126 {
8127 rtx l1, labelno, pic_tmp, pic_rtx;
8128
8129 if (crtl->uses_pic_offset_table == 0
8130 || TARGET_SINGLE_PIC_BASE
8131 || TARGET_FDPIC)
8132 return;
8133
8134 gcc_assert (flag_pic);
8135
8136 if (pic_reg == NULL_RTX)
8137 pic_reg = cfun->machine->pic_reg;
8138 if (TARGET_VXWORKS_RTP)
8139 {
8140 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8141 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8142 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8143
8144 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8145
8146 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8147 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8148 }
8149 else
8150 {
8151 /* We use an UNSPEC rather than a LABEL_REF because this label
8152 never appears in the code stream. */
8153
8154 labelno = GEN_INT (pic_labelno++);
8155 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8156 l1 = gen_rtx_CONST (VOIDmode, l1);
8157
8158 /* On the ARM the PC register contains 'dot + 8' at the time of the
8159 addition, on the Thumb it is 'dot + 4'. */
8160 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8161 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8162 UNSPEC_GOTSYM_OFF);
8163 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8164
8165 if (TARGET_32BIT)
8166 {
8167 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8168 }
8169 else /* TARGET_THUMB1 */
8170 {
8171 if (arm_pic_register != INVALID_REGNUM
8172 && REGNO (pic_reg) > LAST_LO_REGNUM)
8173 {
8174 /* We will have pushed the pic register, so we should always be
8175 able to find a work register. */
8176 pic_tmp = gen_rtx_REG (SImode,
8177 thumb_find_work_register (saved_regs));
8178 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8179 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8180 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8181 }
8182 else if (arm_pic_register != INVALID_REGNUM
8183 && arm_pic_register > LAST_LO_REGNUM
8184 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8185 {
8186 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8187 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8188 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8189 }
8190 else
8191 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8192 }
8193 }
8194
8195 /* Need to emit this whether or not we obey regdecls,
8196 since setjmp/longjmp can cause life info to screw up. */
8197 emit_use (pic_reg);
8198 }
8199
8200 /* Try to determine whether an object, referenced via ORIG, will be
8201 placed in the text or data segment. This is used in FDPIC mode, to
8202 decide which relocations to use when accessing ORIG. *IS_READONLY
8203 is set to true if ORIG is a read-only location, false otherwise.
8204 Return true if we could determine the location of ORIG, false
8205 otherwise. *IS_READONLY is valid only when we return true. */
8206 static bool
8207 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8208 {
8209 *is_readonly = false;
8210
8211 if (GET_CODE (orig) == LABEL_REF)
8212 {
8213 *is_readonly = true;
8214 return true;
8215 }
8216
8217 if (SYMBOL_REF_P (orig))
8218 {
8219 if (CONSTANT_POOL_ADDRESS_P (orig))
8220 {
8221 *is_readonly = true;
8222 return true;
8223 }
8224 if (SYMBOL_REF_LOCAL_P (orig)
8225 && !SYMBOL_REF_EXTERNAL_P (orig)
8226 && SYMBOL_REF_DECL (orig)
8227 && (!DECL_P (SYMBOL_REF_DECL (orig))
8228 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8229 {
8230 tree decl = SYMBOL_REF_DECL (orig);
8231 tree init = (TREE_CODE (decl) == VAR_DECL)
8232 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8233 ? decl : 0;
8234 int reloc = 0;
8235 bool named_section, readonly;
8236
8237 if (init && init != error_mark_node)
8238 reloc = compute_reloc_for_constant (init);
8239
8240 named_section = TREE_CODE (decl) == VAR_DECL
8241 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8242 readonly = decl_readonly_section (decl, reloc);
8243
8244 /* We don't know where the link script will put a named
8245 section, so return false in such a case. */
8246 if (named_section)
8247 return false;
8248
8249 *is_readonly = readonly;
8250 return true;
8251 }
8252
8253 /* We don't know. */
8254 return false;
8255 }
8256
8257 gcc_unreachable ();
8258 }
8259
8260 /* Generate code to load the address of a static var when flag_pic is set. */
8261 static rtx_insn *
8262 arm_pic_static_addr (rtx orig, rtx reg)
8263 {
8264 rtx l1, labelno, offset_rtx;
8265 rtx_insn *insn;
8266
8267 gcc_assert (flag_pic);
8268
8269 bool is_readonly = false;
8270 bool info_known = false;
8271
8272 if (TARGET_FDPIC
8273 && SYMBOL_REF_P (orig)
8274 && !SYMBOL_REF_FUNCTION_P (orig))
8275 info_known = arm_is_segment_info_known (orig, &is_readonly);
8276
8277 if (TARGET_FDPIC
8278 && SYMBOL_REF_P (orig)
8279 && !SYMBOL_REF_FUNCTION_P (orig)
8280 && !info_known)
8281 {
8282 /* We don't know where orig is stored, so we have to be
8283 pessimistic and use a GOT relocation. */
8284 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8285
8286 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8287 }
8288 else if (TARGET_FDPIC
8289 && SYMBOL_REF_P (orig)
8290 && (SYMBOL_REF_FUNCTION_P (orig)
8291 || !is_readonly))
8292 {
8293 /* We use the GOTOFF relocation. */
8294 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8295
8296 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8297 emit_insn (gen_movsi (reg, l1));
8298 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8299 }
8300 else
8301 {
8302 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8303 PC-relative access. */
8304 /* We use an UNSPEC rather than a LABEL_REF because this label
8305 never appears in the code stream. */
8306 labelno = GEN_INT (pic_labelno++);
8307 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8308 l1 = gen_rtx_CONST (VOIDmode, l1);
8309
8310 /* On the ARM the PC register contains 'dot + 8' at the time of the
8311 addition, on the Thumb it is 'dot + 4'. */
8312 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8313 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8314 UNSPEC_SYMBOL_OFFSET);
8315 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8316
8317 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8318 labelno));
8319 }
8320
8321 return insn;
8322 }
8323
8324 /* Return nonzero if X is valid as an ARM state addressing register. */
8325 static int
8326 arm_address_register_rtx_p (rtx x, int strict_p)
8327 {
8328 int regno;
8329
8330 if (!REG_P (x))
8331 return 0;
8332
8333 regno = REGNO (x);
8334
8335 if (strict_p)
8336 return ARM_REGNO_OK_FOR_BASE_P (regno);
8337
8338 return (regno <= LAST_ARM_REGNUM
8339 || regno >= FIRST_PSEUDO_REGISTER
8340 || regno == FRAME_POINTER_REGNUM
8341 || regno == ARG_POINTER_REGNUM);
8342 }
8343
8344 /* Return TRUE if this rtx is the difference of a symbol and a label,
8345 and will reduce to a PC-relative relocation in the object file.
8346 Expressions like this can be left alone when generating PIC, rather
8347 than forced through the GOT. */
8348 static int
8349 pcrel_constant_p (rtx x)
8350 {
8351 if (GET_CODE (x) == MINUS)
8352 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8353
8354 return FALSE;
8355 }
8356
8357 /* Return true if X will surely end up in an index register after the
8358 next splitting pass.  */
8359 static bool
8360 will_be_in_index_register (const_rtx x)
8361 {
8362 /* arm.md: calculate_pic_address will split this into a register. */
8363 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8364 }
8365
8366 /* Return nonzero if X is a valid ARM state address operand. */
8367 int
8368 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8369 int strict_p)
8370 {
8371 bool use_ldrd;
8372 enum rtx_code code = GET_CODE (x);
8373
8374 if (arm_address_register_rtx_p (x, strict_p))
8375 return 1;
8376
8377 use_ldrd = (TARGET_LDRD
8378 && (mode == DImode || mode == DFmode));
8379
8380 if (code == POST_INC || code == PRE_DEC
8381 || ((code == PRE_INC || code == POST_DEC)
8382 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8383 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8384
8385 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8386 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8387 && GET_CODE (XEXP (x, 1)) == PLUS
8388 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8389 {
8390 rtx addend = XEXP (XEXP (x, 1), 1);
8391
8392 /* Don't allow ldrd post-increment by register because it's hard
8393 to fix up invalid register choices.  */
8394 if (use_ldrd
8395 && GET_CODE (x) == POST_MODIFY
8396 && REG_P (addend))
8397 return 0;
8398
8399 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8400 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8401 }
8402
8403 /* After reload constants split into minipools will have addresses
8404 from a LABEL_REF. */
8405 else if (reload_completed
8406 && (code == LABEL_REF
8407 || (code == CONST
8408 && GET_CODE (XEXP (x, 0)) == PLUS
8409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8410 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8411 return 1;
8412
8413 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8414 return 0;
8415
8416 else if (code == PLUS)
8417 {
8418 rtx xop0 = XEXP (x, 0);
8419 rtx xop1 = XEXP (x, 1);
8420
8421 return ((arm_address_register_rtx_p (xop0, strict_p)
8422 && ((CONST_INT_P (xop1)
8423 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8424 || (!strict_p && will_be_in_index_register (xop1))))
8425 || (arm_address_register_rtx_p (xop1, strict_p)
8426 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8427 }
8428
8429 #if 0
8430 /* Reload currently can't handle MINUS, so disable this for now */
8431 else if (GET_CODE (x) == MINUS)
8432 {
8433 rtx xop0 = XEXP (x, 0);
8434 rtx xop1 = XEXP (x, 1);
8435
8436 return (arm_address_register_rtx_p (xop0, strict_p)
8437 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8438 }
8439 #endif
8440
8441 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8442 && code == SYMBOL_REF
8443 && CONSTANT_POOL_ADDRESS_P (x)
8444 && ! (flag_pic
8445 && symbol_mentioned_p (get_pool_constant (x))
8446 && ! pcrel_constant_p (get_pool_constant (x))))
8447 return 1;
8448
8449 return 0;
8450 }
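
/* Illustrative examples (not an exhaustive list) of addresses the
   function above accepts for a 4-byte access in ARM state:

     [r1]                base register
     [r1, #offset]       base plus/minus a 12-bit immediate
     [r1, r2]            base plus index register
     [r1, r2, lsl #2]    base plus scaled index register
     [r1], #4            post-increment (POST_INC and friends)

   Larger and vector modes are restricted further, as the checks on
   GET_MODE_SIZE and TARGET_LDRD above show.  */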
8451
8452 /* Return true if we can avoid creating a constant pool entry for x. */
8453 static bool
8454 can_avoid_literal_pool_for_label_p (rtx x)
8455 {
8456 /* Normally we can assign constant values to target registers without
8457 the help of the constant pool.  But there are cases where we have to use
8458 the constant pool, such as:
8459 1) assigning a label to a register;
8460 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8461
8462 A constant-pool access of the form:
8463 (set (reg r0) (mem (symbol_ref (".LC0"))))
8464 will cause the use of the literal pool (later, in function arm_reorg).
8465 So here we mark such a form as invalid, and the compiler
8466 will adjust it into:
8467 (set (reg r0) (symbol_ref (".LC0")))
8468 (set (reg r0) (mem (reg r0))).
8469 No extra register is required, and (mem (reg r0)) won't cause the use
8470 of literal pools. */
8471 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
8472 && CONSTANT_POOL_ADDRESS_P (x))
8473 return 1;
8474 return 0;
8475 }
8476
8477
8478 /* Return nonzero if X is a valid Thumb-2 address operand. */
8479 static int
8480 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8481 {
8482 bool use_ldrd;
8483 enum rtx_code code = GET_CODE (x);
8484
8485 if (TARGET_HAVE_MVE
8486 && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
8487 return mve_vector_mem_operand (mode, x, strict_p);
8488
8489 if (arm_address_register_rtx_p (x, strict_p))
8490 return 1;
8491
8492 use_ldrd = (TARGET_LDRD
8493 && (mode == DImode || mode == DFmode));
8494
8495 if (code == POST_INC || code == PRE_DEC
8496 || ((code == PRE_INC || code == POST_DEC)
8497 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8498 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8499
8500 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8501 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8502 && GET_CODE (XEXP (x, 1)) == PLUS
8503 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8504 {
8505 /* Thumb-2 only has autoincrement by constant. */
8506 rtx addend = XEXP (XEXP (x, 1), 1);
8507 HOST_WIDE_INT offset;
8508
8509 if (!CONST_INT_P (addend))
8510 return 0;
8511
8512 offset = INTVAL(addend);
8513 if (GET_MODE_SIZE (mode) <= 4)
8514 return (offset > -256 && offset < 256);
8515
8516 return (use_ldrd && offset > -1024 && offset < 1024
8517 && (offset & 3) == 0);
8518 }
8519
8520 /* After reload constants split into minipools will have addresses
8521 from a LABEL_REF. */
8522 else if (reload_completed
8523 && (code == LABEL_REF
8524 || (code == CONST
8525 && GET_CODE (XEXP (x, 0)) == PLUS
8526 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8527 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8528 return 1;
8529
8530 else if (mode == TImode
8531 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8532 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8533 return 0;
8534
8535 else if (code == PLUS)
8536 {
8537 rtx xop0 = XEXP (x, 0);
8538 rtx xop1 = XEXP (x, 1);
8539
8540 return ((arm_address_register_rtx_p (xop0, strict_p)
8541 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8542 || (!strict_p && will_be_in_index_register (xop1))))
8543 || (arm_address_register_rtx_p (xop1, strict_p)
8544 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8545 }
8546
8547 else if (can_avoid_literal_pool_for_label_p (x))
8548 return 0;
8549
8550 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8551 && code == SYMBOL_REF
8552 && CONSTANT_POOL_ADDRESS_P (x)
8553 && ! (flag_pic
8554 && symbol_mentioned_p (get_pool_constant (x))
8555 && ! pcrel_constant_p (get_pool_constant (x))))
8556 return 1;
8557
8558 return 0;
8559 }
8560
8561 /* Return nonzero if INDEX is valid for an address index operand in
8562 ARM state. */
8563 static int
8564 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8565 int strict_p)
8566 {
8567 HOST_WIDE_INT range;
8568 enum rtx_code code = GET_CODE (index);
8569
8570 /* Standard coprocessor addressing modes. */
8571 if (TARGET_HARD_FLOAT
8572 && (mode == SFmode || mode == DFmode))
8573 return (code == CONST_INT && INTVAL (index) < 1024
8574 && INTVAL (index) > -1024
8575 && (INTVAL (index) & 3) == 0);
8576
8577 /* For quad modes, we restrict the constant offset to be slightly less
8578 than what the instruction format permits. We do this because for
8579 quad mode moves, we will actually decompose them into two separate
8580 double-mode reads or writes. INDEX must therefore be a valid
8581 (double-mode) offset and so should INDEX+8. */
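   /* As an illustration: in a quad-register mode an offset of 1012 is
      accepted, since both 1012 and 1012 + 8 = 1020 are valid double-mode
      offsets, whereas 1016 is rejected because 1016 + 8 = 1024 would
      exceed the double-mode limit of 1020.  */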
8582 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8583 return (code == CONST_INT
8584 && INTVAL (index) < 1016
8585 && INTVAL (index) > -1024
8586 && (INTVAL (index) & 3) == 0);
8587
8588 /* We have no such constraint on double mode offsets, so we permit the
8589 full range of the instruction format. */
8590 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8591 return (code == CONST_INT
8592 && INTVAL (index) < 1024
8593 && INTVAL (index) > -1024
8594 && (INTVAL (index) & 3) == 0);
8595
8596 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8597 return (code == CONST_INT
8598 && INTVAL (index) < 1024
8599 && INTVAL (index) > -1024
8600 && (INTVAL (index) & 3) == 0);
8601
8602 if (arm_address_register_rtx_p (index, strict_p)
8603 && (GET_MODE_SIZE (mode) <= 4))
8604 return 1;
8605
8606 if (mode == DImode || mode == DFmode)
8607 {
8608 if (code == CONST_INT)
8609 {
8610 HOST_WIDE_INT val = INTVAL (index);
8611
8612 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8613 If vldr is selected it uses arm_coproc_mem_operand. */
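	  /* Sketch of the ranges used below: ldrd takes an 8-bit offset,
	     hence -255..255; with two ldr instructions the second access
	     is at VAL + 4 and must still fit the 12-bit ldr offset, so
	     the upper bound is 4091 (tested as < 4092).  */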
8614 if (TARGET_LDRD)
8615 return val > -256 && val < 256;
8616 else
8617 return val > -4096 && val < 4092;
8618 }
8619
8620 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8621 }
8622
8623 if (GET_MODE_SIZE (mode) <= 4
8624 && ! (arm_arch4
8625 && (mode == HImode
8626 || mode == HFmode
8627 || (mode == QImode && outer == SIGN_EXTEND))))
8628 {
8629 if (code == MULT)
8630 {
8631 rtx xiop0 = XEXP (index, 0);
8632 rtx xiop1 = XEXP (index, 1);
8633
8634 return ((arm_address_register_rtx_p (xiop0, strict_p)
8635 && power_of_two_operand (xiop1, SImode))
8636 || (arm_address_register_rtx_p (xiop1, strict_p)
8637 && power_of_two_operand (xiop0, SImode)));
8638 }
8639 else if (code == LSHIFTRT || code == ASHIFTRT
8640 || code == ASHIFT || code == ROTATERT)
8641 {
8642 rtx op = XEXP (index, 1);
8643
8644 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8645 && CONST_INT_P (op)
8646 && INTVAL (op) > 0
8647 && INTVAL (op) <= 31);
8648 }
8649 }
8650
8651 /* For ARM v4 we may be doing a sign-extend operation during the
8652 load. */
8653 if (arm_arch4)
8654 {
8655 if (mode == HImode
8656 || mode == HFmode
8657 || (outer == SIGN_EXTEND && mode == QImode))
8658 range = 256;
8659 else
8660 range = 4096;
8661 }
8662 else
8663 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8664
8665 return (code == CONST_INT
8666 && INTVAL (index) < range
8667 && INTVAL (index) > -range);
8668 }
8669
8670 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8671 index operand, i.e. 1, 2, 4 or 8. */
8672 static bool
8673 thumb2_index_mul_operand (rtx op)
8674 {
8675 HOST_WIDE_INT val;
8676
8677 if (!CONST_INT_P (op))
8678 return false;
8679
8680 val = INTVAL (op);
8681 return (val == 1 || val == 2 || val == 4 || val == 8);
8682 }
8683
8684 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8685 static int
8686 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8687 {
8688 enum rtx_code code = GET_CODE (index);
8689
8690 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8691 /* Standard coprocessor addressing modes. */
8692 if (TARGET_VFP_BASE
8693 && (mode == SFmode || mode == DFmode))
8694 return (code == CONST_INT && INTVAL (index) < 1024
8695 /* Thumb-2 allows only a > -256 index range for its core register
8696 load/stores. Since we allow SF/DF in core registers, we have
8697 to use the intersection between -256~4096 (core) and -1024~1024
8698 (coprocessor). */
8699 && INTVAL (index) > -256
8700 && (INTVAL (index) & 3) == 0);
8701
8702 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8703 {
8704 /* For DImode assume values will usually live in core regs
8705 and only allow LDRD addressing modes. */
8706 if (!TARGET_LDRD || mode != DImode)
8707 return (code == CONST_INT
8708 && INTVAL (index) < 1024
8709 && INTVAL (index) > -1024
8710 && (INTVAL (index) & 3) == 0);
8711 }
8712
8713 /* For quad modes, we restrict the constant offset to be slightly less
8714 than what the instruction format permits. We do this because for
8715 quad mode moves, we will actually decompose them into two separate
8716 double-mode reads or writes. INDEX must therefore be a valid
8717 (double-mode) offset and so should INDEX+8. */
8718 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8719 return (code == CONST_INT
8720 && INTVAL (index) < 1016
8721 && INTVAL (index) > -1024
8722 && (INTVAL (index) & 3) == 0);
8723
8724 /* We have no such constraint on double mode offsets, so we permit the
8725 full range of the instruction format. */
8726 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8727 return (code == CONST_INT
8728 && INTVAL (index) < 1024
8729 && INTVAL (index) > -1024
8730 && (INTVAL (index) & 3) == 0);
8731
8732 if (arm_address_register_rtx_p (index, strict_p)
8733 && (GET_MODE_SIZE (mode) <= 4))
8734 return 1;
8735
8736 if (mode == DImode || mode == DFmode)
8737 {
8738 if (code == CONST_INT)
8739 {
8740 HOST_WIDE_INT val = INTVAL (index);
8741 /* Thumb-2 ldrd only has reg+const addressing modes.
8742 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8743 If vldr is selected it uses arm_coproc_mem_operand. */
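	  /* Sketch: Thumb-2 ldrd uses a scaled 8-bit offset, giving
	     multiples of 4 in -1020..1020; the 2x ldr fallback needs both
	     VAL and VAL + 4 inside the Thumb-2 ldr range of -255..4095,
	     hence -255..4091.  */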
8744 if (TARGET_LDRD)
8745 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8746 else
8747 return IN_RANGE (val, -255, 4095 - 4);
8748 }
8749 else
8750 return 0;
8751 }
8752
8753 if (code == MULT)
8754 {
8755 rtx xiop0 = XEXP (index, 0);
8756 rtx xiop1 = XEXP (index, 1);
8757
8758 return ((arm_address_register_rtx_p (xiop0, strict_p)
8759 && thumb2_index_mul_operand (xiop1))
8760 || (arm_address_register_rtx_p (xiop1, strict_p)
8761 && thumb2_index_mul_operand (xiop0)));
8762 }
8763 else if (code == ASHIFT)
8764 {
8765 rtx op = XEXP (index, 1);
8766
8767 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8768 && CONST_INT_P (op)
8769 && INTVAL (op) > 0
8770 && INTVAL (op) <= 3);
8771 }
8772
8773 return (code == CONST_INT
8774 && INTVAL (index) < 4096
8775 && INTVAL (index) > -256);
8776 }
8777
8778 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8779 static int
8780 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8781 {
8782 int regno;
8783
8784 if (!REG_P (x))
8785 return 0;
8786
8787 regno = REGNO (x);
8788
8789 if (strict_p)
8790 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8791
8792 return (regno <= LAST_LO_REGNUM
8793 || regno > LAST_VIRTUAL_REGISTER
8794 || regno == FRAME_POINTER_REGNUM
8795 || (GET_MODE_SIZE (mode) >= 4
8796 && (regno == STACK_POINTER_REGNUM
8797 || regno >= FIRST_PSEUDO_REGISTER
8798 || x == hard_frame_pointer_rtx
8799 || x == arg_pointer_rtx)));
8800 }
8801
8802 /* Return nonzero if x is a legitimate index register. This is the case
8803 for any base register that can access a QImode object. */
8804 inline static int
8805 thumb1_index_register_rtx_p (rtx x, int strict_p)
8806 {
8807 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8808 }
8809
8810 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8811
8812 The AP may be eliminated to either the SP or the FP, so we use the
8813 least common denominator, e.g. SImode, and offsets from 0 to 64.
8814
8815 ??? Verify whether the above is the right approach.
8816
8817 ??? Also, the FP may be eliminated to the SP, so perhaps that
8818 needs special handling also.
8819
8820 ??? Look at how the mips16 port solves this problem. It probably uses
8821 better ways to solve some of these problems.
8822
8823 Although it is not incorrect, we don't accept QImode and HImode
8824 addresses based on the frame pointer or arg pointer until the
8825 reload pass starts. This is so that eliminating such addresses
8826 into stack based ones won't produce impossible code. */
8827 int
8828 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8829 {
8830 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8831 return 0;
8832
8833 /* ??? Not clear if this is right. Experiment. */
8834 if (GET_MODE_SIZE (mode) < 4
8835 && !(reload_in_progress || reload_completed)
8836 && (reg_mentioned_p (frame_pointer_rtx, x)
8837 || reg_mentioned_p (arg_pointer_rtx, x)
8838 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8839 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8840 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8841 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8842 return 0;
8843
8844 /* Accept any base register. SP only in SImode or larger. */
8845 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8846 return 1;
8847
8848 /* This is PC relative data before arm_reorg runs. */
8849 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8850 && GET_CODE (x) == SYMBOL_REF
8851 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
8852 && !arm_disable_literal_pool)
8853 return 1;
8854
8855 /* This is PC relative data after arm_reorg runs. */
8856 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8857 && reload_completed
8858 && (GET_CODE (x) == LABEL_REF
8859 || (GET_CODE (x) == CONST
8860 && GET_CODE (XEXP (x, 0)) == PLUS
8861 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8862 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8863 return 1;
8864
8865 /* Post-inc indexing only supported for SImode and larger. */
8866 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8867 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8868 return 1;
8869
8870 else if (GET_CODE (x) == PLUS)
8871 {
8872 /* REG+REG address can be any two index registers. */
8873 /* We disallow FRAME+REG addressing since we know that FRAME
8874 will be replaced with STACK, and SP relative addressing only
8875 permits SP+OFFSET. */
8876 if (GET_MODE_SIZE (mode) <= 4
8877 && XEXP (x, 0) != frame_pointer_rtx
8878 && XEXP (x, 1) != frame_pointer_rtx
8879 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8880 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8881 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8882 return 1;
8883
8884 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
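      /* Concretely (see thumb_legitimate_offset_p below): 0..31 for byte
	 accesses, 0..62 in steps of 2 for halfwords, and 0..124 in steps
	 of 4 for words and larger.  */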
8885 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8886 || XEXP (x, 0) == arg_pointer_rtx)
8887 && CONST_INT_P (XEXP (x, 1))
8888 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8889 return 1;
8890
8891 /* REG+const has a 10-bit offset for SP, but only SImode and
8892 larger are supported. */
8893 /* ??? Should probably check for DI/DFmode overflow here
8894 just like GO_IF_LEGITIMATE_OFFSET does. */
8895 else if (REG_P (XEXP (x, 0))
8896 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8897 && GET_MODE_SIZE (mode) >= 4
8898 && CONST_INT_P (XEXP (x, 1))
8899 && INTVAL (XEXP (x, 1)) >= 0
8900 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8901 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8902 return 1;
8903
8904 else if (REG_P (XEXP (x, 0))
8905 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8906 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8907 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8908 && REGNO (XEXP (x, 0))
8909 <= LAST_VIRTUAL_POINTER_REGISTER))
8910 && GET_MODE_SIZE (mode) >= 4
8911 && CONST_INT_P (XEXP (x, 1))
8912 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8913 return 1;
8914 }
8915
8916 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8917 && GET_MODE_SIZE (mode) == 4
8918 && GET_CODE (x) == SYMBOL_REF
8919 && CONSTANT_POOL_ADDRESS_P (x)
8920 && !arm_disable_literal_pool
8921 && ! (flag_pic
8922 && symbol_mentioned_p (get_pool_constant (x))
8923 && ! pcrel_constant_p (get_pool_constant (x))))
8924 return 1;
8925
8926 return 0;
8927 }
8928
8929 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8930 instruction of mode MODE. */
8931 int
8932 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8933 {
8934 switch (GET_MODE_SIZE (mode))
8935 {
8936 case 1:
8937 return val >= 0 && val < 32;
8938
8939 case 2:
8940 return val >= 0 && val < 64 && (val & 1) == 0;
8941
8942 default:
8943 return (val >= 0
8944 && (val + GET_MODE_SIZE (mode)) <= 128
8945 && (val & 3) == 0);
8946 }
8947 }
8948
8949 bool
8950 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8951 {
8952 if (TARGET_ARM)
8953 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8954 else if (TARGET_THUMB2)
8955 return thumb2_legitimate_address_p (mode, x, strict_p);
8956 else /* if (TARGET_THUMB1) */
8957 return thumb1_legitimate_address_p (mode, x, strict_p);
8958 }
8959
8960 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8961
8962 Given an rtx X being reloaded into a reg required to be
8963 in class CLASS, return the class of reg to actually use.
8964 In general this is just CLASS, but for the Thumb core registers and
8965 immediate constants we prefer a LO_REGS class or a subset. */
8966
8967 static reg_class_t
8968 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8969 {
8970 if (TARGET_32BIT)
8971 return rclass;
8972 else
8973 {
8974 if (rclass == GENERAL_REGS)
8975 return LO_REGS;
8976 else
8977 return rclass;
8978 }
8979 }
8980
8981 /* Build the SYMBOL_REF for __tls_get_addr. */
8982
8983 static GTY(()) rtx tls_get_addr_libfunc;
8984
8985 static rtx
8986 get_tls_get_addr (void)
8987 {
8988 if (!tls_get_addr_libfunc)
8989 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8990 return tls_get_addr_libfunc;
8991 }
8992
8993 rtx
8994 arm_load_tp (rtx target)
8995 {
8996 if (!target)
8997 target = gen_reg_rtx (SImode);
8998
8999 if (TARGET_HARD_TP)
9000 {
9001 /* Can return in any reg. */
9002 emit_insn (gen_load_tp_hard (target));
9003 }
9004 else
9005 {
9006 /* Always returned in r0. Immediately copy the result into a pseudo,
9007 otherwise other uses of r0 (e.g. setting up function arguments) may
9008 clobber the value. */
9009
9010 rtx tmp;
9011
9012 if (TARGET_FDPIC)
9013 {
9014 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9015 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9016
9017 emit_insn (gen_load_tp_soft_fdpic ());
9018
9019 /* Restore r9. */
9020 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9021 }
9022 else
9023 emit_insn (gen_load_tp_soft ());
9024
9025 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9026 emit_move_insn (target, tmp);
9027 }
9028 return target;
9029 }
9030
9031 static rtx
9032 load_tls_operand (rtx x, rtx reg)
9033 {
9034 rtx tmp;
9035
9036 if (reg == NULL_RTX)
9037 reg = gen_reg_rtx (SImode);
9038
9039 tmp = gen_rtx_CONST (SImode, x);
9040
9041 emit_move_insn (reg, tmp);
9042
9043 return reg;
9044 }
9045
9046 static rtx_insn *
9047 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9048 {
9049 rtx label, labelno = NULL_RTX, sum;
9050
9051 gcc_assert (reloc != TLS_DESCSEQ);
9052 start_sequence ();
9053
9054 if (TARGET_FDPIC)
9055 {
9056 sum = gen_rtx_UNSPEC (Pmode,
9057 gen_rtvec (2, x, GEN_INT (reloc)),
9058 UNSPEC_TLS);
9059 }
9060 else
9061 {
9062 labelno = GEN_INT (pic_labelno++);
9063 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9064 label = gen_rtx_CONST (VOIDmode, label);
9065
9066 sum = gen_rtx_UNSPEC (Pmode,
9067 gen_rtvec (4, x, GEN_INT (reloc), label,
9068 GEN_INT (TARGET_ARM ? 8 : 4)),
9069 UNSPEC_TLS);
9070 }
9071 reg = load_tls_operand (sum, reg);
9072
9073 if (TARGET_FDPIC)
9074 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9075 else if (TARGET_ARM)
9076 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9077 else
9078 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9079
9080 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9081 LCT_PURE, /* LCT_CONST? */
9082 Pmode, reg, Pmode);
9083
9084 rtx_insn *insns = get_insns ();
9085 end_sequence ();
9086
9087 return insns;
9088 }
9089
9090 static rtx
9091 arm_tls_descseq_addr (rtx x, rtx reg)
9092 {
9093 rtx labelno = GEN_INT (pic_labelno++);
9094 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9095 rtx sum = gen_rtx_UNSPEC (Pmode,
9096 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9097 gen_rtx_CONST (VOIDmode, label),
9098 GEN_INT (!TARGET_ARM)),
9099 UNSPEC_TLS);
9100 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9101
9102 emit_insn (gen_tlscall (x, labelno));
9103 if (!reg)
9104 reg = gen_reg_rtx (SImode);
9105 else
9106 gcc_assert (REGNO (reg) != R0_REGNUM);
9107
9108 emit_move_insn (reg, reg0);
9109
9110 return reg;
9111 }
9112
9113
9114 rtx
9115 legitimize_tls_address (rtx x, rtx reg)
9116 {
9117 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9118 rtx_insn *insns;
9119 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9120
9121 switch (model)
9122 {
9123 case TLS_MODEL_GLOBAL_DYNAMIC:
9124 if (TARGET_GNU2_TLS)
9125 {
9126 gcc_assert (!TARGET_FDPIC);
9127
9128 reg = arm_tls_descseq_addr (x, reg);
9129
9130 tp = arm_load_tp (NULL_RTX);
9131
9132 dest = gen_rtx_PLUS (Pmode, tp, reg);
9133 }
9134 else
9135 {
9136 /* Original scheme */
9137 if (TARGET_FDPIC)
9138 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9139 else
9140 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9141 dest = gen_reg_rtx (Pmode);
9142 emit_libcall_block (insns, dest, ret, x);
9143 }
9144 return dest;
9145
9146 case TLS_MODEL_LOCAL_DYNAMIC:
9147 if (TARGET_GNU2_TLS)
9148 {
9149 gcc_assert (!TARGET_FDPIC);
9150
9151 reg = arm_tls_descseq_addr (x, reg);
9152
9153 tp = arm_load_tp (NULL_RTX);
9154
9155 dest = gen_rtx_PLUS (Pmode, tp, reg);
9156 }
9157 else
9158 {
9159 if (TARGET_FDPIC)
9160 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9161 else
9162 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9163
9164 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9165 share the LDM result with other LD model accesses. */
9166 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9167 UNSPEC_TLS);
9168 dest = gen_reg_rtx (Pmode);
9169 emit_libcall_block (insns, dest, ret, eqv);
9170
9171 /* Load the addend. */
9172 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9173 GEN_INT (TLS_LDO32)),
9174 UNSPEC_TLS);
9175 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9176 dest = gen_rtx_PLUS (Pmode, dest, addend);
9177 }
9178 return dest;
9179
9180 case TLS_MODEL_INITIAL_EXEC:
9181 if (TARGET_FDPIC)
9182 {
9183 sum = gen_rtx_UNSPEC (Pmode,
9184 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9185 UNSPEC_TLS);
9186 reg = load_tls_operand (sum, reg);
9187 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9188 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9189 }
9190 else
9191 {
9192 labelno = GEN_INT (pic_labelno++);
9193 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9194 label = gen_rtx_CONST (VOIDmode, label);
9195 sum = gen_rtx_UNSPEC (Pmode,
9196 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9197 GEN_INT (TARGET_ARM ? 8 : 4)),
9198 UNSPEC_TLS);
9199 reg = load_tls_operand (sum, reg);
9200
9201 if (TARGET_ARM)
9202 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9203 else if (TARGET_THUMB2)
9204 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9205 else
9206 {
9207 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9208 emit_move_insn (reg, gen_const_mem (SImode, reg));
9209 }
9210 }
9211
9212 tp = arm_load_tp (NULL_RTX);
9213
9214 return gen_rtx_PLUS (Pmode, tp, reg);
9215
9216 case TLS_MODEL_LOCAL_EXEC:
9217 tp = arm_load_tp (NULL_RTX);
9218
9219 reg = gen_rtx_UNSPEC (Pmode,
9220 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9221 UNSPEC_TLS);
9222 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9223
9224 return gen_rtx_PLUS (Pmode, tp, reg);
9225
9226 default:
9227 abort ();
9228 }
9229 }
9230
9231 /* Try machine-dependent ways of modifying an illegitimate address
9232 to be legitimate. If we find one, return the new, valid address. */
9233 rtx
9234 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9235 {
9236 if (arm_tls_referenced_p (x))
9237 {
9238 rtx addend = NULL;
9239
9240 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9241 {
9242 addend = XEXP (XEXP (x, 0), 1);
9243 x = XEXP (XEXP (x, 0), 0);
9244 }
9245
9246 if (GET_CODE (x) != SYMBOL_REF)
9247 return x;
9248
9249 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9250
9251 x = legitimize_tls_address (x, NULL_RTX);
9252
9253 if (addend)
9254 {
9255 x = gen_rtx_PLUS (SImode, x, addend);
9256 orig_x = x;
9257 }
9258 else
9259 return x;
9260 }
9261
9262 if (TARGET_THUMB1)
9263 return thumb_legitimize_address (x, orig_x, mode);
9264
9265 if (GET_CODE (x) == PLUS)
9266 {
9267 rtx xop0 = XEXP (x, 0);
9268 rtx xop1 = XEXP (x, 1);
9269
9270 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9271 xop0 = force_reg (SImode, xop0);
9272
9273 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9274 && !symbol_mentioned_p (xop1))
9275 xop1 = force_reg (SImode, xop1);
9276
9277 if (ARM_BASE_REGISTER_RTX_P (xop0)
9278 && CONST_INT_P (xop1))
9279 {
9280 HOST_WIDE_INT n, low_n;
9281 rtx base_reg, val;
9282 n = INTVAL (xop1);
9283
9284 /* VFP addressing modes actually allow greater offsets, but for
9285 now we just stick with the lowest common denominator. */
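	  /* For instance, in DImode an offset of 271 (0x10f) is rewritten
	     below as a bias of 272 on a new base register plus a small
	     residual offset of -1.  */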
9286 if (mode == DImode || mode == DFmode)
9287 {
9288 low_n = n & 0x0f;
9289 n &= ~0x0f;
9290 if (low_n > 4)
9291 {
9292 n += 16;
9293 low_n -= 16;
9294 }
9295 }
9296 else
9297 {
9298 low_n = ((mode) == TImode ? 0
9299 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9300 n -= low_n;
9301 }
9302
9303 base_reg = gen_reg_rtx (SImode);
9304 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9305 emit_move_insn (base_reg, val);
9306 x = plus_constant (Pmode, base_reg, low_n);
9307 }
9308 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9309 x = gen_rtx_PLUS (SImode, xop0, xop1);
9310 }
9311
9312 /* XXX We don't allow MINUS any more -- see comment in
9313 arm_legitimate_address_outer_p (). */
9314 else if (GET_CODE (x) == MINUS)
9315 {
9316 rtx xop0 = XEXP (x, 0);
9317 rtx xop1 = XEXP (x, 1);
9318
9319 if (CONSTANT_P (xop0))
9320 xop0 = force_reg (SImode, xop0);
9321
9322 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9323 xop1 = force_reg (SImode, xop1);
9324
9325 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9326 x = gen_rtx_MINUS (SImode, xop0, xop1);
9327 }
9328
9329 /* Make sure to take full advantage of the pre-indexed addressing mode
9330 with absolute addresses which often allows for the base register to
9331 be factorized for multiple adjacent memory references, and it might
9332 even allow for the mini pool to be avoided entirely. */
9333 else if (CONST_INT_P (x) && optimize > 0)
9334 {
9335 unsigned int bits;
9336 HOST_WIDE_INT mask, base, index;
9337 rtx base_reg;
9338
9339 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9340 only use an 8-bit index. So let's use a 12-bit index for
9341 SImode only and hope that arm_gen_constant will enable LDRB
9342 to use more bits. */
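      /* For example, in SImode an absolute address of 0x12345678 is
	 split into a base of 0x12345000 plus an index of 0x678.  */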
9343 bits = (mode == SImode) ? 12 : 8;
9344 mask = (1 << bits) - 1;
9345 base = INTVAL (x) & ~mask;
9346 index = INTVAL (x) & mask;
9347 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9348 {
9349 /* It'll most probably be more efficient to generate the
9350 base with more bits set and use a negative index instead.
9351 Don't do this for Thumb as negative offsets are much more
9352 limited. */
9353 base |= mask;
9354 index -= mask;
9355 }
9356 base_reg = force_reg (SImode, GEN_INT (base));
9357 x = plus_constant (Pmode, base_reg, index);
9358 }
9359
9360 if (flag_pic)
9361 {
9362 /* We need to find and carefully transform any SYMBOL and LABEL
9363 references; so go back to the original address expression. */
9364 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9365 false /*compute_now*/);
9366
9367 if (new_x != orig_x)
9368 x = new_x;
9369 }
9370
9371 return x;
9372 }
9373
9374
9375 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9376 to be legitimate. If we find one, return the new, valid address. */
9377 rtx
9378 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9379 {
9380 if (GET_CODE (x) == PLUS
9381 && CONST_INT_P (XEXP (x, 1))
9382 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9383 || INTVAL (XEXP (x, 1)) < 0))
9384 {
9385 rtx xop0 = XEXP (x, 0);
9386 rtx xop1 = XEXP (x, 1);
9387 HOST_WIDE_INT offset = INTVAL (xop1);
9388
9389 /* Try and fold the offset into a biasing of the base register and
9390 then offsetting that. Don't do this when optimizing for space
9391 since it can cause too many CSEs. */
9392 if (optimize_size && offset >= 0
9393 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9394 {
9395 HOST_WIDE_INT delta;
9396
9397 if (offset >= 256)
9398 delta = offset - (256 - GET_MODE_SIZE (mode));
9399 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9400 delta = 31 * GET_MODE_SIZE (mode);
9401 else
9402 delta = offset & (~31 * GET_MODE_SIZE (mode));
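	  /* E.g. for SImode and offset == 300, delta is
	     300 - (256 - 4) = 48, so the base register is biased by 252
	     and the final address uses the in-range offset 48.  */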
9403
9404 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9405 NULL_RTX);
9406 x = plus_constant (Pmode, xop0, delta);
9407 }
9408 else if (offset < 0 && offset > -256)
9409 /* Small negative offsets are best done with a subtract before the
9410 dereference; forcing these into a register normally takes two
9411 instructions. */
9412 x = force_operand (x, NULL_RTX);
9413 else
9414 {
9415 /* For the remaining cases, force the constant into a register. */
9416 xop1 = force_reg (SImode, xop1);
9417 x = gen_rtx_PLUS (SImode, xop0, xop1);
9418 }
9419 }
9420 else if (GET_CODE (x) == PLUS
9421 && s_register_operand (XEXP (x, 1), SImode)
9422 && !s_register_operand (XEXP (x, 0), SImode))
9423 {
9424 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9425
9426 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9427 }
9428
9429 if (flag_pic)
9430 {
9431 /* We need to find and carefully transform any SYMBOL and LABEL
9432 references; so go back to the original address expression. */
9433 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9434 false /*compute_now*/);
9435
9436 if (new_x != orig_x)
9437 x = new_x;
9438 }
9439
9440 return x;
9441 }
9442
9443 /* Return TRUE if X contains any TLS symbol references. */
9444
9445 bool
9446 arm_tls_referenced_p (rtx x)
9447 {
9448 if (! TARGET_HAVE_TLS)
9449 return false;
9450
9451 subrtx_iterator::array_type array;
9452 FOR_EACH_SUBRTX (iter, array, x, ALL)
9453 {
9454 const_rtx x = *iter;
9455 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
9456 {
9457 /* ARM currently does not provide relocations to encode TLS variables
9458 into AArch32 instructions, only data, so there is no way to
9459 currently implement these if a literal pool is disabled. */
9460 if (arm_disable_literal_pool)
9461 sorry ("accessing thread-local storage is not currently supported "
9462 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9463
9464 return true;
9465 }
9466
9467 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9468 TLS offsets, not real symbol references. */
9469 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9470 iter.skip_subrtxes ();
9471 }
9472 return false;
9473 }
9474
9475 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9476
9477 On the ARM, allow any integer (invalid ones are removed later by insn
9478 patterns), nice doubles and symbol_refs which refer to the function's
9479 constant pool XXX.
9480
9481 When generating pic allow anything. */
9482
9483 static bool
9484 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9485 {
9486 return flag_pic || !label_mentioned_p (x);
9487 }
9488
9489 static bool
9490 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9491 {
9492 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9493 RTXs. These must therefore be allowed for Thumb-1 so that, when run
9494 for ARMv8-M Baseline or later, the result is valid. */
9495 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9496 x = XEXP (x, 0);
9497
9498 return (CONST_INT_P (x)
9499 || CONST_DOUBLE_P (x)
9500 || CONSTANT_ADDRESS_P (x)
9501 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
9502 || flag_pic);
9503 }
9504
9505 static bool
9506 arm_legitimate_constant_p (machine_mode mode, rtx x)
9507 {
9508 return (!arm_cannot_force_const_mem (mode, x)
9509 && (TARGET_32BIT
9510 ? arm_legitimate_constant_p_1 (mode, x)
9511 : thumb_legitimate_constant_p (mode, x)));
9512 }
9513
9514 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9515
9516 static bool
9517 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9518 {
9519 rtx base, offset;
9520 split_const (x, &base, &offset);
9521
9522 if (SYMBOL_REF_P (base))
9523 {
9524 /* Function symbols cannot have an offset due to the Thumb bit. */
9525 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9526 && INTVAL (offset) != 0)
9527 return true;
9528
9529 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9530 && !offset_within_block_p (base, INTVAL (offset)))
9531 return true;
9532 }
9533 return arm_tls_referenced_p (x);
9534 }
9535 \f
9536 #define REG_OR_SUBREG_REG(X) \
9537 (REG_P (X) \
9538 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
9539
9540 #define REG_OR_SUBREG_RTX(X) \
9541 (REG_P (X) ? (X) : SUBREG_REG (X))
9542
9543 static inline int
9544 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9545 {
9546 machine_mode mode = GET_MODE (x);
9547 int total, words;
9548
9549 switch (code)
9550 {
9551 case ASHIFT:
9552 case ASHIFTRT:
9553 case LSHIFTRT:
9554 case ROTATERT:
9555 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9556
9557 case PLUS:
9558 case MINUS:
9559 case COMPARE:
9560 case NEG:
9561 case NOT:
9562 return COSTS_N_INSNS (1);
9563
9564 case MULT:
9565 if (arm_arch6m && arm_m_profile_small_mul)
9566 return COSTS_N_INSNS (32);
9567
9568 if (CONST_INT_P (XEXP (x, 1)))
9569 {
9570 int cycles = 0;
9571 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9572
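	  /* Roughly one cycle per two bits of the constant multiplier,
	     e.g. 8 cycles for a 16-bit constant.  */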
9573 while (i)
9574 {
9575 i >>= 2;
9576 cycles++;
9577 }
9578 return COSTS_N_INSNS (2) + cycles;
9579 }
9580 return COSTS_N_INSNS (1) + 16;
9581
9582 case SET:
9583 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9584 the mode. */
9585 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9586 return (COSTS_N_INSNS (words)
9587 + 4 * ((MEM_P (SET_SRC (x)))
9588 + MEM_P (SET_DEST (x))));
9589
9590 case CONST_INT:
9591 if (outer == SET)
9592 {
9593 if (UINTVAL (x) < 256
9594 /* 16-bit constant. */
9595 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9596 return 0;
9597 if (thumb_shiftable_const (INTVAL (x)))
9598 return COSTS_N_INSNS (2);
9599 return arm_disable_literal_pool
9600 ? COSTS_N_INSNS (8)
9601 : COSTS_N_INSNS (3);
9602 }
9603 else if ((outer == PLUS || outer == COMPARE)
9604 && INTVAL (x) < 256 && INTVAL (x) > -256)
9605 return 0;
9606 else if ((outer == IOR || outer == XOR || outer == AND)
9607 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9608 return COSTS_N_INSNS (1);
9609 else if (outer == AND)
9610 {
9611 int i;
9612 /* This duplicates the tests in the andsi3 expander. */
9613 for (i = 9; i <= 31; i++)
9614 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9615 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9616 return COSTS_N_INSNS (2);
9617 }
9618 else if (outer == ASHIFT || outer == ASHIFTRT
9619 || outer == LSHIFTRT)
9620 return 0;
9621 return COSTS_N_INSNS (2);
9622
9623 case CONST:
9624 case CONST_DOUBLE:
9625 case LABEL_REF:
9626 case SYMBOL_REF:
9627 return COSTS_N_INSNS (3);
9628
9629 case UDIV:
9630 case UMOD:
9631 case DIV:
9632 case MOD:
9633 return 100;
9634
9635 case TRUNCATE:
9636 return 99;
9637
9638 case AND:
9639 case XOR:
9640 case IOR:
9641 /* XXX guess. */
9642 return 8;
9643
9644 case MEM:
9645 /* XXX another guess. */
9646 /* Memory costs quite a lot for the first word, but subsequent words
9647 load at the equivalent of a single insn each. */
9648 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9649 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9650 ? 4 : 0));
9651
9652 case IF_THEN_ELSE:
9653 /* XXX a guess. */
9654 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9655 return 14;
9656 return 2;
9657
9658 case SIGN_EXTEND:
9659 case ZERO_EXTEND:
9660 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9661 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9662
9663 if (mode == SImode)
9664 return total;
9665
9666 if (arm_arch6)
9667 return total + COSTS_N_INSNS (1);
9668
9669 /* Assume a two-shift sequence. Increase the cost slightly so
9670 we prefer actual shifts over an extend operation. */
9671 return total + 1 + COSTS_N_INSNS (2);
9672
9673 default:
9674 return 99;
9675 }
9676 }
9677
9678 /* Estimates the size cost of thumb1 instructions.
9679 For now most of the code is copied from thumb1_rtx_costs. We need more
9680 fine-grained tuning when we have more related test cases. */
9681 static inline int
9682 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9683 {
9684 machine_mode mode = GET_MODE (x);
9685 int words, cost;
9686
9687 switch (code)
9688 {
9689 case ASHIFT:
9690 case ASHIFTRT:
9691 case LSHIFTRT:
9692 case ROTATERT:
9693 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9694
9695 case PLUS:
9696 case MINUS:
9697 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9698 shiftsub1 patterns generated by RTL expansion, especially for the
9699 expansion of multiplication. */
9700 if ((GET_CODE (XEXP (x, 0)) == MULT
9701 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9702 || (GET_CODE (XEXP (x, 1)) == MULT
9703 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9704 return COSTS_N_INSNS (2);
9705 /* Fall through. */
9706 case COMPARE:
9707 case NEG:
9708 case NOT:
9709 return COSTS_N_INSNS (1);
9710
9711 case MULT:
9712 if (CONST_INT_P (XEXP (x, 1)))
9713 {
9714 /* The Thumb-1 mul instruction can't operate on a constant; we must
9715 load it into a register first. */
9716 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9717 /* For the targets which have a very small and high-latency multiply
9718 unit, we prefer to synthesize the mult with up to 5 instructions,
9719 giving a good balance between size and performance. */
9720 if (arm_arch6m && arm_m_profile_small_mul)
9721 return COSTS_N_INSNS (5);
9722 else
9723 return COSTS_N_INSNS (1) + const_size;
9724 }
9725 return COSTS_N_INSNS (1);
9726
9727 case SET:
9728 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9729 the mode. */
9730 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9731 cost = COSTS_N_INSNS (words);
9732 if (satisfies_constraint_J (SET_SRC (x))
9733 || satisfies_constraint_K (SET_SRC (x))
9734 /* Too big an immediate for a 2-byte mov, using MOVT. */
9735 || (CONST_INT_P (SET_SRC (x))
9736 && UINTVAL (SET_SRC (x)) >= 256
9737 && TARGET_HAVE_MOVT
9738 && satisfies_constraint_j (SET_SRC (x)))
9739 /* thumb1_movdi_insn. */
9740 || ((words > 1) && MEM_P (SET_SRC (x))))
9741 cost += COSTS_N_INSNS (1);
9742 return cost;
9743
9744 case CONST_INT:
9745 if (outer == SET)
9746 {
9747 if (UINTVAL (x) < 256)
9748 return COSTS_N_INSNS (1);
9749 /* movw is 4 bytes long. */
9750 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9751 return COSTS_N_INSNS (2);
9752 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9753 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9754 return COSTS_N_INSNS (2);
9755 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9756 if (thumb_shiftable_const (INTVAL (x)))
9757 return COSTS_N_INSNS (2);
9758 return arm_disable_literal_pool
9759 ? COSTS_N_INSNS (8)
9760 : COSTS_N_INSNS (3);
9761 }
9762 else if ((outer == PLUS || outer == COMPARE)
9763 && INTVAL (x) < 256 && INTVAL (x) > -256)
9764 return 0;
9765 else if ((outer == IOR || outer == XOR || outer == AND)
9766 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9767 return COSTS_N_INSNS (1);
9768 else if (outer == AND)
9769 {
9770 int i;
9771 /* This duplicates the tests in the andsi3 expander. */
9772 for (i = 9; i <= 31; i++)
9773 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9774 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9775 return COSTS_N_INSNS (2);
9776 }
9777 else if (outer == ASHIFT || outer == ASHIFTRT
9778 || outer == LSHIFTRT)
9779 return 0;
9780 return COSTS_N_INSNS (2);
9781
9782 case CONST:
9783 case CONST_DOUBLE:
9784 case LABEL_REF:
9785 case SYMBOL_REF:
9786 return COSTS_N_INSNS (3);
9787
9788 case UDIV:
9789 case UMOD:
9790 case DIV:
9791 case MOD:
9792 return 100;
9793
9794 case TRUNCATE:
9795 return 99;
9796
9797 case AND:
9798 case XOR:
9799 case IOR:
9800 return COSTS_N_INSNS (1);
9801
9802 case MEM:
9803 return (COSTS_N_INSNS (1)
9804 + COSTS_N_INSNS (1)
9805 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9806 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9807 ? COSTS_N_INSNS (1) : 0));
9808
9809 case IF_THEN_ELSE:
9810 /* XXX a guess. */
9811 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9812 return 14;
9813 return 2;
9814
9815 case ZERO_EXTEND:
9816 /* XXX still guessing. */
9817 switch (GET_MODE (XEXP (x, 0)))
9818 {
9819 case E_QImode:
9820 return (1 + (mode == DImode ? 4 : 0)
9821 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9822
9823 case E_HImode:
9824 return (4 + (mode == DImode ? 4 : 0)
9825 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9826
9827 case E_SImode:
9828 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9829
9830 default:
9831 return 99;
9832 }
9833
9834 default:
9835 return 99;
9836 }
9837 }
9838
9839 /* Helper function for arm_rtx_costs. If one operand of the OP, a
9840 PLUS, adds the carry flag, then return the other operand. If
9841 neither is a carry, return OP unchanged. */
9842 static rtx
9843 strip_carry_operation (rtx op)
9844 {
9845 gcc_assert (GET_CODE (op) == PLUS);
9846 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
9847 return XEXP (op, 1);
9848 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
9849 return XEXP (op, 0);
9850 return op;
9851 }
9852
9853 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9854 operand, then return the operand that is being shifted. If the shift
9855 is not by a constant, then set SHIFT_REG to point to the operand.
9856 Return NULL if OP is not a shifter operand. */
9857 static rtx
9858 shifter_op_p (rtx op, rtx *shift_reg)
9859 {
9860 enum rtx_code code = GET_CODE (op);
9861
9862 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9863 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9864 return XEXP (op, 0);
9865 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9866 return XEXP (op, 0);
9867 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9868 || code == ASHIFTRT)
9869 {
9870 if (!CONST_INT_P (XEXP (op, 1)))
9871 *shift_reg = XEXP (op, 1);
9872 return XEXP (op, 0);
9873 }
9874
9875 return NULL;
9876 }
9877
9878 static bool
9879 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9880 {
9881 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9882 rtx_code code = GET_CODE (x);
9883 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9884
9885 switch (XINT (x, 1))
9886 {
9887 case UNSPEC_UNALIGNED_LOAD:
9888 /* We can only do unaligned loads into the integer unit, and we can't
9889 use LDM or LDRD. */
9890 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9891 if (speed_p)
9892 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9893 + extra_cost->ldst.load_unaligned);
9894
9895 #ifdef NOT_YET
9896 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9897 ADDR_SPACE_GENERIC, speed_p);
9898 #endif
9899 return true;
9900
9901 case UNSPEC_UNALIGNED_STORE:
9902 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9903 if (speed_p)
9904 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9905 + extra_cost->ldst.store_unaligned);
9906
9907 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9908 #ifdef NOT_YET
9909 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9910 ADDR_SPACE_GENERIC, speed_p);
9911 #endif
9912 return true;
9913
9914 case UNSPEC_VRINTZ:
9915 case UNSPEC_VRINTP:
9916 case UNSPEC_VRINTM:
9917 case UNSPEC_VRINTR:
9918 case UNSPEC_VRINTX:
9919 case UNSPEC_VRINTA:
9920 if (speed_p)
9921 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9922
9923 return true;
9924 default:
9925 *cost = COSTS_N_INSNS (2);
9926 break;
9927 }
9928 return true;
9929 }
9930
9931 /* Cost of a libcall. We assume one insn per argument, an amount for the
9932 call (one insn for -Os) and then one for processing the result. */
9933 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
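/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size.  */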
9934
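/* Helper for the PLUS and MINUS cases below: if operand IDX of X is a
   left-shift-like shifter operand, add the cost of an arithmetic
   operation with a shifted (or register-shifted) operand plus the cost
   of the other operand, and return true from the enclosing function.  */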
9935 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9936 do \
9937 { \
9938 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9939 if (shift_op != NULL \
9940 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9941 { \
9942 if (shift_reg) \
9943 { \
9944 if (speed_p) \
9945 *cost += extra_cost->alu.arith_shift_reg; \
9946 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9947 ASHIFT, 1, speed_p); \
9948 } \
9949 else if (speed_p) \
9950 *cost += extra_cost->alu.arith_shift; \
9951 \
9952 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9953 ASHIFT, 0, speed_p) \
9954 + rtx_cost (XEXP (x, 1 - IDX), \
9955 GET_MODE (shift_op), \
9956 OP, 1, speed_p)); \
9957 return true; \
9958 } \
9959 } \
9960 while (0)
9961
9962 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9963 considering the costs of the addressing mode and memory access
9964 separately. */
9965 static bool
9966 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9967 int *cost, bool speed_p)
9968 {
9969 machine_mode mode = GET_MODE (x);
9970
9971 *cost = COSTS_N_INSNS (1);
9972
9973 if (flag_pic
9974 && GET_CODE (XEXP (x, 0)) == PLUS
9975 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9976 /* This will be split into two instructions. Add the cost of the
9977 additional instruction here. The cost of the memory access is computed
9978 below. See arm.md:calculate_pic_address. */
9979 *cost += COSTS_N_INSNS (1);
9980
9981 /* Calculate cost of the addressing mode. */
9982 if (speed_p)
9983 {
9984 arm_addr_mode_op op_type;
9985 switch (GET_CODE (XEXP (x, 0)))
9986 {
9987 default:
9988 case REG:
9989 op_type = AMO_DEFAULT;
9990 break;
9991 case MINUS:
9992 /* MINUS does not appear in RTL, but the architecture supports it,
9993 so handle this case defensively. */
9994 /* fall through */
9995 case PLUS:
9996 op_type = AMO_NO_WB;
9997 break;
9998 case PRE_INC:
9999 case PRE_DEC:
10000 case POST_INC:
10001 case POST_DEC:
10002 case PRE_MODIFY:
10003 case POST_MODIFY:
10004 op_type = AMO_WB;
10005 break;
10006 }
10007
10008 if (VECTOR_MODE_P (mode))
10009 *cost += current_tune->addr_mode_costs->vector[op_type];
10010 else if (FLOAT_MODE_P (mode))
10011 *cost += current_tune->addr_mode_costs->fp[op_type];
10012 else
10013 *cost += current_tune->addr_mode_costs->integer[op_type];
10014 }
10015
10016 /* Calculate cost of memory access. */
10017 if (speed_p)
10018 {
10019 if (FLOAT_MODE_P (mode))
10020 {
10021 if (GET_MODE_SIZE (mode) == 8)
10022 *cost += extra_cost->ldst.loadd;
10023 else
10024 *cost += extra_cost->ldst.loadf;
10025 }
10026 else if (VECTOR_MODE_P (mode))
10027 *cost += extra_cost->ldst.loadv;
10028 else
10029 {
10030 /* Integer modes */
10031 if (GET_MODE_SIZE (mode) == 8)
10032 *cost += extra_cost->ldst.ldrd;
10033 else
10034 *cost += extra_cost->ldst.load;
10035 }
10036 }
10037
10038 return true;
10039 }
10040
10041 /* RTX costs. Make an estimate of the cost of executing the operation
10042 X, which is contained within an operation with code OUTER_CODE.
10043 SPEED_P indicates whether the cost desired is the performance cost,
10044 or the size cost. The estimate is stored in COST and the return
10045 value is TRUE if the cost calculation is final, or FALSE if the
10046 caller should recurse through the operands of X to add additional
10047 costs.
10048
10049 We currently make no attempt to model the size savings of Thumb-2
10050 16-bit instructions. At the normal points in compilation where
10051 this code is called we have no measure of whether the condition
10052 flags are live or not, and thus no realistic way to determine what
10053 the size will eventually be. */
10054 static bool
10055 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10056 const struct cpu_cost_table *extra_cost,
10057 int *cost, bool speed_p)
10058 {
10059 machine_mode mode = GET_MODE (x);
10060
10061 *cost = COSTS_N_INSNS (1);
10062
10063 if (TARGET_THUMB1)
10064 {
10065 if (speed_p)
10066 *cost = thumb1_rtx_costs (x, code, outer_code);
10067 else
10068 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10069 return true;
10070 }
10071
10072 switch (code)
10073 {
10074 case SET:
10075 *cost = 0;
10076 /* SET RTXs don't have a mode so we get it from the destination. */
10077 mode = GET_MODE (SET_DEST (x));
10078
10079 if (REG_P (SET_SRC (x))
10080 && REG_P (SET_DEST (x)))
10081 {
10082 /* Assume that most copies can be done with a single insn,
10083 unless we don't have HW FP, in which case everything
10084 larger than word mode will require two insns. */
10085 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10086 && GET_MODE_SIZE (mode) > 4)
10087 || mode == DImode)
10088 ? 2 : 1);
10089 /* Conditional register moves can be encoded
10090 in 16 bits in Thumb mode. */
10091 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10092 *cost >>= 1;
10093
10094 return true;
10095 }
10096
10097 if (CONST_INT_P (SET_SRC (x)))
10098 {
10099 /* Handle CONST_INT here, since the value doesn't have a mode
10100 and we would otherwise be unable to work out the true cost. */
10101 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10102 0, speed_p);
10103 outer_code = SET;
10104 /* Slightly lower the cost of setting a core reg to a constant.
10105 This helps break up chains and allows for better scheduling. */
10106 if (REG_P (SET_DEST (x))
10107 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10108 *cost -= 1;
10109 x = SET_SRC (x);
10110 /* Immediate moves with an immediate in the range [0, 255] can be
10111 encoded in 16 bits in Thumb mode. */
10112 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10113 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10114 *cost >>= 1;
10115 goto const_int_cost;
10116 }
10117
10118 return false;
10119
10120 case MEM:
10121 return arm_mem_costs (x, extra_cost, cost, speed_p);
10122
10123 case PARALLEL:
10124 {
10125 /* Calculations of LDM costs are complex. We assume an initial cost
10126 (ldm_1st) which will load the number of registers mentioned in
10127 ldm_regs_per_insn_1st registers; then each additional
10128 ldm_regs_per_insn_subsequent registers cost one more insn. The
10129 formula for N regs is thus:
10130
10131 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10132 + ldm_regs_per_insn_subsequent - 1)
10133 / ldm_regs_per_insn_subsequent).
10134
10135 Additional costs may also be added for addressing. A similar
10136 formula is used for STM. */
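	/* As a worked example, if ldm_regs_per_insn_1st and
	   ldm_regs_per_insn_subsequent were both 2 (hypothetical tuning
	   values), an 8-register LDM would cost
	   ldm_1st + COSTS_N_INSNS ((8 - 2 + 2 - 1) / 2)
	   = ldm_1st + COSTS_N_INSNS (3).  */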
10137
10138 bool is_ldm = load_multiple_operation (x, SImode);
10139 bool is_stm = store_multiple_operation (x, SImode);
10140
10141 if (is_ldm || is_stm)
10142 {
10143 if (speed_p)
10144 {
10145 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10146 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10147 ? extra_cost->ldst.ldm_regs_per_insn_1st
10148 : extra_cost->ldst.stm_regs_per_insn_1st;
10149 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10150 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10151 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10152
10153 *cost += regs_per_insn_1st
10154 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10155 + regs_per_insn_sub - 1)
10156 / regs_per_insn_sub);
10157 return true;
10158 }
10159
10160 }
10161 return false;
10162 }
10163 case DIV:
10164 case UDIV:
10165 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10166 && (mode == SFmode || !TARGET_VFP_SINGLE))
10167 *cost += COSTS_N_INSNS (speed_p
10168 ? extra_cost->fp[mode != SFmode].div : 0);
10169 else if (mode == SImode && TARGET_IDIV)
10170 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10171 else
10172 *cost = LIBCALL_COST (2);
10173
10174 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10175 are possible, udiv is preferred. */
10176 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10177 return false; /* All arguments must be in registers. */
10178
10179 case MOD:
10180 /* MOD by a power of 2 can be expanded as:
10181 rsbs r1, r0, #0
10182 and r0, r0, #(n - 1)
10183 and r1, r1, #(n - 1)
10184 rsbpl r0, r1, #0. */
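      /* Together with the base COSTS_N_INSNS (1) assigned on entry to
	 this function, the COSTS_N_INSNS (3) added below accounts for
	 that four-instruction sequence.  */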
10185 if (CONST_INT_P (XEXP (x, 1))
10186 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10187 && mode == SImode)
10188 {
10189 *cost += COSTS_N_INSNS (3);
10190
10191 if (speed_p)
10192 *cost += 2 * extra_cost->alu.logical
10193 + extra_cost->alu.arith;
10194 return true;
10195 }
10196
10197 /* Fall-through. */
10198 case UMOD:
10199 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10200 are possible, udiv is preferred. */
10201 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10202 return false; /* All arguments must be in registers. */
10203
10204 case ROTATE:
10205 if (mode == SImode && REG_P (XEXP (x, 1)))
10206 {
10207 *cost += (COSTS_N_INSNS (1)
10208 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10209 if (speed_p)
10210 *cost += extra_cost->alu.shift_reg;
10211 return true;
10212 }
10213 /* Fall through */
10214 case ROTATERT:
10215 case ASHIFT:
10216 case LSHIFTRT:
10217 case ASHIFTRT:
10218 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10219 {
10220 *cost += (COSTS_N_INSNS (2)
10221 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10222 if (speed_p)
10223 *cost += 2 * extra_cost->alu.shift;
10224 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10225 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10226 *cost += 1;
10227 return true;
10228 }
10229 else if (mode == SImode)
10230 {
10231 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10232 /* Slightly disparage register shifts at -Os, but not by much. */
10233 if (!CONST_INT_P (XEXP (x, 1)))
10234 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10235 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10236 return true;
10237 }
10238 else if (GET_MODE_CLASS (mode) == MODE_INT
10239 && GET_MODE_SIZE (mode) < 4)
10240 {
10241 if (code == ASHIFT)
10242 {
10243 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10244 /* Slightly disparage register shifts at -Os, but not by
10245 much. */
10246 if (!CONST_INT_P (XEXP (x, 1)))
10247 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10248 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10249 }
10250 else if (code == LSHIFTRT || code == ASHIFTRT)
10251 {
10252 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10253 {
10254 /* Can use SBFX/UBFX. */
10255 if (speed_p)
10256 *cost += extra_cost->alu.bfx;
10257 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10258 }
10259 else
10260 {
10261 *cost += COSTS_N_INSNS (1);
10262 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10263 if (speed_p)
10264 {
10265 if (CONST_INT_P (XEXP (x, 1)))
10266 *cost += 2 * extra_cost->alu.shift;
10267 else
10268 *cost += (extra_cost->alu.shift
10269 + extra_cost->alu.shift_reg);
10270 }
10271 else
10272 /* Slightly disparage register shifts. */
10273 *cost += !CONST_INT_P (XEXP (x, 1));
10274 }
10275 }
10276 else /* Rotates. */
10277 {
10278 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10279 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10280 if (speed_p)
10281 {
10282 if (CONST_INT_P (XEXP (x, 1)))
10283 *cost += (2 * extra_cost->alu.shift
10284 + extra_cost->alu.log_shift);
10285 else
10286 *cost += (extra_cost->alu.shift
10287 + extra_cost->alu.shift_reg
10288 + extra_cost->alu.log_shift_reg);
10289 }
10290 }
10291 return true;
10292 }
10293
10294 *cost = LIBCALL_COST (2);
10295 return false;
10296
10297 case BSWAP:
10298 if (arm_arch6)
10299 {
10300 if (mode == SImode)
10301 {
10302 if (speed_p)
10303 *cost += extra_cost->alu.rev;
10304
10305 return false;
10306 }
10307 }
10308 else
10309 {
10310 /* No rev instruction available. Look at arm_legacy_rev
10311 and thumb_legacy_rev for the form of RTL used then. */
10312 if (TARGET_THUMB)
10313 {
10314 *cost += COSTS_N_INSNS (9);
10315
10316 if (speed_p)
10317 {
10318 *cost += 6 * extra_cost->alu.shift;
10319 *cost += 3 * extra_cost->alu.logical;
10320 }
10321 }
10322 else
10323 {
10324 *cost += COSTS_N_INSNS (4);
10325
10326 if (speed_p)
10327 {
10328 *cost += 2 * extra_cost->alu.shift;
10329 *cost += extra_cost->alu.arith_shift;
10330 *cost += 2 * extra_cost->alu.logical;
10331 }
10332 }
10333 return true;
10334 }
10335 return false;
10336
10337 case MINUS:
10338 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10339 && (mode == SFmode || !TARGET_VFP_SINGLE))
10340 {
10341 if (GET_CODE (XEXP (x, 0)) == MULT
10342 || GET_CODE (XEXP (x, 1)) == MULT)
10343 {
10344 rtx mul_op0, mul_op1, sub_op;
10345
10346 if (speed_p)
10347 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10348
10349 if (GET_CODE (XEXP (x, 0)) == MULT)
10350 {
10351 mul_op0 = XEXP (XEXP (x, 0), 0);
10352 mul_op1 = XEXP (XEXP (x, 0), 1);
10353 sub_op = XEXP (x, 1);
10354 }
10355 else
10356 {
10357 mul_op0 = XEXP (XEXP (x, 1), 0);
10358 mul_op1 = XEXP (XEXP (x, 1), 1);
10359 sub_op = XEXP (x, 0);
10360 }
10361
10362 /* The first operand of the multiply may be optionally
10363 negated. */
10364 if (GET_CODE (mul_op0) == NEG)
10365 mul_op0 = XEXP (mul_op0, 0);
10366
10367 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10368 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10369 + rtx_cost (sub_op, mode, code, 0, speed_p));
10370
10371 return true;
10372 }
10373
10374 if (speed_p)
10375 *cost += extra_cost->fp[mode != SFmode].addsub;
10376 return false;
10377 }
10378
10379 if (mode == SImode)
10380 {
10381 rtx shift_by_reg = NULL;
10382 rtx shift_op;
10383 rtx non_shift_op;
10384 rtx op0 = XEXP (x, 0);
10385 rtx op1 = XEXP (x, 1);
10386
10387 /* Factor out any borrow operation. There's more than one way
10388 of expressing this; try to recognize them all. */
10389 if (GET_CODE (op0) == MINUS)
10390 {
10391 if (arm_borrow_operation (op1, SImode))
10392 {
10393 op1 = XEXP (op0, 1);
10394 op0 = XEXP (op0, 0);
10395 }
10396 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10397 op0 = XEXP (op0, 0);
10398 }
10399 else if (GET_CODE (op1) == PLUS
10400 && arm_borrow_operation (XEXP (op1, 0), SImode))
10401 op1 = XEXP (op1, 0);
10402 else if (GET_CODE (op0) == NEG
10403 && arm_borrow_operation (op1, SImode))
10404 {
10405 /* Negate with carry-in. For Thumb2 this is done with
10406 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10407 RSC instruction that exists in Arm mode. */
10408 if (speed_p)
10409 *cost += (TARGET_THUMB2
10410 ? extra_cost->alu.arith_shift
10411 : extra_cost->alu.arith);
10412 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10413 return true;
10414 }
10415 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10416 Note we do mean ~borrow here. */
10417 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10418 {
10419 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10420 return true;
10421 }
10422
10423 shift_op = shifter_op_p (op0, &shift_by_reg);
10424 if (shift_op == NULL)
10425 {
10426 shift_op = shifter_op_p (op1, &shift_by_reg);
10427 non_shift_op = op0;
10428 }
10429 else
10430 non_shift_op = op1;
10431
10432 if (shift_op != NULL)
10433 {
10434 if (shift_by_reg != NULL)
10435 {
10436 if (speed_p)
10437 *cost += extra_cost->alu.arith_shift_reg;
10438 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10439 }
10440 else if (speed_p)
10441 *cost += extra_cost->alu.arith_shift;
10442
10443 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10444 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10445 return true;
10446 }
10447
10448 if (arm_arch_thumb2
10449 && GET_CODE (XEXP (x, 1)) == MULT)
10450 {
10451 /* MLS. */
10452 if (speed_p)
10453 *cost += extra_cost->mult[0].add;
10454 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10455 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10456 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10457 return true;
10458 }
10459
10460 if (CONST_INT_P (op0))
10461 {
10462 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10463 INTVAL (op0), NULL_RTX,
10464 NULL_RTX, 1, 0);
10465 *cost = COSTS_N_INSNS (insns);
10466 if (speed_p)
10467 *cost += insns * extra_cost->alu.arith;
10468 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10469 return true;
10470 }
10471 else if (speed_p)
10472 *cost += extra_cost->alu.arith;
10473
10474 /* Don't recurse as we don't want to cost any borrow that
10475 we've stripped. */
10476 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10477 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10478 return true;
10479 }
10480
10481 if (GET_MODE_CLASS (mode) == MODE_INT
10482 && GET_MODE_SIZE (mode) < 4)
10483 {
10484 rtx shift_op, shift_reg;
10485 shift_reg = NULL;
10486
10487 /* We check both sides of the MINUS for shifter operands since,
10488 unlike PLUS, it's not commutative. */
10489
10490 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10491 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10492
10493 /* Slightly disparage, as we might need to widen the result. */
10494 *cost += 1;
10495 if (speed_p)
10496 *cost += extra_cost->alu.arith;
10497
10498 if (CONST_INT_P (XEXP (x, 0)))
10499 {
10500 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10501 return true;
10502 }
10503
10504 return false;
10505 }
10506
10507 if (mode == DImode)
10508 {
10509 *cost += COSTS_N_INSNS (1);
10510
10511 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10512 {
10513 rtx op1 = XEXP (x, 1);
10514
10515 if (speed_p)
10516 *cost += 2 * extra_cost->alu.arith;
10517
10518 if (GET_CODE (op1) == ZERO_EXTEND)
10519 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10520 0, speed_p);
10521 else
10522 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10523 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10524 0, speed_p);
10525 return true;
10526 }
10527 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10528 {
10529 if (speed_p)
10530 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10531 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10532 0, speed_p)
10533 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10534 return true;
10535 }
10536 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10537 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10538 {
10539 if (speed_p)
10540 *cost += (extra_cost->alu.arith
10541 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10542 ? extra_cost->alu.arith
10543 : extra_cost->alu.arith_shift));
10544 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10545 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10546 GET_CODE (XEXP (x, 1)), 0, speed_p));
10547 return true;
10548 }
10549
10550 if (speed_p)
10551 *cost += 2 * extra_cost->alu.arith;
10552 return false;
10553 }
10554
10555 /* Vector mode? */
10556
10557 *cost = LIBCALL_COST (2);
10558 return false;
10559
10560 case PLUS:
10561 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10562 && (mode == SFmode || !TARGET_VFP_SINGLE))
10563 {
10564 if (GET_CODE (XEXP (x, 0)) == MULT)
10565 {
10566 rtx mul_op0, mul_op1, add_op;
10567
10568 if (speed_p)
10569 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10570
10571 mul_op0 = XEXP (XEXP (x, 0), 0);
10572 mul_op1 = XEXP (XEXP (x, 0), 1);
10573 add_op = XEXP (x, 1);
10574
10575 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10576 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10577 + rtx_cost (add_op, mode, code, 0, speed_p));
10578
10579 return true;
10580 }
10581
10582 if (speed_p)
10583 *cost += extra_cost->fp[mode != SFmode].addsub;
10584 return false;
10585 }
10586 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10587 {
10588 *cost = LIBCALL_COST (2);
10589 return false;
10590 }
10591
10592 /* Narrow modes can be synthesized in SImode, but the range
10593 of useful sub-operations is limited. Check for shift operations
10594 on one of the operands. Only left shifts can be used in the
10595 narrow modes. */
10596 if (GET_MODE_CLASS (mode) == MODE_INT
10597 && GET_MODE_SIZE (mode) < 4)
10598 {
10599 rtx shift_op, shift_reg;
10600 shift_reg = NULL;
10601
10602 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10603
10604 if (CONST_INT_P (XEXP (x, 1)))
10605 {
10606 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10607 INTVAL (XEXP (x, 1)), NULL_RTX,
10608 NULL_RTX, 1, 0);
10609 *cost = COSTS_N_INSNS (insns);
10610 if (speed_p)
10611 *cost += insns * extra_cost->alu.arith;
10612 /* Slightly penalize a narrow operation as the result may
10613 need widening. */
10614 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10615 return true;
10616 }
10617
10618 /* Slightly penalize a narrow operation as the result may
10619 need widening. */
10620 *cost += 1;
10621 if (speed_p)
10622 *cost += extra_cost->alu.arith;
10623
10624 return false;
10625 }
10626
10627 if (mode == SImode)
10628 {
10629 rtx shift_op, shift_reg;
10630
10631 if (TARGET_INT_SIMD
10632 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10633 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10634 {
10635 /* UXTA[BH] or SXTA[BH]. */
10636 if (speed_p)
10637 *cost += extra_cost->alu.extend_arith;
10638 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10639 0, speed_p)
10640 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10641 return true;
10642 }
10643
10644 rtx op0 = XEXP (x, 0);
10645 rtx op1 = XEXP (x, 1);
10646
10647 /* Handle a side effect of adding in the carry to an addition. */
10648 if (GET_CODE (op0) == PLUS
10649 && arm_carry_operation (op1, mode))
10650 {
10651 op1 = XEXP (op0, 1);
10652 op0 = XEXP (op0, 0);
10653 }
10654 else if (GET_CODE (op1) == PLUS
10655 && arm_carry_operation (op0, mode))
10656 {
10657 op0 = XEXP (op1, 0);
10658 op1 = XEXP (op1, 1);
10659 }
10660 else if (GET_CODE (op0) == PLUS)
10661 {
10662 op0 = strip_carry_operation (op0);
10663 if (swap_commutative_operands_p (op0, op1))
10664 std::swap (op0, op1);
10665 }
10666
10667 if (arm_carry_operation (op0, mode))
10668 {
10669 /* Adding the carry to a register is a canonicalization of
10670 adding 0 to the register plus the carry. */
10671 if (speed_p)
10672 *cost += extra_cost->alu.arith;
10673 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10674 return true;
10675 }
10676
10677 shift_reg = NULL;
10678 shift_op = shifter_op_p (op0, &shift_reg);
10679 if (shift_op != NULL)
10680 {
10681 if (shift_reg)
10682 {
10683 if (speed_p)
10684 *cost += extra_cost->alu.arith_shift_reg;
10685 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10686 }
10687 else if (speed_p)
10688 *cost += extra_cost->alu.arith_shift;
10689
10690 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10691 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10692 return true;
10693 }
10694
10695 if (GET_CODE (op0) == MULT)
10696 {
10697 rtx mul_op = op0;
10698
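	      /* The condition below recognizes the operand shapes accepted
		 by SMLA[BT][BT]: a 16-bit x 16-bit multiply-accumulate where
		 each multiply operand is either a sign_extend from HImode or
		 the top halfword selected by an arithmetic shift right of 16,
		 e.g. roughly
		 (plus (mult (sign_extend:SI ...)
			     (ashiftrt:SI ... (const_int 16)))
		       ...).  */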
10699 if (TARGET_DSP_MULTIPLY
10700 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10701 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10702 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10703 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10704 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10705 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10706 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10707 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10708 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10709 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10710 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10711 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10712 == 16))))))
10713 {
10714 /* SMLA[BT][BT]. */
10715 if (speed_p)
10716 *cost += extra_cost->mult[0].extend_add;
10717 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10718 SIGN_EXTEND, 0, speed_p)
10719 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10720 SIGN_EXTEND, 0, speed_p)
10721 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10722 return true;
10723 }
10724
10725 if (speed_p)
10726 *cost += extra_cost->mult[0].add;
10727 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10728 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10729 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10730 return true;
10731 }
10732
10733 if (CONST_INT_P (op1))
10734 {
10735 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10736 INTVAL (op1), NULL_RTX,
10737 NULL_RTX, 1, 0);
10738 *cost = COSTS_N_INSNS (insns);
10739 if (speed_p)
10740 *cost += insns * extra_cost->alu.arith;
10741 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10742 return true;
10743 }
10744
10745 if (speed_p)
10746 *cost += extra_cost->alu.arith;
10747
10748 /* Don't recurse here because we want to test the operands
10749 without any carry operation. */
10750 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10751 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10752 return true;
10753 }
10754
10755 if (mode == DImode)
10756 {
10757 if (GET_CODE (XEXP (x, 0)) == MULT
10758 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10759 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10760 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10761 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10762 {
10763 if (speed_p)
10764 *cost += extra_cost->mult[1].extend_add;
10765 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10766 ZERO_EXTEND, 0, speed_p)
10767 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10768 ZERO_EXTEND, 0, speed_p)
10769 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10770 return true;
10771 }
10772
10773 *cost += COSTS_N_INSNS (1);
10774
10775 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10776 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10777 {
10778 if (speed_p)
10779 *cost += (extra_cost->alu.arith
10780 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10781 ? extra_cost->alu.arith
10782 : extra_cost->alu.arith_shift));
10783
10784 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10785 0, speed_p)
10786 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10787 return true;
10788 }
10789
10790 if (speed_p)
10791 *cost += 2 * extra_cost->alu.arith;
10792 return false;
10793 }
10794
10795 /* Vector mode? */
10796 *cost = LIBCALL_COST (2);
10797 return false;
10798 case IOR:
10799 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10800 {
10801 if (speed_p)
10802 *cost += extra_cost->alu.rev;
10803
10804 return true;
10805 }
10806 /* Fall through. */
10807 case AND: case XOR:
10808 if (mode == SImode)
10809 {
10810 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10811 rtx op0 = XEXP (x, 0);
10812 rtx shift_op, shift_reg;
10813
10814 if (subcode == NOT
10815 && (code == AND
10816 || (code == IOR && TARGET_THUMB2)))
10817 op0 = XEXP (op0, 0);
10818
10819 shift_reg = NULL;
10820 shift_op = shifter_op_p (op0, &shift_reg);
10821 if (shift_op != NULL)
10822 {
10823 if (shift_reg)
10824 {
10825 if (speed_p)
10826 *cost += extra_cost->alu.log_shift_reg;
10827 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10828 }
10829 else if (speed_p)
10830 *cost += extra_cost->alu.log_shift;
10831
10832 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10833 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10834 return true;
10835 }
10836
10837 if (CONST_INT_P (XEXP (x, 1)))
10838 {
10839 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10840 INTVAL (XEXP (x, 1)), NULL_RTX,
10841 NULL_RTX, 1, 0);
10842
10843 *cost = COSTS_N_INSNS (insns);
10844 if (speed_p)
10845 *cost += insns * extra_cost->alu.logical;
10846 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10847 return true;
10848 }
10849
10850 if (speed_p)
10851 *cost += extra_cost->alu.logical;
10852 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10853 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10854 return true;
10855 }
10856
10857 if (mode == DImode)
10858 {
10859 rtx op0 = XEXP (x, 0);
10860 enum rtx_code subcode = GET_CODE (op0);
10861
10862 *cost += COSTS_N_INSNS (1);
10863
10864 if (subcode == NOT
10865 && (code == AND
10866 || (code == IOR && TARGET_THUMB2)))
10867 op0 = XEXP (op0, 0);
10868
10869 if (GET_CODE (op0) == ZERO_EXTEND)
10870 {
10871 if (speed_p)
10872 *cost += 2 * extra_cost->alu.logical;
10873
10874 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10875 0, speed_p)
10876 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10877 return true;
10878 }
10879 else if (GET_CODE (op0) == SIGN_EXTEND)
10880 {
10881 if (speed_p)
10882 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10883
10884 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10885 0, speed_p)
10886 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10887 return true;
10888 }
10889
10890 if (speed_p)
10891 *cost += 2 * extra_cost->alu.logical;
10892
10893 return true;
10894 }
10895 /* Vector mode? */
10896
10897 *cost = LIBCALL_COST (2);
10898 return false;
10899
10900 case MULT:
10901 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10902 && (mode == SFmode || !TARGET_VFP_SINGLE))
10903 {
10904 rtx op0 = XEXP (x, 0);
10905
10906 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10907 op0 = XEXP (op0, 0);
10908
10909 if (speed_p)
10910 *cost += extra_cost->fp[mode != SFmode].mult;
10911
10912 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10913 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10914 return true;
10915 }
10916 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10917 {
10918 *cost = LIBCALL_COST (2);
10919 return false;
10920 }
10921
10922 if (mode == SImode)
10923 {
10924 if (TARGET_DSP_MULTIPLY
10925 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10926 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10927 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10928 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10929 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10930 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10931 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10932 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10933 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10934 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10935 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10936 && (INTVAL (XEXP (XEXP (x, 1), 1))
10937 == 16))))))
10938 {
10939 /* SMUL[TB][TB]. */
10940 if (speed_p)
10941 *cost += extra_cost->mult[0].extend;
10942 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10943 SIGN_EXTEND, 0, speed_p);
10944 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10945 SIGN_EXTEND, 1, speed_p);
10946 return true;
10947 }
10948 if (speed_p)
10949 *cost += extra_cost->mult[0].simple;
10950 return false;
10951 }
10952
10953 if (mode == DImode)
10954 {
10955 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10956 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10957 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10958 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10959 {
10960 if (speed_p)
10961 *cost += extra_cost->mult[1].extend;
10962 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10963 ZERO_EXTEND, 0, speed_p)
10964 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10965 ZERO_EXTEND, 0, speed_p));
10966 return true;
10967 }
10968
10969 *cost = LIBCALL_COST (2);
10970 return false;
10971 }
10972
10973 /* Vector mode? */
10974 *cost = LIBCALL_COST (2);
10975 return false;
10976
10977 case NEG:
10978 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10979 && (mode == SFmode || !TARGET_VFP_SINGLE))
10980 {
10981 if (GET_CODE (XEXP (x, 0)) == MULT)
10982 {
10983 /* VNMUL. */
10984 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10985 return true;
10986 }
10987
10988 if (speed_p)
10989 *cost += extra_cost->fp[mode != SFmode].neg;
10990
10991 return false;
10992 }
10993 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10994 {
10995 *cost = LIBCALL_COST (1);
10996 return false;
10997 }
10998
10999 if (mode == SImode)
11000 {
11001 if (GET_CODE (XEXP (x, 0)) == ABS)
11002 {
11003 *cost += COSTS_N_INSNS (1);
11004 /* Assume the non-flag-changing variant. */
11005 if (speed_p)
11006 *cost += (extra_cost->alu.log_shift
11007 + extra_cost->alu.arith_shift);
11008 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11009 return true;
11010 }
11011
11012 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11013 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11014 {
11015 *cost += COSTS_N_INSNS (1);
11016 /* No extra cost for MOV imm and MVN imm. */
11017 /* If the comparison op is using the flags, there's no further
11018	     cost; otherwise we need to add the cost of the comparison.  */
11019 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11020 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11021 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11022 {
11023 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11024 *cost += (COSTS_N_INSNS (1)
11025 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11026 0, speed_p)
11027 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11028 1, speed_p));
11029 if (speed_p)
11030 *cost += extra_cost->alu.arith;
11031 }
11032 return true;
11033 }
11034
11035 if (speed_p)
11036 *cost += extra_cost->alu.arith;
11037 return false;
11038 }
11039
11040 if (GET_MODE_CLASS (mode) == MODE_INT
11041 && GET_MODE_SIZE (mode) < 4)
11042 {
11043 /* Slightly disparage, as we might need an extend operation. */
11044 *cost += 1;
11045 if (speed_p)
11046 *cost += extra_cost->alu.arith;
11047 return false;
11048 }
11049
11050 if (mode == DImode)
11051 {
11052 *cost += COSTS_N_INSNS (1);
11053 if (speed_p)
11054 *cost += 2 * extra_cost->alu.arith;
11055 return false;
11056 }
11057
11058 /* Vector mode? */
11059 *cost = LIBCALL_COST (1);
11060 return false;
11061
11062 case NOT:
11063 if (mode == SImode)
11064 {
11065 rtx shift_op;
11066 rtx shift_reg = NULL;
11067
11068 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11069
11070 if (shift_op)
11071 {
11072 if (shift_reg != NULL)
11073 {
11074 if (speed_p)
11075 *cost += extra_cost->alu.log_shift_reg;
11076 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11077 }
11078 else if (speed_p)
11079 *cost += extra_cost->alu.log_shift;
11080 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11081 return true;
11082 }
11083
11084 if (speed_p)
11085 *cost += extra_cost->alu.logical;
11086 return false;
11087 }
11088 if (mode == DImode)
11089 {
11090 *cost += COSTS_N_INSNS (1);
11091 return false;
11092 }
11093
11094 /* Vector mode? */
11095
11096 *cost += LIBCALL_COST (1);
11097 return false;
11098
11099 case IF_THEN_ELSE:
11100 {
11101 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11102 {
11103 *cost += COSTS_N_INSNS (3);
11104 return true;
11105 }
11106 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11107 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11108
11109 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11110 /* Assume that if one arm of the if_then_else is a register,
11111	 it will be tied with the result and eliminate the
11112 conditional insn. */
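	/* For example, (if_then_else (cond) (reg) (const_int 1)) is costed
	   as just the condition plus the constant arm, on the assumption
	   that the register arm is free.  */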
11113 if (REG_P (XEXP (x, 1)))
11114 *cost += op2cost;
11115 else if (REG_P (XEXP (x, 2)))
11116 *cost += op1cost;
11117 else
11118 {
11119 if (speed_p)
11120 {
11121 if (extra_cost->alu.non_exec_costs_exec)
11122 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11123 else
11124 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11125 }
11126 else
11127 *cost += op1cost + op2cost;
11128 }
11129 }
11130 return true;
11131
11132 case COMPARE:
11133 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11134 *cost = 0;
11135 else
11136 {
11137 machine_mode op0mode;
11138 /* We'll mostly assume that the cost of a compare is the cost of the
11139 LHS. However, there are some notable exceptions. */
11140
11141 /* Floating point compares are never done as side-effects. */
11142 op0mode = GET_MODE (XEXP (x, 0));
11143 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11144 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11145 {
11146 if (speed_p)
11147 *cost += extra_cost->fp[op0mode != SFmode].compare;
11148
11149 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11150 {
11151 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11152 return true;
11153 }
11154
11155 return false;
11156 }
11157 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11158 {
11159 *cost = LIBCALL_COST (2);
11160 return false;
11161 }
11162
11163 /* DImode compares normally take two insns. */
11164 if (op0mode == DImode)
11165 {
11166 *cost += COSTS_N_INSNS (1);
11167 if (speed_p)
11168 *cost += 2 * extra_cost->alu.arith;
11169 return false;
11170 }
11171
11172 if (op0mode == SImode)
11173 {
11174 rtx shift_op;
11175 rtx shift_reg;
11176
11177 if (XEXP (x, 1) == const0_rtx
11178 && !(REG_P (XEXP (x, 0))
11179 || (GET_CODE (XEXP (x, 0)) == SUBREG
11180 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11181 {
11182 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11183
11184 /* Multiply operations that set the flags are often
11185 significantly more expensive. */
11186 if (speed_p
11187 && GET_CODE (XEXP (x, 0)) == MULT
11188 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11189 *cost += extra_cost->mult[0].flag_setting;
11190
11191 if (speed_p
11192 && GET_CODE (XEXP (x, 0)) == PLUS
11193 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11194 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11195 0), 1), mode))
11196 *cost += extra_cost->mult[0].flag_setting;
11197 return true;
11198 }
11199
11200 shift_reg = NULL;
11201 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11202 if (shift_op != NULL)
11203 {
11204 if (shift_reg != NULL)
11205 {
11206 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11207 1, speed_p);
11208 if (speed_p)
11209 *cost += extra_cost->alu.arith_shift_reg;
11210 }
11211 else if (speed_p)
11212 *cost += extra_cost->alu.arith_shift;
11213 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11214 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11215 return true;
11216 }
11217
11218 if (speed_p)
11219 *cost += extra_cost->alu.arith;
11220 if (CONST_INT_P (XEXP (x, 1))
11221 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11222 {
11223 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11224 return true;
11225 }
11226 return false;
11227 }
11228
11229 /* Vector mode? */
11230
11231 *cost = LIBCALL_COST (2);
11232 return false;
11233 }
11234 return true;
11235
11236 case EQ:
11237 case NE:
11238 case LT:
11239 case LE:
11240 case GT:
11241 case GE:
11242 case LTU:
11243 case LEU:
11244 case GEU:
11245 case GTU:
11246 case ORDERED:
11247 case UNORDERED:
11248 case UNEQ:
11249 case UNLE:
11250 case UNLT:
11251 case UNGE:
11252 case UNGT:
11253 case LTGT:
11254 if (outer_code == SET)
11255 {
11256 /* Is it a store-flag operation? */
11257 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11258 && XEXP (x, 1) == const0_rtx)
11259 {
11260 /* Thumb also needs an IT insn. */
11261 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11262 return true;
11263 }
11264 if (XEXP (x, 1) == const0_rtx)
11265 {
11266 switch (code)
11267 {
11268 case LT:
11269 /* LSR Rd, Rn, #31. */
11270 if (speed_p)
11271 *cost += extra_cost->alu.shift;
11272 break;
11273
11274 case EQ:
11275 /* RSBS T1, Rn, #0
11276 ADC Rd, Rn, T1. */
11277
11278 case NE:
11279 /* SUBS T1, Rn, #1
11280 SBC Rd, Rn, T1. */
11281 *cost += COSTS_N_INSNS (1);
11282 break;
11283
11284 case LE:
11285 /* RSBS T1, Rn, Rn, LSR #31
11286 ADC Rd, Rn, T1. */
11287 *cost += COSTS_N_INSNS (1);
11288 if (speed_p)
11289 *cost += extra_cost->alu.arith_shift;
11290 break;
11291
11292 case GT:
11293 /* RSB Rd, Rn, Rn, ASR #1
11294 LSR Rd, Rd, #31. */
11295 *cost += COSTS_N_INSNS (1);
11296 if (speed_p)
11297 *cost += (extra_cost->alu.arith_shift
11298 + extra_cost->alu.shift);
11299 break;
11300
11301 case GE:
11302 /* ASR Rd, Rn, #31
11303 ADD Rd, Rn, #1. */
11304 *cost += COSTS_N_INSNS (1);
11305 if (speed_p)
11306 *cost += extra_cost->alu.shift;
11307 break;
11308
11309 default:
11310 /* Remaining cases are either meaningless or would take
11311 three insns anyway. */
11312 *cost = COSTS_N_INSNS (3);
11313 break;
11314 }
11315 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11316 return true;
11317 }
11318 else
11319 {
11320 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11321 if (CONST_INT_P (XEXP (x, 1))
11322 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11323 {
11324 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11325 return true;
11326 }
11327
11328 return false;
11329 }
11330 }
11331 /* Not directly inside a set. If it involves the condition code
11332 register it must be the condition for a branch, cond_exec or
11333 I_T_E operation. Since the comparison is performed elsewhere
11334 this is just the control part which has no additional
11335 cost. */
11336 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11337 && XEXP (x, 1) == const0_rtx)
11338 {
11339 *cost = 0;
11340 return true;
11341 }
11342 return false;
11343
11344 case ABS:
11345 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11346 && (mode == SFmode || !TARGET_VFP_SINGLE))
11347 {
11348 if (speed_p)
11349 *cost += extra_cost->fp[mode != SFmode].neg;
11350
11351 return false;
11352 }
11353 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11354 {
11355 *cost = LIBCALL_COST (1);
11356 return false;
11357 }
11358
11359 if (mode == SImode)
11360 {
11361 if (speed_p)
11362 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11363 return false;
11364 }
11365 /* Vector mode? */
11366 *cost = LIBCALL_COST (1);
11367 return false;
11368
11369 case SIGN_EXTEND:
11370 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11371 && MEM_P (XEXP (x, 0)))
11372 {
11373 if (mode == DImode)
11374 *cost += COSTS_N_INSNS (1);
11375
11376 if (!speed_p)
11377 return true;
11378
11379 if (GET_MODE (XEXP (x, 0)) == SImode)
11380 *cost += extra_cost->ldst.load;
11381 else
11382 *cost += extra_cost->ldst.load_sign_extend;
11383
11384 if (mode == DImode)
11385 *cost += extra_cost->alu.shift;
11386
11387 return true;
11388 }
11389
11390 /* Widening from less than 32-bits requires an extend operation. */
11391 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11392 {
11393 /* We have SXTB/SXTH. */
11394 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11395 if (speed_p)
11396 *cost += extra_cost->alu.extend;
11397 }
11398 else if (GET_MODE (XEXP (x, 0)) != SImode)
11399 {
11400 /* Needs two shifts. */
11401 *cost += COSTS_N_INSNS (1);
11402 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11403 if (speed_p)
11404 *cost += 2 * extra_cost->alu.shift;
11405 }
11406
11407 /* Widening beyond 32-bits requires one more insn. */
11408 if (mode == DImode)
11409 {
11410 *cost += COSTS_N_INSNS (1);
11411 if (speed_p)
11412 *cost += extra_cost->alu.shift;
11413 }
11414
11415 return true;
11416
11417 case ZERO_EXTEND:
11418 if ((arm_arch4
11419 || GET_MODE (XEXP (x, 0)) == SImode
11420 || GET_MODE (XEXP (x, 0)) == QImode)
11421 && MEM_P (XEXP (x, 0)))
11422 {
11423 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11424
11425 if (mode == DImode)
11426 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11427
11428 return true;
11429 }
11430
11431 /* Widening from less than 32-bits requires an extend operation. */
11432 if (GET_MODE (XEXP (x, 0)) == QImode)
11433 {
11434 /* UXTB can be a shorter instruction in Thumb2, but it might
11435 be slower than the AND Rd, Rn, #255 alternative. When
11436 optimizing for speed it should never be slower to use
11437 AND, and we don't really model 16-bit vs 32-bit insns
11438 here. */
11439 if (speed_p)
11440 *cost += extra_cost->alu.logical;
11441 }
11442 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11443 {
11444 /* We have UXTB/UXTH. */
11445 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11446 if (speed_p)
11447 *cost += extra_cost->alu.extend;
11448 }
11449 else if (GET_MODE (XEXP (x, 0)) != SImode)
11450 {
11451 /* Needs two shifts. It's marginally preferable to use
11452 shifts rather than two BIC instructions as the second
11453 shift may merge with a subsequent insn as a shifter
11454 op. */
11455 *cost = COSTS_N_INSNS (2);
11456 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11457 if (speed_p)
11458 *cost += 2 * extra_cost->alu.shift;
11459 }
11460
11461 /* Widening beyond 32-bits requires one more insn. */
11462 if (mode == DImode)
11463 {
11464 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11465 }
11466
11467 return true;
11468
11469 case CONST_INT:
11470 *cost = 0;
11471 /* CONST_INT has no mode, so we cannot tell for sure how many
11472 insns are really going to be needed. The best we can do is
11473 look at the value passed. If it fits in SImode, then assume
11474 that's the mode it will be used for. Otherwise assume it
11475 will be used in DImode. */
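      /* For instance, a value that fits in 32 bits is costed directly via
	 arm_gen_constant in SImode, while something like 0x123456789 is
	 treated as DImode and costed as its low and high 32-bit halves
	 separately.  */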
11476 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11477 mode = SImode;
11478 else
11479 mode = DImode;
11480
11481 /* Avoid blowing up in arm_gen_constant (). */
11482 if (!(outer_code == PLUS
11483 || outer_code == AND
11484 || outer_code == IOR
11485 || outer_code == XOR
11486 || outer_code == MINUS))
11487 outer_code = SET;
11488
11489 const_int_cost:
11490 if (mode == SImode)
11491 {
11492 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11493 INTVAL (x), NULL, NULL,
11494 0, 0));
11495 /* Extra costs? */
11496 }
11497 else
11498 {
11499 *cost += COSTS_N_INSNS (arm_gen_constant
11500 (outer_code, SImode, NULL,
11501 trunc_int_for_mode (INTVAL (x), SImode),
11502 NULL, NULL, 0, 0)
11503 + arm_gen_constant (outer_code, SImode, NULL,
11504 INTVAL (x) >> 32, NULL,
11505 NULL, 0, 0));
11506 /* Extra costs? */
11507 }
11508
11509 return true;
11510
11511 case CONST:
11512 case LABEL_REF:
11513 case SYMBOL_REF:
11514 if (speed_p)
11515 {
11516 if (arm_arch_thumb2 && !flag_pic)
11517 *cost += COSTS_N_INSNS (1);
11518 else
11519 *cost += extra_cost->ldst.load;
11520 }
11521 else
11522 *cost += COSTS_N_INSNS (1);
11523
11524 if (flag_pic)
11525 {
11526 *cost += COSTS_N_INSNS (1);
11527 if (speed_p)
11528 *cost += extra_cost->alu.arith;
11529 }
11530
11531 return true;
11532
11533 case CONST_FIXED:
11534 *cost = COSTS_N_INSNS (4);
11535 /* Fixme. */
11536 return true;
11537
11538 case CONST_DOUBLE:
11539 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11540 && (mode == SFmode || !TARGET_VFP_SINGLE))
11541 {
11542 if (vfp3_const_double_rtx (x))
11543 {
11544 if (speed_p)
11545 *cost += extra_cost->fp[mode == DFmode].fpconst;
11546 return true;
11547 }
11548
11549 if (speed_p)
11550 {
11551 if (mode == DFmode)
11552 *cost += extra_cost->ldst.loadd;
11553 else
11554 *cost += extra_cost->ldst.loadf;
11555 }
11556 else
11557 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11558
11559 return true;
11560 }
11561 *cost = COSTS_N_INSNS (4);
11562 return true;
11563
11564 case CONST_VECTOR:
11565 /* Fixme. */
11566 if (((TARGET_NEON && TARGET_HARD_FLOAT
11567 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11568 || TARGET_HAVE_MVE)
11569 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11570 *cost = COSTS_N_INSNS (1);
11571 else
11572 *cost = COSTS_N_INSNS (4);
11573 return true;
11574
11575 case HIGH:
11576 case LO_SUM:
11577 /* When optimizing for size, we prefer constant pool entries to
11578 MOVW/MOVT pairs, so bump the cost of these slightly. */
11579 if (!speed_p)
11580 *cost += 1;
11581 return true;
11582
11583 case CLZ:
11584 if (speed_p)
11585 *cost += extra_cost->alu.clz;
11586 return false;
11587
11588 case SMIN:
11589 if (XEXP (x, 1) == const0_rtx)
11590 {
11591 if (speed_p)
11592 *cost += extra_cost->alu.log_shift;
11593 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11594 return true;
11595 }
11596 /* Fall through. */
11597 case SMAX:
11598 case UMIN:
11599 case UMAX:
11600 *cost += COSTS_N_INSNS (1);
11601 return false;
11602
11603 case TRUNCATE:
11604 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11605 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11606 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11608 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11609 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11610 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11611 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11612 == ZERO_EXTEND))))
11613 {
11614 if (speed_p)
11615 *cost += extra_cost->mult[1].extend;
11616 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11617 ZERO_EXTEND, 0, speed_p)
11618 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11619 ZERO_EXTEND, 0, speed_p));
11620 return true;
11621 }
11622 *cost = LIBCALL_COST (1);
11623 return false;
11624
11625 case UNSPEC_VOLATILE:
11626 case UNSPEC:
11627 return arm_unspec_cost (x, outer_code, speed_p, cost);
11628
11629 case PC:
11630 /* Reading the PC is like reading any other register. Writing it
11631 is more expensive, but we take that into account elsewhere. */
11632 *cost = 0;
11633 return true;
11634
11635 case ZERO_EXTRACT:
11636 /* TODO: Simple zero_extract of bottom bits using AND. */
11637 /* Fall through. */
11638 case SIGN_EXTRACT:
11639 if (arm_arch6
11640 && mode == SImode
11641 && CONST_INT_P (XEXP (x, 1))
11642 && CONST_INT_P (XEXP (x, 2)))
11643 {
11644 if (speed_p)
11645 *cost += extra_cost->alu.bfx;
11646 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11647 return true;
11648 }
11649 /* Without UBFX/SBFX, need to resort to shift operations. */
11650 *cost += COSTS_N_INSNS (1);
11651 if (speed_p)
11652 *cost += 2 * extra_cost->alu.shift;
11653 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11654 return true;
11655
11656 case FLOAT_EXTEND:
11657 if (TARGET_HARD_FLOAT)
11658 {
11659 if (speed_p)
11660 *cost += extra_cost->fp[mode == DFmode].widen;
11661 if (!TARGET_VFP5
11662 && GET_MODE (XEXP (x, 0)) == HFmode)
11663 {
11664 /* Pre v8, widening HF->DF is a two-step process, first
11665 widening to SFmode. */
11666 *cost += COSTS_N_INSNS (1);
11667 if (speed_p)
11668 *cost += extra_cost->fp[0].widen;
11669 }
11670 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11671 return true;
11672 }
11673
11674 *cost = LIBCALL_COST (1);
11675 return false;
11676
11677 case FLOAT_TRUNCATE:
11678 if (TARGET_HARD_FLOAT)
11679 {
11680 if (speed_p)
11681 *cost += extra_cost->fp[mode == DFmode].narrow;
11682 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11683 return true;
11684 /* Vector modes? */
11685 }
11686 *cost = LIBCALL_COST (1);
11687 return false;
11688
11689 case FMA:
11690 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11691 {
11692 rtx op0 = XEXP (x, 0);
11693 rtx op1 = XEXP (x, 1);
11694 rtx op2 = XEXP (x, 2);
11695
11696
11697 /* vfms or vfnma. */
11698 if (GET_CODE (op0) == NEG)
11699 op0 = XEXP (op0, 0);
11700
11701 /* vfnms or vfnma. */
11702 if (GET_CODE (op2) == NEG)
11703 op2 = XEXP (op2, 0);
11704
11705 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11706 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11707 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11708
11709 if (speed_p)
11710	    *cost += extra_cost->fp[mode == DFmode].fma;
11711
11712 return true;
11713 }
11714
11715 *cost = LIBCALL_COST (3);
11716 return false;
11717
11718 case FIX:
11719 case UNSIGNED_FIX:
11720 if (TARGET_HARD_FLOAT)
11721 {
11722 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11723 a vcvt fixed-point conversion. */
11724 if (code == FIX && mode == SImode
11725 && GET_CODE (XEXP (x, 0)) == FIX
11726 && GET_MODE (XEXP (x, 0)) == SFmode
11727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11728 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11729 > 0)
11730 {
11731 if (speed_p)
11732 *cost += extra_cost->fp[0].toint;
11733
11734 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11735 code, 0, speed_p);
11736 return true;
11737 }
11738
11739 if (GET_MODE_CLASS (mode) == MODE_INT)
11740 {
11741 mode = GET_MODE (XEXP (x, 0));
11742 if (speed_p)
11743 *cost += extra_cost->fp[mode == DFmode].toint;
11744	  /* Strip off the 'cost' of rounding towards zero. */
11745 if (GET_CODE (XEXP (x, 0)) == FIX)
11746 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11747 0, speed_p);
11748 else
11749 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11750 /* ??? Increase the cost to deal with transferring from
11751 FP -> CORE registers? */
11752 return true;
11753 }
11754 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11755 && TARGET_VFP5)
11756 {
11757 if (speed_p)
11758 *cost += extra_cost->fp[mode == DFmode].roundint;
11759 return false;
11760 }
11761 /* Vector costs? */
11762 }
11763 *cost = LIBCALL_COST (1);
11764 return false;
11765
11766 case FLOAT:
11767 case UNSIGNED_FLOAT:
11768 if (TARGET_HARD_FLOAT)
11769 {
11770 /* ??? Increase the cost to deal with transferring from CORE
11771 -> FP registers? */
11772 if (speed_p)
11773 *cost += extra_cost->fp[mode == DFmode].fromint;
11774 return false;
11775 }
11776 *cost = LIBCALL_COST (1);
11777 return false;
11778
11779 case CALL:
11780 return true;
11781
11782 case ASM_OPERANDS:
11783 {
11784	/* This is just a guess: estimate the number of instructions in the asm
11785	   plus one insn per input, but always charge a minimum of
11786	   COSTS_N_INSNS (1) (see PR60663). */
11787 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11788 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11789
11790 *cost = COSTS_N_INSNS (asm_length + num_operands);
11791 return true;
11792 }
11793 default:
11794 if (mode != VOIDmode)
11795 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11796 else
11797 *cost = COSTS_N_INSNS (4); /* Who knows? */
11798 return false;
11799 }
11800 }
11801
11802 #undef HANDLE_NARROW_SHIFT_ARITH
11803
11804 /* RTX costs entry point. */
11805
11806 static bool
11807 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11808 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11809 {
11810 bool result;
11811 int code = GET_CODE (x);
11812 gcc_assert (current_tune->insn_extra_cost);
11813
11814 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11815 (enum rtx_code) outer_code,
11816 current_tune->insn_extra_cost,
11817 total, speed);
11818
11819 if (dump_file && arm_verbose_cost)
11820 {
11821 print_rtl_single (dump_file, x);
11822 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11823 *total, result ? "final" : "partial");
11824 }
11825 return result;
11826 }
11827
11828 static int
11829 arm_insn_cost (rtx_insn *insn, bool speed)
11830 {
11831 int cost;
11832
11833 /* Don't cost a simple reg-reg move at a full insn cost: such moves
11834 will likely disappear during register allocation. */
11835 if (!reload_completed
11836 && GET_CODE (PATTERN (insn)) == SET
11837 && REG_P (SET_DEST (PATTERN (insn)))
11838 && REG_P (SET_SRC (PATTERN (insn))))
11839 return 2;
11840 cost = pattern_cost (PATTERN (insn), speed);
11841 /* If the cost is zero, then it's likely a complex insn. We don't want the
11842 cost of these to be less than something we know about. */
11843 return cost ? cost : COSTS_N_INSNS (2);
11844 }
11845
11846 /* All address computations that can be done are free, but rtx cost returns
11847 the same for practically all of them. So we weight the different types
11848 of address here in the order (most pref first):
11849 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
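/* For example, with these weights (post_inc (reg)) scores 0,
   (plus (reg) (const_int 4)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, a plain (reg) scores 6
   and a bare (symbol_ref) scores 10.  */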
11850 static inline int
11851 arm_arm_address_cost (rtx x)
11852 {
11853 enum rtx_code c = GET_CODE (x);
11854
11855 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11856 return 0;
11857 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11858 return 10;
11859
11860 if (c == PLUS)
11861 {
11862 if (CONST_INT_P (XEXP (x, 1)))
11863 return 2;
11864
11865 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11866 return 3;
11867
11868 return 4;
11869 }
11870
11871 return 6;
11872 }
11873
11874 static inline int
11875 arm_thumb_address_cost (rtx x)
11876 {
11877 enum rtx_code c = GET_CODE (x);
11878
11879 if (c == REG)
11880 return 1;
11881 if (c == PLUS
11882 && REG_P (XEXP (x, 0))
11883 && CONST_INT_P (XEXP (x, 1)))
11884 return 1;
11885
11886 return 2;
11887 }
11888
11889 static int
11890 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11891 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11892 {
11893 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11894 }
11895
11896 /* Adjust cost hook for XScale. */
11897 static bool
11898 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11899 int * cost)
11900 {
11901 /* Some true dependencies can have a higher cost depending
11902 on precisely how certain input operands are used. */
11903 if (dep_type == 0
11904 && recog_memoized (insn) >= 0
11905 && recog_memoized (dep) >= 0)
11906 {
11907 int shift_opnum = get_attr_shift (insn);
11908 enum attr_type attr_type = get_attr_type (dep);
11909
11910 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11911 operand for INSN. If we have a shifted input operand and the
11912 instruction we depend on is another ALU instruction, then we may
11913 have to account for an additional stall. */
11914 if (shift_opnum != 0
11915 && (attr_type == TYPE_ALU_SHIFT_IMM
11916 || attr_type == TYPE_ALUS_SHIFT_IMM
11917 || attr_type == TYPE_LOGIC_SHIFT_IMM
11918 || attr_type == TYPE_LOGICS_SHIFT_IMM
11919 || attr_type == TYPE_ALU_SHIFT_REG
11920 || attr_type == TYPE_ALUS_SHIFT_REG
11921 || attr_type == TYPE_LOGIC_SHIFT_REG
11922 || attr_type == TYPE_LOGICS_SHIFT_REG
11923 || attr_type == TYPE_MOV_SHIFT
11924 || attr_type == TYPE_MVN_SHIFT
11925 || attr_type == TYPE_MOV_SHIFT_REG
11926 || attr_type == TYPE_MVN_SHIFT_REG))
11927 {
11928 rtx shifted_operand;
11929 int opno;
11930
11931 /* Get the shifted operand. */
11932 extract_insn (insn);
11933 shifted_operand = recog_data.operand[shift_opnum];
11934
11935 /* Iterate over all the operands in DEP. If we write an operand
11936	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11937 cost of this dependency. */
11938 extract_insn (dep);
11939 preprocess_constraints (dep);
11940 for (opno = 0; opno < recog_data.n_operands; opno++)
11941 {
11942 /* We can ignore strict inputs. */
11943 if (recog_data.operand_type[opno] == OP_IN)
11944 continue;
11945
11946 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11947 shifted_operand))
11948 {
11949 *cost = 2;
11950 return false;
11951 }
11952 }
11953 }
11954 }
11955 return true;
11956 }
11957
11958 /* Adjust cost hook for Cortex A9. */
11959 static bool
11960 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11961 int * cost)
11962 {
11963 switch (dep_type)
11964 {
11965 case REG_DEP_ANTI:
11966 *cost = 0;
11967 return false;
11968
11969 case REG_DEP_TRUE:
11970 case REG_DEP_OUTPUT:
11971 if (recog_memoized (insn) >= 0
11972 && recog_memoized (dep) >= 0)
11973 {
11974 if (GET_CODE (PATTERN (insn)) == SET)
11975 {
11976 if (GET_MODE_CLASS
11977 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11978 || GET_MODE_CLASS
11979 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11980 {
11981 enum attr_type attr_type_insn = get_attr_type (insn);
11982 enum attr_type attr_type_dep = get_attr_type (dep);
11983
11984 /* By default all dependencies of the form
11985 s0 = s0 <op> s1
11986 s0 = s0 <op> s2
11987 have an extra latency of 1 cycle because
11988 of the input and output dependency in this
11989		     case.  However, this gets modeled as a true
11990		     dependency, hence all these checks. */
11991 if (REG_P (SET_DEST (PATTERN (insn)))
11992 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11993 {
11994 /* FMACS is a special case where the dependent
11995 instruction can be issued 3 cycles before
11996 the normal latency in case of an output
11997 dependency. */
11998 if ((attr_type_insn == TYPE_FMACS
11999 || attr_type_insn == TYPE_FMACD)
12000 && (attr_type_dep == TYPE_FMACS
12001 || attr_type_dep == TYPE_FMACD))
12002 {
12003 if (dep_type == REG_DEP_OUTPUT)
12004 *cost = insn_default_latency (dep) - 3;
12005 else
12006 *cost = insn_default_latency (dep);
12007 return false;
12008 }
12009 else
12010 {
12011 if (dep_type == REG_DEP_OUTPUT)
12012 *cost = insn_default_latency (dep) + 1;
12013 else
12014 *cost = insn_default_latency (dep);
12015 }
12016 return false;
12017 }
12018 }
12019 }
12020 }
12021 break;
12022
12023 default:
12024 gcc_unreachable ();
12025 }
12026
12027 return true;
12028 }
12029
12030 /* Adjust cost hook for FA726TE. */
12031 static bool
12032 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12033 int * cost)
12034 {
12035   /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn
12036      followed by a predicated one) has a penalty of 3. */
12037 if (dep_type == REG_DEP_TRUE
12038 && recog_memoized (insn) >= 0
12039 && recog_memoized (dep) >= 0
12040 && get_attr_conds (dep) == CONDS_SET)
12041 {
12042 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12043 if (get_attr_conds (insn) == CONDS_USE
12044 && get_attr_type (insn) != TYPE_BRANCH)
12045 {
12046 *cost = 3;
12047 return false;
12048 }
12049
12050 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12051 || get_attr_conds (insn) == CONDS_USE)
12052 {
12053 *cost = 0;
12054 return false;
12055 }
12056 }
12057
12058 return true;
12059 }
12060
12061 /* Implement TARGET_REGISTER_MOVE_COST.
12062
12063 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12064    such a move is typically more expensive than a single memory access.  We set
12065 the cost to less than two memory accesses so that floating
12066 point to integer conversion does not go through memory. */
12067
12068 int
12069 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12070 reg_class_t from, reg_class_t to)
12071 {
12072 if (TARGET_32BIT)
12073 {
12074 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12075 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12076 return 15;
12077 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12078 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12079 return 4;
12080 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12081 return 20;
12082 else
12083 return 2;
12084 }
12085 else
12086 {
12087 if (from == HI_REGS || to == HI_REGS)
12088 return 4;
12089 else
12090 return 2;
12091 }
12092 }
12093
12094 /* Implement TARGET_MEMORY_MOVE_COST. */
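/* On 32-bit targets this is a flat 10.  On Thumb-1, sub-word modes cost 8,
   and otherwise the cost scales with the mode size: 2 * GET_MODE_SIZE for
   LO_REGS and twice that for other classes (e.g. SImode is 8 for LO_REGS
   and 16 for HI_REGS).  */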
12095
12096 int
12097 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12098 bool in ATTRIBUTE_UNUSED)
12099 {
12100 if (TARGET_32BIT)
12101 return 10;
12102 else
12103 {
12104 if (GET_MODE_SIZE (mode) < 4)
12105 return 8;
12106 else
12107 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12108 }
12109 }
12110
12111 /* Vectorizer cost model implementation. */
12112
12113 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12114 static int
12115 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12116 tree vectype,
12117 int misalign ATTRIBUTE_UNUSED)
12118 {
12119 unsigned elements;
12120
12121 switch (type_of_cost)
12122 {
12123 case scalar_stmt:
12124 return current_tune->vec_costs->scalar_stmt_cost;
12125
12126 case scalar_load:
12127 return current_tune->vec_costs->scalar_load_cost;
12128
12129 case scalar_store:
12130 return current_tune->vec_costs->scalar_store_cost;
12131
12132 case vector_stmt:
12133 return current_tune->vec_costs->vec_stmt_cost;
12134
12135 case vector_load:
12136 return current_tune->vec_costs->vec_align_load_cost;
12137
12138 case vector_store:
12139 return current_tune->vec_costs->vec_store_cost;
12140
12141 case vec_to_scalar:
12142 return current_tune->vec_costs->vec_to_scalar_cost;
12143
12144 case scalar_to_vec:
12145 return current_tune->vec_costs->scalar_to_vec_cost;
12146
12147 case unaligned_load:
12148 case vector_gather_load:
12149 return current_tune->vec_costs->vec_unalign_load_cost;
12150
12151 case unaligned_store:
12152 case vector_scatter_store:
12153 return current_tune->vec_costs->vec_unalign_store_cost;
12154
12155 case cond_branch_taken:
12156 return current_tune->vec_costs->cond_taken_branch_cost;
12157
12158 case cond_branch_not_taken:
12159 return current_tune->vec_costs->cond_not_taken_branch_cost;
12160
12161 case vec_perm:
12162 case vec_promote_demote:
12163 return current_tune->vec_costs->vec_stmt_cost;
12164
12165 case vec_construct:
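      /* Constructing a vector from scalars: the cost grows with the element
	 count, e.g. a 4-element vector costs 4 / 2 + 1 = 3.  */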
12166 elements = TYPE_VECTOR_SUBPARTS (vectype);
12167 return elements / 2 + 1;
12168
12169 default:
12170 gcc_unreachable ();
12171 }
12172 }
12173
12174 /* Implement targetm.vectorize.add_stmt_cost. */
12175
12176 static unsigned
12177 arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
12178 enum vect_cost_for_stmt kind,
12179 struct _stmt_vec_info *stmt_info, tree vectype,
12180 int misalign, enum vect_cost_model_location where)
12181 {
12182 unsigned *cost = (unsigned *) data;
12183 unsigned retval = 0;
12184
12185 if (flag_vect_cost_model)
12186 {
12187 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
12188
12189 /* Statements in an inner loop relative to the loop being
12190 vectorized are weighted more heavily. The value here is
12191 arbitrary and could potentially be improved with analysis. */
12192 if (where == vect_body && stmt_info
12193 && stmt_in_inner_loop_p (vinfo, stmt_info))
12194 count *= 50; /* FIXME. */
12195
12196 retval = (unsigned) (count * stmt_cost);
12197 cost[where] += retval;
12198 }
12199
12200 return retval;
12201 }
12202
12203 /* Return true if and only if this insn can dual-issue only as older. */
12204 static bool
12205 cortexa7_older_only (rtx_insn *insn)
12206 {
12207 if (recog_memoized (insn) < 0)
12208 return false;
12209
12210 switch (get_attr_type (insn))
12211 {
12212 case TYPE_ALU_DSP_REG:
12213 case TYPE_ALU_SREG:
12214 case TYPE_ALUS_SREG:
12215 case TYPE_LOGIC_REG:
12216 case TYPE_LOGICS_REG:
12217 case TYPE_ADC_REG:
12218 case TYPE_ADCS_REG:
12219 case TYPE_ADR:
12220 case TYPE_BFM:
12221 case TYPE_REV:
12222 case TYPE_MVN_REG:
12223 case TYPE_SHIFT_IMM:
12224 case TYPE_SHIFT_REG:
12225 case TYPE_LOAD_BYTE:
12226 case TYPE_LOAD_4:
12227 case TYPE_STORE_4:
12228 case TYPE_FFARITHS:
12229 case TYPE_FADDS:
12230 case TYPE_FFARITHD:
12231 case TYPE_FADDD:
12232 case TYPE_FMOV:
12233 case TYPE_F_CVT:
12234 case TYPE_FCMPS:
12235 case TYPE_FCMPD:
12236 case TYPE_FCONSTS:
12237 case TYPE_FCONSTD:
12238 case TYPE_FMULS:
12239 case TYPE_FMACS:
12240 case TYPE_FMULD:
12241 case TYPE_FMACD:
12242 case TYPE_FDIVS:
12243 case TYPE_FDIVD:
12244 case TYPE_F_MRC:
12245 case TYPE_F_MRRC:
12246 case TYPE_F_FLAG:
12247 case TYPE_F_LOADS:
12248 case TYPE_F_STORES:
12249 return true;
12250 default:
12251 return false;
12252 }
12253 }
12254
12255 /* Return true if and only if this insn can dual-issue as younger. */
12256 static bool
12257 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12258 {
12259 if (recog_memoized (insn) < 0)
12260 {
12261 if (verbose > 5)
12262 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12263 return false;
12264 }
12265
12266 switch (get_attr_type (insn))
12267 {
12268 case TYPE_ALU_IMM:
12269 case TYPE_ALUS_IMM:
12270 case TYPE_LOGIC_IMM:
12271 case TYPE_LOGICS_IMM:
12272 case TYPE_EXTEND:
12273 case TYPE_MVN_IMM:
12274 case TYPE_MOV_IMM:
12275 case TYPE_MOV_REG:
12276 case TYPE_MOV_SHIFT:
12277 case TYPE_MOV_SHIFT_REG:
12278 case TYPE_BRANCH:
12279 case TYPE_CALL:
12280 return true;
12281 default:
12282 return false;
12283 }
12284 }
12285
12286
12287 /* Look for an instruction that can dual issue only as an older
12288 instruction, and move it in front of any instructions that can
12289 dual-issue as younger, while preserving the relative order of all
12290    other instructions in the ready list.  This is a heuristic to help
12291 dual-issue in later cycles, by postponing issue of more flexible
12292 instructions. This heuristic may affect dual issue opportunities
12293 in the current cycle. */
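/* As an illustration, if a MOV-immediate (which can issue as younger) sits
   nearer the head of the ready list than an FADDS (older-only), the FADDS is
   moved in front of the MOV so that the MOV stays available to pair as the
   younger insn of a later dual-issue slot.  */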
12294 static void
12295 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12296 int *n_readyp, int clock)
12297 {
12298 int i;
12299 int first_older_only = -1, first_younger = -1;
12300
12301 if (verbose > 5)
12302 fprintf (file,
12303 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12304 clock,
12305 *n_readyp);
12306
12307 /* Traverse the ready list from the head (the instruction to issue
12308      first), looking for the first instruction that can issue as
12309 younger and the first instruction that can dual-issue only as
12310 older. */
12311 for (i = *n_readyp - 1; i >= 0; i--)
12312 {
12313 rtx_insn *insn = ready[i];
12314 if (cortexa7_older_only (insn))
12315 {
12316 first_older_only = i;
12317 if (verbose > 5)
12318 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12319 break;
12320 }
12321 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12322 first_younger = i;
12323 }
12324
12325 /* Nothing to reorder because either no younger insn found or insn
12326 that can dual-issue only as older appears before any insn that
12327 can dual-issue as younger. */
12328 if (first_younger == -1)
12329 {
12330 if (verbose > 5)
12331 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12332 return;
12333 }
12334
12335 /* Nothing to reorder because no older-only insn in the ready list. */
12336 if (first_older_only == -1)
12337 {
12338 if (verbose > 5)
12339 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12340 return;
12341 }
12342
12343 /* Move first_older_only insn before first_younger. */
12344 if (verbose > 5)
12345 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12346 INSN_UID(ready [first_older_only]),
12347 INSN_UID(ready [first_younger]));
12348 rtx_insn *first_older_only_insn = ready [first_older_only];
12349 for (i = first_older_only; i < first_younger; i++)
12350 {
12351 ready[i] = ready[i+1];
12352 }
12353
12354 ready[i] = first_older_only_insn;
12355 return;
12356 }
12357
12358 /* Implement TARGET_SCHED_REORDER. */
12359 static int
12360 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12361 int clock)
12362 {
12363 switch (arm_tune)
12364 {
12365 case TARGET_CPU_cortexa7:
12366 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12367 break;
12368 default:
12369 /* Do nothing for other cores. */
12370 break;
12371 }
12372
12373 return arm_issue_rate ();
12374 }
12375
12376 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12377 It corrects the value of COST based on the relationship between
12378 INSN and DEP through the dependence LINK. It returns the new
12379 value. There is a per-core adjust_cost hook to adjust scheduler costs
12380 and the per-core hook can choose to completely override the generic
12381 adjust_cost function. Only put bits of code into arm_adjust_cost that
12382 are common across all cores. */
12383 static int
12384 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12385 unsigned int)
12386 {
12387 rtx i_pat, d_pat;
12388
12389 /* When generating Thumb-1 code, we want to place flag-setting operations
12390 close to a conditional branch which depends on them, so that we can
12391 omit the comparison. */
12392 if (TARGET_THUMB1
12393 && dep_type == 0
12394 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12395 && recog_memoized (dep) >= 0
12396 && get_attr_conds (dep) == CONDS_SET)
12397 return 0;
12398
12399 if (current_tune->sched_adjust_cost != NULL)
12400 {
12401 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12402 return cost;
12403 }
12404
12405 /* XXX Is this strictly true? */
12406 if (dep_type == REG_DEP_ANTI
12407 || dep_type == REG_DEP_OUTPUT)
12408 return 0;
12409
12410 /* Call insns don't incur a stall, even if they follow a load. */
12411 if (dep_type == 0
12412 && CALL_P (insn))
12413 return 1;
12414
12415 if ((i_pat = single_set (insn)) != NULL
12416 && MEM_P (SET_SRC (i_pat))
12417 && (d_pat = single_set (dep)) != NULL
12418 && MEM_P (SET_DEST (d_pat)))
12419 {
12420 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12421       /* This is a load after a store; there is no conflict if the load reads
12422 from a cached area. Assume that loads from the stack, and from the
12423 constant pool are cached, and that others will miss. This is a
12424 hack. */
12425
12426 if ((GET_CODE (src_mem) == SYMBOL_REF
12427 && CONSTANT_POOL_ADDRESS_P (src_mem))
12428 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12429 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12430 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12431 return 1;
12432 }
12433
12434 return cost;
12435 }
12436
12437 int
12438 arm_max_conditional_execute (void)
12439 {
12440 return max_insns_skipped;
12441 }
12442
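/* Default branch cost: 4 on 32-bit targets (1 when optimizing Thumb-2 for
   size); on Thumb-1 a branch costs 2 when optimizing, otherwise 0.  */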
12443 static int
12444 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12445 {
12446 if (TARGET_32BIT)
12447 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12448 else
12449 return (optimize > 0) ? 2 : 0;
12450 }
12451
12452 static int
12453 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12454 {
12455 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12456 }
12457
12458 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12459 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12460 sequences of non-executed instructions in IT blocks probably take the same
12461 amount of time as executed instructions (and the IT instruction itself takes
12462 space in icache). This function was experimentally determined to give good
12463 results on a popular embedded benchmark. */
12464
12465 static int
12466 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12467 {
12468 return (TARGET_32BIT && speed_p) ? 1
12469 : arm_default_branch_cost (speed_p, predictable_p);
12470 }
12471
12472 static int
12473 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12474 {
12475 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12476 }
12477
12478 static bool fp_consts_inited = false;
12479
12480 static REAL_VALUE_TYPE value_fp0;
12481
12482 static void
12483 init_fp_table (void)
12484 {
12485 REAL_VALUE_TYPE r;
12486
12487 r = REAL_VALUE_ATOF ("0", DFmode);
12488 value_fp0 = r;
12489 fp_consts_inited = true;
12490 }
12491
12492 /* Return TRUE if rtx X is a valid immediate FP constant. */
12493 int
12494 arm_const_double_rtx (rtx x)
12495 {
12496 const REAL_VALUE_TYPE *r;
12497
12498 if (!fp_consts_inited)
12499 init_fp_table ();
12500
12501 r = CONST_DOUBLE_REAL_VALUE (x);
12502 if (REAL_VALUE_MINUS_ZERO (*r))
12503 return 0;
12504
12505 if (real_equal (r, &value_fp0))
12506 return 1;
12507
12508 return 0;
12509 }
12510
12511 /* VFPv3 has a fairly wide range of representable immediates, formed from
12512 "quarter-precision" floating-point values. These can be evaluated using this
12513 formula (with ^ for exponentiation):
12514
12515 -1^s * n * 2^-r
12516
12517 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12518 16 <= n <= 31 and 0 <= r <= 7.
12519
12520 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12521
12522 - A (most-significant) is the sign bit.
12523 - BCD are the exponent (encoded as r XOR 3).
12524 - EFGH are the mantissa (encoded as n - 16).
12525 */
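
/* Illustrative sketch only (hypothetical helper, never called by the
   compiler): encode the (S, N, R) triple of the formula above into the
   8-bit fconst[sd] immediate, mirroring the return expression of
   vfp3_const_double_index below.  For example 1.0 = 16 * 2^-4 gives
   S = 0, N = 16, R = 4 and encodes as
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */

static int ATTRIBUTE_UNUSED
vfp3_quarter_precision_encode_sketch (int s, int n, int r)
{
  /* Reject triples outside the representable range.  */
  if (s < 0 || s > 1 || n < 16 || n > 31 || r < 0 || r > 7)
    return -1;

  /* A = sign, BCD = r XOR 3, EFGH = n - 16.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}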
12526
12527 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12528 fconst[sd] instruction, or -1 if X isn't suitable. */
12529 static int
12530 vfp3_const_double_index (rtx x)
12531 {
12532 REAL_VALUE_TYPE r, m;
12533 int sign, exponent;
12534 unsigned HOST_WIDE_INT mantissa, mant_hi;
12535 unsigned HOST_WIDE_INT mask;
12536 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12537 bool fail;
12538
12539 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12540 return -1;
12541
12542 r = *CONST_DOUBLE_REAL_VALUE (x);
12543
12544 /* We can't represent these things, so detect them first. */
12545 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12546 return -1;
12547
12548 /* Extract sign, exponent and mantissa. */
12549 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12550 r = real_value_abs (&r);
12551 exponent = REAL_EXP (&r);
12552 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12553 highest (sign) bit, with a fixed binary point at bit point_pos.
12554 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12555 bits for the mantissa, this may fail (low bits would be lost). */
12556 real_ldexp (&m, &r, point_pos - exponent);
12557 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12558 mantissa = w.elt (0);
12559 mant_hi = w.elt (1);
12560
12561 /* If there are bits set in the low part of the mantissa, we can't
12562 represent this value. */
12563 if (mantissa != 0)
12564 return -1;
12565
12566 /* Now make it so that mantissa contains the most-significant bits, and move
12567 the point_pos to indicate that the least-significant bits have been
12568 discarded. */
12569 point_pos -= HOST_BITS_PER_WIDE_INT;
12570 mantissa = mant_hi;
12571
12572 /* We can permit four significant bits of mantissa only, plus a high bit
12573 which is always 1. */
12574 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12575 if ((mantissa & mask) != 0)
12576 return -1;
12577
12578 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12579 mantissa >>= point_pos - 5;
12580
12581 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12582 floating-point immediate zero with Neon using an integer-zero load, but
12583 that case is handled elsewhere.) */
12584 if (mantissa == 0)
12585 return -1;
12586
12587 gcc_assert (mantissa >= 16 && mantissa <= 31);
12588
12589 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12590 normalized significands are in the range [1, 2). (Our mantissa is shifted
12591 left 4 places at this point relative to normalized IEEE754 values). GCC
12592 internally uses [0.5, 1) (see real.c), so the exponent returned from
12593 REAL_EXP must be altered. */
12594 exponent = 5 - exponent;
12595
12596 if (exponent < 0 || exponent > 7)
12597 return -1;
12598
12599 /* Sign, mantissa and exponent are now in the correct form to plug into the
12600 formula described in the comment above. */
12601 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12602 }
12603
12604 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12605 int
12606 vfp3_const_double_rtx (rtx x)
12607 {
12608 if (!TARGET_VFP3)
12609 return 0;
12610
12611 return vfp3_const_double_index (x) != -1;
12612 }
12613
12614 /* Recognize immediates which can be used in various Neon and MVE instructions.
12615 Legal immediates are described by the following table (for VMVN variants, the
12616 bitwise inverse of the constant shown is recognized. In either case, VMOV
12617 is output and the correct instruction to use for a given constant is chosen
12618 by the assembler). The constant shown is replicated across all elements of
12619 the destination vector.
12620
12621 insn elems variant constant (binary)
12622 ---- ----- ------- -----------------
12623 vmov i32 0 00000000 00000000 00000000 abcdefgh
12624 vmov i32 1 00000000 00000000 abcdefgh 00000000
12625 vmov i32 2 00000000 abcdefgh 00000000 00000000
12626 vmov i32 3 abcdefgh 00000000 00000000 00000000
12627 vmov i16 4 00000000 abcdefgh
12628 vmov i16 5 abcdefgh 00000000
12629 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12630 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12631 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12632 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12633 vmvn i16 10 00000000 abcdefgh
12634 vmvn i16 11 abcdefgh 00000000
12635 vmov i32 12 00000000 00000000 abcdefgh 11111111
12636 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12637 vmov i32 14 00000000 abcdefgh 11111111 11111111
12638 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12639 vmov i8 16 abcdefgh
12640 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12641 eeeeeeee ffffffff gggggggg hhhhhhhh
12642 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12643 vmov f32 19 00000000 00000000 00000000 00000000
12644
12645 For case 18, B = !b. Representable values are exactly those accepted by
12646 vfp3_const_double_index, but are output as floating-point numbers rather
12647 than indices.
12648
12649 For case 19, we will change it to vmov.i32 when assembling.
12650
12651 Variants 0-5 (inclusive) may also be used as immediates for the second
12652 operand of VORR/VBIC instructions.
12653
12654 The INVERSE argument causes the bitwise inverse of the given operand to be
12655 recognized instead (used for recognizing legal immediates for the VAND/VORN
12656 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12657 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12658 output, rather than the real insns vbic/vorr).
12659
12660 INVERSE makes no difference to the recognition of float vectors.
12661
12662 The return value is the variant of immediate as shown in the above table, or
12663 -1 if the given value doesn't match any of the listed patterns.
12664 */
12665 static int
12666 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12667 rtx *modconst, int *elementwidth)
12668 {
12669 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12670 matches = 1; \
12671 for (i = 0; i < idx; i += (STRIDE)) \
12672 if (!(TEST)) \
12673 matches = 0; \
12674 if (matches) \
12675 { \
12676 immtype = (CLASS); \
12677 elsize = (ELSIZE); \
12678 break; \
12679 }
12680
12681 unsigned int i, elsize = 0, idx = 0, n_elts;
12682 unsigned int innersize;
12683 unsigned char bytes[16] = {};
12684 int immtype = -1, matches;
12685 unsigned int invmask = inverse ? 0xff : 0;
12686 bool vector = GET_CODE (op) == CONST_VECTOR;
12687
12688 if (vector)
12689 n_elts = CONST_VECTOR_NUNITS (op);
12690 else
12691 {
12692 n_elts = 1;
12693 gcc_assert (mode != VOIDmode);
12694 }
12695
12696 innersize = GET_MODE_UNIT_SIZE (mode);
12697
12698 /* Only support 128-bit vectors for MVE. */
12699 if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
12700 return -1;
12701
12702 /* Vectors of float constants. */
12703 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12704 {
12705 rtx el0 = CONST_VECTOR_ELT (op, 0);
12706
12707 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12708 return -1;
12709
12710 /* FP16 vectors cannot be represented. */
12711 if (GET_MODE_INNER (mode) == HFmode)
12712 return -1;
12713
12714 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12715 are distinct in this context. */
12716 if (!const_vec_duplicate_p (op))
12717 return -1;
12718
12719 if (modconst)
12720 *modconst = CONST_VECTOR_ELT (op, 0);
12721
12722 if (elementwidth)
12723 *elementwidth = 0;
12724
12725 if (el0 == CONST0_RTX (GET_MODE (el0)))
12726 return 19;
12727 else
12728 return 18;
12729 }
12730
12731 /* The tricks done in the code below apply for little-endian vector layout.
12732 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12733 FIXME: Implement logic for big-endian vectors. */
12734 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12735 return -1;
12736
12737 /* Splat vector constant out into a byte vector. */
12738 for (i = 0; i < n_elts; i++)
12739 {
12740 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12741 unsigned HOST_WIDE_INT elpart;
12742
12743 gcc_assert (CONST_INT_P (el));
12744 elpart = INTVAL (el);
12745
12746 for (unsigned int byte = 0; byte < innersize; byte++)
12747 {
12748 bytes[idx++] = (elpart & 0xff) ^ invmask;
12749 elpart >>= BITS_PER_UNIT;
12750 }
12751 }
12752
12753 /* Sanity check. */
12754 gcc_assert (idx == GET_MODE_SIZE (mode));
12755
12756 do
12757 {
12758 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12759 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12760
12761 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12762 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12763
12764 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12765 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12766
12767 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12768 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12769
12770 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12771
12772 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12773
12774 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12775 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12776
12777 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12778 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12779
12780 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12781 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12782
12783 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12784 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12785
12786 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12787
12788 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12789
12790 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12791 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12792
12793 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12794 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12795
12796 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12797 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12798
12799 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12800 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12801
12802 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12803
12804 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12805 && bytes[i] == bytes[(i + 8) % idx]);
12806 }
12807 while (0);
12808
12809 if (immtype == -1)
12810 return -1;
12811
12812 if (elementwidth)
12813 *elementwidth = elsize;
12814
12815 if (modconst)
12816 {
12817 unsigned HOST_WIDE_INT imm = 0;
12818
12819 /* Un-invert bytes of recognized vector, if necessary. */
12820 if (invmask != 0)
12821 for (i = 0; i < idx; i++)
12822 bytes[i] ^= invmask;
12823
12824 if (immtype == 17)
12825 {
12826 /* FIXME: Broken on 32-bit H_W_I hosts. */
12827 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12828
12829 for (i = 0; i < 8; i++)
12830 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12831 << (i * BITS_PER_UNIT);
12832
12833 *modconst = GEN_INT (imm);
12834 }
12835 else
12836 {
12837 unsigned HOST_WIDE_INT imm = 0;
12838
12839 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12840 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12841
12842 *modconst = GEN_INT (imm);
12843 }
12844 }
12845
12846 return immtype;
12847 #undef CHECK
12848 }
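
/* Illustrative sketch only (hypothetical helper, never called): show how
   the byte-splatting above reduces variant 0 of the table ("00000000
   00000000 00000000 abcdefgh", replicated across the vector) to a simple
   byte test.  For a V4SI constant whose elements are all 0x000000ab, the
   little-endian byte array is { ab, 0, 0, 0, ab, 0, 0, 0, ... }, which is
   exactly what CHECK (4, 32, 0, ...) verifies.  */

static bool ATTRIBUTE_UNUSED
simd_variant0_sketch_p (unsigned int element)
{
  unsigned char bytes[4];

  /* Splat one 32-bit element into little-endian bytes, as the loop above
     does for every element of the vector.  */
  for (int byte = 0; byte < 4; byte++)
    {
      bytes[byte] = element & 0xff;
      element >>= BITS_PER_UNIT;
    }

  /* Variant 0: only the least-significant byte may be non-zero.  */
  return bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0;
}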
12849
12850 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
12851 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
12852 (or zero for float elements), and a modified constant (whatever should be
12853 output for a VMOV) in *MODCONST. The function was renamed from
12854 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move"
12855 because it is used by both Neon and MVE. */
12856 int
12857 simd_immediate_valid_for_move (rtx op, machine_mode mode,
12858 rtx *modconst, int *elementwidth)
12859 {
12860 rtx tmpconst;
12861 int tmpwidth;
12862 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12863
12864 if (retval == -1)
12865 return 0;
12866
12867 if (modconst)
12868 *modconst = tmpconst;
12869
12870 if (elementwidth)
12871 *elementwidth = tmpwidth;
12872
12873 return 1;
12874 }
12875
12876 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12877 the immediate is valid, write a constant suitable for using as an operand
12878 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12879 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
12880
12881 int
12882 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12883 rtx *modconst, int *elementwidth)
12884 {
12885 rtx tmpconst;
12886 int tmpwidth;
12887 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12888
12889 if (retval < 0 || retval > 5)
12890 return 0;
12891
12892 if (modconst)
12893 *modconst = tmpconst;
12894
12895 if (elementwidth)
12896 *elementwidth = tmpwidth;
12897
12898 return 1;
12899 }
12900
12901 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12902 the immediate is valid, write a constant suitable for using as an operand
12903 to VSHR/VSHL to *MODCONST and the corresponding element width to
12904 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12905 which have different immediate ranges. */
12906
12907 int
12908 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12909 rtx *modconst, int *elementwidth,
12910 bool isleftshift)
12911 {
12912 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12913 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12914 unsigned HOST_WIDE_INT last_elt = 0;
12915 unsigned HOST_WIDE_INT maxshift;
12916
12917 /* Split vector constant out into a byte vector. */
12918 for (i = 0; i < n_elts; i++)
12919 {
12920 rtx el = CONST_VECTOR_ELT (op, i);
12921 unsigned HOST_WIDE_INT elpart;
12922
12923 if (CONST_INT_P (el))
12924 elpart = INTVAL (el);
12925 else if (CONST_DOUBLE_P (el))
12926 return 0;
12927 else
12928 gcc_unreachable ();
12929
12930 if (i != 0 && elpart != last_elt)
12931 return 0;
12932
12933 last_elt = elpart;
12934 }
12935
12936 /* Shift less than element size. */
12937 maxshift = innersize * 8;
12938
12939 if (isleftshift)
12940 {
12941 /* Left shift immediate value can be from 0 to <size>-1. */
12942 if (last_elt >= maxshift)
12943 return 0;
12944 }
12945 else
12946 {
12947 /* Right shift immediate value can be from 1 to <size>. */
12948 if (last_elt == 0 || last_elt > maxshift)
12949 return 0;
12950 }
12951
12952 if (elementwidth)
12953 *elementwidth = innersize * 8;
12954
12955 if (modconst)
12956 *modconst = CONST_VECTOR_ELT (op, 0);
12957
12958 return 1;
12959 }
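
/* Illustrative sketch only (hypothetical helper, never called): the
   per-element range test applied above.  With 16-bit elements, for
   instance, a VSHL immediate must lie in [0, 15] while a VSHR immediate
   must lie in [1, 16].  */

static bool ATTRIBUTE_UNUSED
neon_shift_immediate_range_sketch_p (unsigned HOST_WIDE_INT shift,
                                     unsigned int elem_bits,
                                     bool isleftshift)
{
  if (isleftshift)
    /* Left shift: 0 to <size>-1.  */
    return shift < elem_bits;

  /* Right shift: 1 to <size>.  */
  return shift >= 1 && shift <= elem_bits;
}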
12960
12961 /* Return a string suitable for output of Neon immediate logic operation
12962 MNEM. */
12963
12964 char *
12965 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12966 int inverse, int quad)
12967 {
12968 int width, is_valid;
12969 static char templ[40];
12970
12971 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12972
12973 gcc_assert (is_valid != 0);
12974
12975 if (quad)
12976 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12977 else
12978 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12979
12980 return templ;
12981 }
12982
12983 /* Return a string suitable for output of Neon immediate shift operation
12984 (VSHR or VSHL) MNEM. */
12985
12986 char *
12987 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12988 machine_mode mode, int quad,
12989 bool isleftshift)
12990 {
12991 int width, is_valid;
12992 static char templ[40];
12993
12994 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12995 gcc_assert (is_valid != 0);
12996
12997 if (quad)
12998 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12999 else
13000 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13001
13002 return templ;
13003 }
13004
13005 /* Output a sequence of pairwise operations to implement a reduction.
13006 NOTE: We do "too much work" here, because pairwise operations work on two
13007 registers-worth of operands in one go. Unfortunately it does not seem
13008 possible to exploit those extra calculations to do the full operation in fewer steps.
13009 Although all vector elements of the result but the first are ignored, we
13010 actually calculate the same result in each of the elements. An alternative
13011 such as initially loading a vector with zero to use as each of the second
13012 operands would use up an additional register and take an extra instruction,
13013 for no particular gain. */
13014
13015 void
13016 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13017 rtx (*reduc) (rtx, rtx, rtx))
13018 {
13019 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13020 rtx tmpsum = op1;
13021
13022 for (i = parts / 2; i >= 1; i /= 2)
13023 {
13024 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13025 emit_insn (reduc (dest, tmpsum, tmpsum));
13026 tmpsum = dest;
13027 }
13028 }
13029
13030 /* If VALS is a vector constant that can be loaded into a register
13031 using VDUP, generate instructions to do so and return an RTX to
13032 assign to the register. Otherwise return NULL_RTX. */
13033
13034 static rtx
13035 neon_vdup_constant (rtx vals)
13036 {
13037 machine_mode mode = GET_MODE (vals);
13038 machine_mode inner_mode = GET_MODE_INNER (mode);
13039 rtx x;
13040
13041 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13042 return NULL_RTX;
13043
13044 if (!const_vec_duplicate_p (vals, &x))
13045 /* The elements are not all the same. We could handle repeating
13046 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13047 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13048 vdup.i16). */
13049 return NULL_RTX;
13050
13051 /* We can load this constant by using VDUP and a constant in a
13052 single ARM register. This will be cheaper than a vector
13053 load. */
13054
13055 x = copy_to_mode_reg (inner_mode, x);
13056 return gen_vec_duplicate (mode, x);
13057 }
13058
13059 /* Generate code to load VALS, which is a PARALLEL containing only
13060 constants (for vec_init) or CONST_VECTOR, efficiently into a
13061 register. Returns an RTX to copy into the register, or NULL_RTX
13062 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
13063
13064 rtx
13065 neon_make_constant (rtx vals)
13066 {
13067 machine_mode mode = GET_MODE (vals);
13068 rtx target;
13069 rtx const_vec = NULL_RTX;
13070 int n_elts = GET_MODE_NUNITS (mode);
13071 int n_const = 0;
13072 int i;
13073
13074 if (GET_CODE (vals) == CONST_VECTOR)
13075 const_vec = vals;
13076 else if (GET_CODE (vals) == PARALLEL)
13077 {
13078 /* A CONST_VECTOR must contain only CONST_INTs and
13079 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13080 Only store valid constants in a CONST_VECTOR. */
13081 for (i = 0; i < n_elts; ++i)
13082 {
13083 rtx x = XVECEXP (vals, 0, i);
13084 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13085 n_const++;
13086 }
13087 if (n_const == n_elts)
13088 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13089 }
13090 else
13091 gcc_unreachable ();
13092
13093 if (const_vec != NULL
13094 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13095 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13096 return const_vec;
13097 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
13098 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13099 pipeline cycle; creating the constant takes one or two ARM
13100 pipeline cycles. */
13101 return target;
13102 else if (const_vec != NULL_RTX)
13103 /* Load from constant pool. On Cortex-A8 this takes two cycles
13104 (for either double or quad vectors). We cannot take advantage
13105 of single-cycle VLD1 because we need a PC-relative addressing
13106 mode. */
13107 return const_vec;
13108 else
13109 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13110 We cannot construct an initializer. */
13111 return NULL_RTX;
13112 }
13113
13114 /* Initialize vector TARGET to VALS. */
13115
13116 void
13117 neon_expand_vector_init (rtx target, rtx vals)
13118 {
13119 machine_mode mode = GET_MODE (target);
13120 machine_mode inner_mode = GET_MODE_INNER (mode);
13121 int n_elts = GET_MODE_NUNITS (mode);
13122 int n_var = 0, one_var = -1;
13123 bool all_same = true;
13124 rtx x, mem;
13125 int i;
13126
13127 for (i = 0; i < n_elts; ++i)
13128 {
13129 x = XVECEXP (vals, 0, i);
13130 if (!CONSTANT_P (x))
13131 ++n_var, one_var = i;
13132
13133 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13134 all_same = false;
13135 }
13136
13137 if (n_var == 0)
13138 {
13139 rtx constant = neon_make_constant (vals);
13140 if (constant != NULL_RTX)
13141 {
13142 emit_move_insn (target, constant);
13143 return;
13144 }
13145 }
13146
13147 /* Splat a single non-constant element if we can. */
13148 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13149 {
13150 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13151 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13152 return;
13153 }
13154
13155 /* One field is non-constant. Load constant then overwrite varying
13156 field. This is more efficient than using the stack. */
13157 if (n_var == 1)
13158 {
13159 rtx copy = copy_rtx (vals);
13160 rtx merge_mask = GEN_INT (1 << one_var);
13161
13162 /* Load constant part of vector, substitute neighboring value for
13163 varying element. */
13164 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13165 neon_expand_vector_init (target, copy);
13166
13167 /* Insert variable. */
13168 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13169 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13170 return;
13171 }
13172
13173 /* Construct the vector in memory one field at a time
13174 and load the whole vector. */
13175 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13176 for (i = 0; i < n_elts; i++)
13177 emit_move_insn (adjust_address_nv (mem, inner_mode,
13178 i * GET_MODE_SIZE (inner_mode)),
13179 XVECEXP (vals, 0, i));
13180 emit_move_insn (target, mem);
13181 }
13182
13183 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13184 an error mentioning DESC if it doesn't. EXP indicates the source location,
13185 which includes the inlining history for intrinsics. */
13186
13187 static void
13188 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13189 const_tree exp, const char *desc)
13190 {
13191 HOST_WIDE_INT lane;
13192
13193 gcc_assert (CONST_INT_P (operand));
13194
13195 lane = INTVAL (operand);
13196
13197 if (lane < low || lane >= high)
13198 {
13199 if (exp)
13200 error ("%K%s %wd out of range %wd - %wd",
13201 exp, desc, lane, low, high - 1);
13202 else
13203 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13204 }
13205 }
13206
13207 /* Bounds-check lanes. */
13208
13209 void
13210 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13211 const_tree exp)
13212 {
13213 bounds_check (operand, low, high, exp, "lane");
13214 }
13215
13216 /* Bounds-check constants. */
13217
13218 void
13219 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13220 {
13221 bounds_check (operand, low, high, NULL_TREE, "constant");
13222 }
13223
13224 HOST_WIDE_INT
13225 neon_element_bits (machine_mode mode)
13226 {
13227 return GET_MODE_UNIT_BITSIZE (mode);
13228 }
13229
13230 \f
13231 /* Predicates for `match_operand' and `match_operator'. */
13232
13233 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13234 WB level is 2 if full writeback address modes are allowed, 1
13235 if limited writeback address modes (POST_INC and PRE_DEC) are
13236 allowed and 0 if no writeback at all is supported. */
13237
13238 int
13239 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13240 {
13241 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13242 rtx ind;
13243
13244 /* Reject eliminable registers. */
13245 if (! (reload_in_progress || reload_completed || lra_in_progress)
13246 && ( reg_mentioned_p (frame_pointer_rtx, op)
13247 || reg_mentioned_p (arg_pointer_rtx, op)
13248 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13249 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13250 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13251 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13252 return FALSE;
13253
13254 /* Constants are converted into offsets from labels. */
13255 if (!MEM_P (op))
13256 return FALSE;
13257
13258 ind = XEXP (op, 0);
13259
13260 if (reload_completed
13261 && (GET_CODE (ind) == LABEL_REF
13262 || (GET_CODE (ind) == CONST
13263 && GET_CODE (XEXP (ind, 0)) == PLUS
13264 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13265 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13266 return TRUE;
13267
13268 /* Match: (mem (reg)). */
13269 if (REG_P (ind))
13270 return arm_address_register_rtx_p (ind, 0);
13271
13272 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13273 acceptable whenever any writeback is allowed (subject to
13274 verification by arm_address_register_rtx_p), i.e. WB_LEVEL >= 1.
13275 PRE_INC and POST_DEC are only acceptable with full writeback,
13276 i.e. WB_LEVEL > 1. */
13277 if (wb_level > 0
13278 && (GET_CODE (ind) == POST_INC
13279 || GET_CODE (ind) == PRE_DEC
13280 || (wb_level > 1
13281 && (GET_CODE (ind) == PRE_INC
13282 || GET_CODE (ind) == POST_DEC))))
13283 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13284
13285 if (wb_level > 1
13286 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13287 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13288 && GET_CODE (XEXP (ind, 1)) == PLUS
13289 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13290 ind = XEXP (ind, 1);
13291
13292 /* Match:
13293 (plus (reg)
13294 (const))
13295
13296 The encoded immediate for 16-bit modes is multiplied by 2,
13297 while the encoded immediate for 32-bit and 64-bit modes is
13298 multiplied by 4. */
13299 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13300 if (GET_CODE (ind) == PLUS
13301 && REG_P (XEXP (ind, 0))
13302 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13303 && CONST_INT_P (XEXP (ind, 1))
13304 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13305 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13306 return TRUE;
13307
13308 return FALSE;
13309 }
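
/* Illustrative sketch only (hypothetical helper, never called): the offset
   test used above for the (plus (reg) (const)) case.  For 32-bit and
   64-bit accesses FACTOR is 4, so valid offsets are multiples of 4 in
   [-1020, 1020]; for 16-bit accesses FACTOR is 2, giving multiples of 2
   in [-510, 510].  */

static bool ATTRIBUTE_UNUSED
coproc_offset_in_range_sketch_p (HOST_WIDE_INT offset, int mode_size)
{
  int factor = MIN (mode_size, 4);
  return (IN_RANGE (offset, -255 * factor, 255 * factor)
	  && (offset & (factor - 1)) == 0);
}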
13310
13311 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13312 WB is true if full writeback address modes are allowed and is false
13313 if limited writeback address modes (POST_INC and PRE_DEC) are
13314 allowed. */
13315
13316 int arm_coproc_mem_operand (rtx op, bool wb)
13317 {
13318 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13319 }
13320
13321 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13322 context in which no writeback address modes are allowed. */
13323
13324 int
13325 arm_coproc_mem_operand_no_writeback (rtx op)
13326 {
13327 return arm_coproc_mem_operand_wb (op, 0);
13328 }
13329
13330 /* Return TRUE if OP is a valid MVE vector memory operand for MODE.
13331 1. For V8QI, V4QI and V4HI modes, a [Rn] base must be a low register (Rn <= R7).
13332 2. For other modes, a [Rn] base may be any register below R15 except R13 (SP). */
13333 int
13334 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13335 {
13336 enum rtx_code code;
13337 int val, reg_no;
13338
13339 /* Match: (mem (reg)). */
13340 if (REG_P (op))
13341 {
13342 int reg_no = REGNO (op);
13343 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13344 ? reg_no <= LAST_LO_REGNUM
13345 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13346 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13347 }
13348 code = GET_CODE (op);
13349
13350 if (code == POST_INC || code == PRE_DEC
13351 || code == PRE_INC || code == POST_DEC)
13352 {
13353 reg_no = REGNO (XEXP (op, 0));
13354 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13355 ? reg_no <= LAST_LO_REGNUM
13356 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13357 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13358 }
13359 else if ((code == POST_MODIFY || code == PRE_MODIFY)
13360 && GET_CODE (XEXP (op, 1)) == PLUS && REG_P (XEXP (XEXP (op, 1), 1)))
13361 {
13362 reg_no = REGNO (XEXP (op, 0));
13363 val = INTVAL (XEXP ( XEXP (op, 1), 1));
13364 switch (mode)
13365 {
13366 case E_V16QImode:
13367 if (abs (val) <= 127)
13368 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13369 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13370 return FALSE;
13371 case E_V8HImode:
13372 case E_V8HFmode:
13373 if (abs (val) <= 255)
13374 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13375 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13376 return FALSE;
13377 case E_V8QImode:
13378 case E_V4QImode:
13379 if (abs (val) <= 127)
13380 return (reg_no <= LAST_LO_REGNUM
13381 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13382 return FALSE;
13383 case E_V4HImode:
13384 case E_V4HFmode:
13385 if (val % 2 == 0 && abs (val) <= 254)
13386 return (reg_no <= LAST_LO_REGNUM
13387 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13388 return FALSE;
13389 case E_V4SImode:
13390 case E_V4SFmode:
13391 if (val % 4 == 0 && abs (val) <= 508)
13392 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13393 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13394 return FALSE;
13395 case E_V2DImode:
13396 case E_V2DFmode:
13397 case E_TImode:
13398 if (val % 4 == 0 && val >= 0 && val <= 1020)
13399 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13400 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13401 return FALSE;
13402 default:
13403 return FALSE;
13404 }
13405 }
13406 return FALSE;
13407 }
13408
13409 /* Return TRUE if OP is a memory operand which we can load or store a vector
13410 to/from. TYPE is one of the following values:
13411 0 - Vector load/store (vldr)
13412 1 - Core registers (ldm)
13413 2 - Element/structure loads (vld1)
13414 */
13415 int
13416 neon_vector_mem_operand (rtx op, int type, bool strict)
13417 {
13418 rtx ind;
13419
13420 /* Reject eliminable registers. */
13421 if (strict && ! (reload_in_progress || reload_completed)
13422 && (reg_mentioned_p (frame_pointer_rtx, op)
13423 || reg_mentioned_p (arg_pointer_rtx, op)
13424 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13425 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13426 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13427 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13428 return FALSE;
13429
13430 /* Constants are converted into offsets from labels. */
13431 if (!MEM_P (op))
13432 return FALSE;
13433
13434 ind = XEXP (op, 0);
13435
13436 if (reload_completed
13437 && (GET_CODE (ind) == LABEL_REF
13438 || (GET_CODE (ind) == CONST
13439 && GET_CODE (XEXP (ind, 0)) == PLUS
13440 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13441 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13442 return TRUE;
13443
13444 /* Match: (mem (reg)). */
13445 if (REG_P (ind))
13446 return arm_address_register_rtx_p (ind, 0);
13447
13448 /* Allow post-increment with Neon registers. */
13449 if ((type != 1 && GET_CODE (ind) == POST_INC)
13450 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13451 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13452
13453 /* Allow post-increment by register for VLDn. */
13454 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13455 && GET_CODE (XEXP (ind, 1)) == PLUS
13456 && REG_P (XEXP (XEXP (ind, 1), 1)))
13457 return true;
13458
13459 /* Match:
13460 (plus (reg)
13461 (const)). */
13462 if (type == 0
13463 && GET_CODE (ind) == PLUS
13464 && REG_P (XEXP (ind, 0))
13465 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13466 && CONST_INT_P (XEXP (ind, 1))
13467 && INTVAL (XEXP (ind, 1)) > -1024
13468 /* For quad modes, we restrict the constant offset to be slightly less
13469 than what the instruction format permits. We have no such constraint
13470 on double mode offsets. (This must match arm_legitimate_index_p.) */
13471 && (INTVAL (XEXP (ind, 1))
13472 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13473 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13474 return TRUE;
13475
13476 return FALSE;
13477 }
13478
13479 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13480 type. */
13481 int
13482 neon_struct_mem_operand (rtx op)
13483 {
13484 rtx ind;
13485
13486 /* Reject eliminable registers. */
13487 if (! (reload_in_progress || reload_completed)
13488 && ( reg_mentioned_p (frame_pointer_rtx, op)
13489 || reg_mentioned_p (arg_pointer_rtx, op)
13490 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13491 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13492 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13493 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13494 return FALSE;
13495
13496 /* Constants are converted into offsets from labels. */
13497 if (!MEM_P (op))
13498 return FALSE;
13499
13500 ind = XEXP (op, 0);
13501
13502 if (reload_completed
13503 && (GET_CODE (ind) == LABEL_REF
13504 || (GET_CODE (ind) == CONST
13505 && GET_CODE (XEXP (ind, 0)) == PLUS
13506 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13507 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13508 return TRUE;
13509
13510 /* Match: (mem (reg)). */
13511 if (REG_P (ind))
13512 return arm_address_register_rtx_p (ind, 0);
13513
13514 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13515 if (GET_CODE (ind) == POST_INC
13516 || GET_CODE (ind) == PRE_DEC)
13517 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13518
13519 return FALSE;
13520 }
13521
13522 /* Prepares the operands for the VCMLA by lane instruction such that the right
13523 register number is selected. This instruction is special in that it always
13524 requires a D register, however there is a choice to be made between Dn[0],
13525 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13526
13527 The VCMLA by lane function always selects two values. For instance given D0
13528 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13529 used by the instruction. However given V4SF then index 0 and 1 are valid as
13530 D0[0] or D1[0] are both valid.
13531
13532 This function centralizes that information based on OPERANDS: OPERANDS[3]
13533 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13534 updated to contain the right index. */
13535
13536 rtx *
13537 neon_vcmla_lane_prepare_operands (rtx *operands)
13538 {
13539 int lane = INTVAL (operands[4]);
13540 machine_mode constmode = SImode;
13541 machine_mode mode = GET_MODE (operands[3]);
13542 int regno = REGNO (operands[3]);
13543 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13544 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13545 {
13546 operands[3] = gen_int_mode (regno + 1, constmode);
13547 operands[4]
13548 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13549 }
13550 else
13551 {
13552 operands[3] = gen_int_mode (regno, constmode);
13553 operands[4] = gen_int_mode (lane, constmode);
13554 }
13555 return operands;
13556 }
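
/* Illustrative sketch only (hypothetical helper, never called): the
   (D register, lane) remapping performed above, expressed on plain
   integers.  With NUNITS == 4 (V4SF), lane 1 of Dn maps to lane 0 of
   D(n+1); with NUNITS == 2 (V2SF) only lane 0 of Dn is reachable.  */

static void ATTRIBUTE_UNUSED
neon_vcmla_lane_remap_sketch (int d_regno, int lane, int nunits,
                              int *out_regno, int *out_lane)
{
  if (lane > 0 && lane >= nunits / 4)
    {
      *out_regno = d_regno + 1;
      *out_lane = lane - nunits / 4;
    }
  else
    {
      *out_regno = d_regno;
      *out_lane = lane;
    }
}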
13557
13558
13559 /* Return true if X is a register that will be eliminated later on. */
13560 int
13561 arm_eliminable_register (rtx x)
13562 {
13563 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13564 || REGNO (x) == ARG_POINTER_REGNUM
13565 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13566 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13567 }
13568
13569 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13570 coprocessor registers. Otherwise return NO_REGS. */
13571
13572 enum reg_class
13573 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13574 {
13575 if (mode == HFmode)
13576 {
13577 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13578 return GENERAL_REGS;
13579 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13580 return NO_REGS;
13581 return GENERAL_REGS;
13582 }
13583
13584 /* The neon move patterns handle all legitimate vector and struct
13585 addresses. */
13586 if (TARGET_NEON
13587 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13588 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13589 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13590 || VALID_NEON_STRUCT_MODE (mode)))
13591 return NO_REGS;
13592
13593 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13594 return NO_REGS;
13595
13596 return GENERAL_REGS;
13597 }
13598
13599 /* Values which must be returned in the most-significant end of the return
13600 register. */
13601
13602 static bool
13603 arm_return_in_msb (const_tree valtype)
13604 {
13605 return (TARGET_AAPCS_BASED
13606 && BYTES_BIG_ENDIAN
13607 && (AGGREGATE_TYPE_P (valtype)
13608 || TREE_CODE (valtype) == COMPLEX_TYPE
13609 || FIXED_POINT_TYPE_P (valtype)));
13610 }
13611
13612 /* Return TRUE if X references a SYMBOL_REF. */
13613 int
13614 symbol_mentioned_p (rtx x)
13615 {
13616 const char * fmt;
13617 int i;
13618
13619 if (GET_CODE (x) == SYMBOL_REF)
13620 return 1;
13621
13622 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13623 are constant offsets, not symbols. */
13624 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13625 return 0;
13626
13627 fmt = GET_RTX_FORMAT (GET_CODE (x));
13628
13629 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13630 {
13631 if (fmt[i] == 'E')
13632 {
13633 int j;
13634
13635 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13636 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13637 return 1;
13638 }
13639 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13640 return 1;
13641 }
13642
13643 return 0;
13644 }
13645
13646 /* Return TRUE if X references a LABEL_REF. */
13647 int
13648 label_mentioned_p (rtx x)
13649 {
13650 const char * fmt;
13651 int i;
13652
13653 if (GET_CODE (x) == LABEL_REF)
13654 return 1;
13655
13656 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13657 instruction, but they are constant offsets, not symbols. */
13658 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13659 return 0;
13660
13661 fmt = GET_RTX_FORMAT (GET_CODE (x));
13662 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13663 {
13664 if (fmt[i] == 'E')
13665 {
13666 int j;
13667
13668 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13669 if (label_mentioned_p (XVECEXP (x, i, j)))
13670 return 1;
13671 }
13672 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13673 return 1;
13674 }
13675
13676 return 0;
13677 }
13678
13679 int
13680 tls_mentioned_p (rtx x)
13681 {
13682 switch (GET_CODE (x))
13683 {
13684 case CONST:
13685 return tls_mentioned_p (XEXP (x, 0));
13686
13687 case UNSPEC:
13688 if (XINT (x, 1) == UNSPEC_TLS)
13689 return 1;
13690
13691 /* Fall through. */
13692 default:
13693 return 0;
13694 }
13695 }
13696
13697 /* Must not copy any rtx that uses a pc-relative address.
13698 Also, disallow copying of load-exclusive instructions that
13699 may appear after splitting of compare-and-swap-style operations
13700 so as to prevent those loops from being transformed away from their
13701 canonical forms (see PR 69904). */
13702
13703 static bool
13704 arm_cannot_copy_insn_p (rtx_insn *insn)
13705 {
13706 /* The tls call insn cannot be copied, as it is paired with a data
13707 word. */
13708 if (recog_memoized (insn) == CODE_FOR_tlscall)
13709 return true;
13710
13711 subrtx_iterator::array_type array;
13712 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13713 {
13714 const_rtx x = *iter;
13715 if (GET_CODE (x) == UNSPEC
13716 && (XINT (x, 1) == UNSPEC_PIC_BASE
13717 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13718 return true;
13719 }
13720
13721 rtx set = single_set (insn);
13722 if (set)
13723 {
13724 rtx src = SET_SRC (set);
13725 if (GET_CODE (src) == ZERO_EXTEND)
13726 src = XEXP (src, 0);
13727
13728 /* Catch the load-exclusive and load-acquire operations. */
13729 if (GET_CODE (src) == UNSPEC_VOLATILE
13730 && (XINT (src, 1) == VUNSPEC_LL
13731 || XINT (src, 1) == VUNSPEC_LAX))
13732 return true;
13733 }
13734 return false;
13735 }
13736
13737 enum rtx_code
13738 minmax_code (rtx x)
13739 {
13740 enum rtx_code code = GET_CODE (x);
13741
13742 switch (code)
13743 {
13744 case SMAX:
13745 return GE;
13746 case SMIN:
13747 return LE;
13748 case UMIN:
13749 return LEU;
13750 case UMAX:
13751 return GEU;
13752 default:
13753 gcc_unreachable ();
13754 }
13755 }
13756
13757 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13758
13759 bool
13760 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13761 int *mask, bool *signed_sat)
13762 {
13763 /* The high bound must be a power of two minus one. */
13764 int log = exact_log2 (INTVAL (hi_bound) + 1);
13765 if (log == -1)
13766 return false;
13767
13768 /* The low bound is either zero (for usat) or one less than the
13769 negation of the high bound (for ssat). */
13770 if (INTVAL (lo_bound) == 0)
13771 {
13772 if (mask)
13773 *mask = log;
13774 if (signed_sat)
13775 *signed_sat = false;
13776
13777 return true;
13778 }
13779
13780 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13781 {
13782 if (mask)
13783 *mask = log + 1;
13784 if (signed_sat)
13785 *signed_sat = true;
13786
13787 return true;
13788 }
13789
13790 return false;
13791 }
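
/* Illustrative sketch only (hypothetical helper, never called): concrete
   instances of the bounds accepted above.  Clamping to [0, 255] matches
   the USAT form with *MASK == 8, and clamping to [-128, 127] matches the
   SSAT form, also with *MASK == 8 (log2 (127 + 1) + 1).  */

static void ATTRIBUTE_UNUSED
arm_sat_bounds_sketch (void)
{
  int mask;
  bool signed_sat;

  /* USAT case: low bound 0, high bound 2^8 - 1.  */
  if (arm_sat_operator_match (GEN_INT (0), GEN_INT (255), &mask, &signed_sat))
    gcc_assert (mask == 8 && !signed_sat);

  /* SSAT case: bounds [-(2^7), 2^7 - 1].  */
  if (arm_sat_operator_match (GEN_INT (-128), GEN_INT (127), &mask, &signed_sat))
    gcc_assert (mask == 8 && signed_sat);
}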
13792
13793 /* Return 1 if memory locations are adjacent. */
13794 int
13795 adjacent_mem_locations (rtx a, rtx b)
13796 {
13797 /* We don't guarantee to preserve the order of these memory refs. */
13798 if (volatile_refs_p (a) || volatile_refs_p (b))
13799 return 0;
13800
13801 if ((REG_P (XEXP (a, 0))
13802 || (GET_CODE (XEXP (a, 0)) == PLUS
13803 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13804 && (REG_P (XEXP (b, 0))
13805 || (GET_CODE (XEXP (b, 0)) == PLUS
13806 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13807 {
13808 HOST_WIDE_INT val0 = 0, val1 = 0;
13809 rtx reg0, reg1;
13810 int val_diff;
13811
13812 if (GET_CODE (XEXP (a, 0)) == PLUS)
13813 {
13814 reg0 = XEXP (XEXP (a, 0), 0);
13815 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13816 }
13817 else
13818 reg0 = XEXP (a, 0);
13819
13820 if (GET_CODE (XEXP (b, 0)) == PLUS)
13821 {
13822 reg1 = XEXP (XEXP (b, 0), 0);
13823 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13824 }
13825 else
13826 reg1 = XEXP (b, 0);
13827
13828 /* Don't accept any offset that will require multiple
13829 instructions to handle, since this would cause the
13830 arith_adjacentmem pattern to output an overlong sequence. */
13831 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13832 return 0;
13833
13834 /* Don't allow an eliminable register: register elimination can make
13835 the offset too large. */
13836 if (arm_eliminable_register (reg0))
13837 return 0;
13838
13839 val_diff = val1 - val0;
13840
13841 if (arm_ld_sched)
13842 {
13843 /* If the target has load delay slots, then there's no benefit
13844 to using an ldm instruction unless the offset is zero and
13845 we are optimizing for size. */
13846 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13847 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13848 && (val_diff == 4 || val_diff == -4));
13849 }
13850
13851 return ((REGNO (reg0) == REGNO (reg1))
13852 && (val_diff == 4 || val_diff == -4));
13853 }
13854
13855 return 0;
13856 }
13857
13858 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13859 for load operations, false for store operations. CONSECUTIVE is true
13860 if the register numbers in the operation must be consecutive in the register
13861 bank. RETURN_PC is true if the value is to be loaded into the PC.
13862 The pattern we are trying to match for load is:
13863 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13864 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13865 :
13866 :
13867 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13868 ]
13869 where
13870 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13871 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13872 3. If consecutive is TRUE, then for kth register being loaded,
13873 REGNO (R_dk) = REGNO (R_d0) + k.
13874 The pattern for store is similar. */
13875 bool
13876 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13877 bool consecutive, bool return_pc)
13878 {
13879 HOST_WIDE_INT count = XVECLEN (op, 0);
13880 rtx reg, mem, addr;
13881 unsigned regno;
13882 unsigned first_regno;
13883 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13884 rtx elt;
13885 bool addr_reg_in_reglist = false;
13886 bool update = false;
13887 int reg_increment;
13888 int offset_adj;
13889 int regs_per_val;
13890
13891 /* If not in SImode, then registers must be consecutive
13892 (e.g., VLDM instructions for DFmode). */
13893 gcc_assert ((mode == SImode) || consecutive);
13894 /* Setting return_pc for stores is illegal. */
13895 gcc_assert (!return_pc || load);
13896
13897 /* Set up the increments and the regs per val based on the mode. */
13898 reg_increment = GET_MODE_SIZE (mode);
13899 regs_per_val = reg_increment / 4;
13900 offset_adj = return_pc ? 1 : 0;
13901
13902 if (count <= 1
13903 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13904 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13905 return false;
13906
13907 /* Check if this is a write-back. */
13908 elt = XVECEXP (op, 0, offset_adj);
13909 if (GET_CODE (SET_SRC (elt)) == PLUS)
13910 {
13911 i++;
13912 base = 1;
13913 update = true;
13914
13915 /* The offset adjustment must be the number of registers being
13916 popped times the size of a single register. */
13917 if (!REG_P (SET_DEST (elt))
13918 || !REG_P (XEXP (SET_SRC (elt), 0))
13919 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13920 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13921 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13922 ((count - 1 - offset_adj) * reg_increment))
13923 return false;
13924 }
13925
13926 i = i + offset_adj;
13927 base = base + offset_adj;
13928 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13929 success depends on the type: VLDM can do just one reg,
13930 LDM must do at least two. */
13931 if ((count <= i) && (mode == SImode))
13932 return false;
13933
13934 elt = XVECEXP (op, 0, i - 1);
13935 if (GET_CODE (elt) != SET)
13936 return false;
13937
13938 if (load)
13939 {
13940 reg = SET_DEST (elt);
13941 mem = SET_SRC (elt);
13942 }
13943 else
13944 {
13945 reg = SET_SRC (elt);
13946 mem = SET_DEST (elt);
13947 }
13948
13949 if (!REG_P (reg) || !MEM_P (mem))
13950 return false;
13951
13952 regno = REGNO (reg);
13953 first_regno = regno;
13954 addr = XEXP (mem, 0);
13955 if (GET_CODE (addr) == PLUS)
13956 {
13957 if (!CONST_INT_P (XEXP (addr, 1)))
13958 return false;
13959
13960 offset = INTVAL (XEXP (addr, 1));
13961 addr = XEXP (addr, 0);
13962 }
13963
13964 if (!REG_P (addr))
13965 return false;
13966
13967 /* Don't allow SP to be loaded unless it is also the base register. It
13968 guarantees that SP is reset correctly when an LDM instruction
13969 is interrupted. Otherwise, we might end up with a corrupt stack. */
13970 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13971 return false;
13972
13973 if (regno == REGNO (addr))
13974 addr_reg_in_reglist = true;
13975
13976 for (; i < count; i++)
13977 {
13978 elt = XVECEXP (op, 0, i);
13979 if (GET_CODE (elt) != SET)
13980 return false;
13981
13982 if (load)
13983 {
13984 reg = SET_DEST (elt);
13985 mem = SET_SRC (elt);
13986 }
13987 else
13988 {
13989 reg = SET_SRC (elt);
13990 mem = SET_DEST (elt);
13991 }
13992
13993 if (!REG_P (reg)
13994 || GET_MODE (reg) != mode
13995 || REGNO (reg) <= regno
13996 || (consecutive
13997 && (REGNO (reg) !=
13998 (unsigned int) (first_regno + regs_per_val * (i - base))))
13999 /* Don't allow SP to be loaded unless it is also the base register. It
14000 guarantees that SP is reset correctly when an LDM instruction
14001 is interrupted. Otherwise, we might end up with a corrupt stack. */
14002 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14003 || !MEM_P (mem)
14004 || GET_MODE (mem) != mode
14005 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14006 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14007 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14008 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14009 offset + (i - base) * reg_increment))
14010 && (!REG_P (XEXP (mem, 0))
14011 || offset + (i - base) * reg_increment != 0)))
14012 return false;
14013
14014 regno = REGNO (reg);
14015 if (regno == REGNO (addr))
14016 addr_reg_in_reglist = true;
14017 }
14018
14019 if (load)
14020 {
14021 if (update && addr_reg_in_reglist)
14022 return false;
14023
14024 /* For Thumb-1, the address register is always modified, either by
14025 write-back or by an explicit load. If the pattern does not describe an
14026 update, then the address register must be in the list of loaded registers. */
14027 if (TARGET_THUMB1)
14028 return update || addr_reg_in_reglist;
14029 }
14030
14031 return true;
14032 }
14033
14034 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14035 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14036 following form:
14037
14038 [(set (reg:SI <N>) (const_int 0))
14039 (set (reg:SI <M>) (const_int 0))
14040 ...
14041 (unspec_volatile [(const_int 0)]
14042 VUNSPEC_CLRM_APSR)
14043 (clobber (reg:CC CC_REGNUM))
14044 ]
14045
14046 Any number (including 0) of set expressions is valid, the volatile unspec is
14047 optional. All registers but SP and PC are allowed and registers must be in
14048 strictly increasing order.
14049
14050 To be a valid VSCCLRM pattern, OP must have the following form:
14051
14052 [(unspec_volatile [(const_int 0)]
14053 VUNSPEC_VSCCLRM_VPR)
14054 (set (reg:SF <N>) (const_int 0))
14055 (set (reg:SF <M>) (const_int 0))
14056 ...
14057 ]
14058
14059 As with CLRM, any number (including 0) of set expressions is valid, however
14060 the volatile unspec is mandatory here. Any VFP single-precision register is
14061 accepted but all registers must be consecutive and in increasing order. */
14062
14063 bool
14064 clear_operation_p (rtx op, bool vfp)
14065 {
14066 unsigned regno;
14067 unsigned last_regno = INVALID_REGNUM;
14068 rtx elt, reg, zero;
14069 int count = XVECLEN (op, 0);
14070 int first_set = vfp ? 1 : 0;
14071 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14072
14073 for (int i = first_set; i < count; i++)
14074 {
14075 elt = XVECEXP (op, 0, i);
14076
14077 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14078 {
14079 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14080 || XVECLEN (elt, 0) != 1
14081 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14082 || i != count - 2)
14083 return false;
14084
14085 continue;
14086 }
14087
14088 if (GET_CODE (elt) == CLOBBER)
14089 continue;
14090
14091 if (GET_CODE (elt) != SET)
14092 return false;
14093
14094 reg = SET_DEST (elt);
14095 zero = SET_SRC (elt);
14096
14097 if (!REG_P (reg)
14098 || GET_MODE (reg) != expected_mode
14099 || zero != CONST0_RTX (SImode))
14100 return false;
14101
14102 regno = REGNO (reg);
14103
14104 if (vfp)
14105 {
14106 if (i != first_set && regno != last_regno + 1)
14107 return false;
14108 }
14109 else
14110 {
14111 if (regno == SP_REGNUM || regno == PC_REGNUM)
14112 return false;
14113 if (i != first_set && regno <= last_regno)
14114 return false;
14115 }
14116
14117 last_regno = regno;
14118 }
14119
14120 return true;
14121 }
14122
14123 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14124 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14125 instruction. ADD_OFFSET is nonzero if the base address register needs
14126 to be modified with an add instruction before we can use it. */
14127
14128 static bool
14129 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14130 int nops, HOST_WIDE_INT add_offset)
14131 {
14132 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14133 if the offset isn't small enough. The reason 2 ldrs are faster
14134 is because these ARMs are able to do more than one cache access
14135 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14136 whilst the ARM8 has a double bandwidth cache. This means that
14137 these cores can do both an instruction fetch and a data fetch in
14138 a single cycle, so the trick of calculating the address into a
14139 scratch register (one of the result regs) and then doing a load
14140 multiple actually becomes slower (and no smaller in code size).
14141 That is the transformation
14142
14143 ldr rd1, [rbase + offset]
14144 ldr rd2, [rbase + offset + 4]
14145
14146 to
14147
14148 add rd1, rbase, offset
14149 ldmia rd1, {rd1, rd2}
14150
14151 produces worse code -- '3 cycles + any stalls on rd2' instead of
14152 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14153 access per cycle, the first sequence could never complete in less
14154 than 6 cycles, whereas the ldm sequence would only take 5 and
14155 would make better use of sequential accesses if not hitting the
14156 cache.
14157
14158 We cheat here and test 'arm_ld_sched' which we currently know to
14159 only be true for the ARM8, ARM9 and StrongARM. If this ever
14160 changes, then the test below needs to be reworked. */
14161 if (nops == 2 && arm_ld_sched && add_offset != 0)
14162 return false;
14163
14164 /* XScale has load-store double instructions, but they have stricter
14165 alignment requirements than load-store multiple, so we cannot
14166 use them.
14167
14168 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14169 the pipeline until completion.
14170
14171 NREGS CYCLES
14172 1 3
14173 2 4
14174 3 5
14175 4 6
14176
14177 An ldr instruction takes 1-3 cycles, but does not block the
14178 pipeline.
14179
14180 NREGS CYCLES
14181 1 1-3
14182 2 2-6
14183 3 3-9
14184 4 4-12
14185
14186 Best case ldr will always win. However, the more ldr instructions
14187 we issue, the less likely we are to be able to schedule them well.
14188 Using ldr instructions also increases code size.
14189
14190 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14191 for counts of 3 or 4 regs. */
14192 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14193 return false;
14194 return true;
14195 }
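/* An illustrative reading of the XScale tables above (derived only from the
   figures quoted in the comment): for 2 registers an ldm costs 2 + 2 = 4
   cycles and blocks the pipeline, while two separate ldr instructions cost
   2-6 cycles that the scheduler can overlap with other work, so separate
   loads are preferred unless optimizing for size.  For 4 registers an ldm
   costs 6 cycles against a best case of 4 (worst 12) for four ldrs, plus
   the extra scheduling pressure and code size, so the multiple form wins.  */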
14196
14197 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14198 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14199 an array ORDER which describes the sequence to use when accessing the
14200 offsets that produces an ascending order. In this sequence, each
14201 offset must be larger by exactly 4 than the previous one. ORDER[0]
14202 must have been filled in by the caller with the index of the lowest offset.
14203 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14204 we use to verify that ORDER produces an ascending order of registers.
14205 Return true if it was possible to construct such an order, false if
14206 not. */
14207
14208 static bool
14209 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14210 int *unsorted_regs)
14211 {
14212 int i;
14213 for (i = 1; i < nops; i++)
14214 {
14215 int j;
14216
14217 order[i] = order[i - 1];
14218 for (j = 0; j < nops; j++)
14219 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14220 {
14221 /* We must find exactly one offset that is higher than the
14222 previous one by 4. */
14223 if (order[i] != order[i - 1])
14224 return false;
14225 order[i] = j;
14226 }
14227 if (order[i] == order[i - 1])
14228 return false;
14229 /* The register numbers must be ascending. */
14230 if (unsorted_regs != NULL
14231 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14232 return false;
14233 }
14234 return true;
14235 }
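/* A worked example with hypothetical values: for NOPS = 4,
   UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the index of the
   lowest offset, 0), the loop above fills ORDER = {1, 2, 0, 3}, since the
   offsets 4, 8 and 12 each exceed the previous one by exactly 4.  If
   UNSORTED_REGS = {2, 0, 1, 3} the register check also passes, because the
   registers visited along that order are 0, 1, 2, 3; swapping any two of
   them would make the function return false.  */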
14236
14237 /* Used to determine in a peephole whether a sequence of load
14238 instructions can be changed into a load-multiple instruction.
14239 NOPS is the number of separate load instructions we are examining. The
14240 first NOPS entries in OPERANDS are the destination registers, the
14241 next NOPS entries are memory operands. If this function is
14242 successful, *BASE is set to the common base register of the memory
14243 accesses; *LOAD_OFFSET is set to the first memory location's offset
14244 from that base register.
14245 REGS is an array filled in with the destination register numbers.
14246 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14247 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14248 the sequence of registers in REGS matches the loads from ascending memory
14249 locations, and the function verifies that the register numbers are
14250 themselves ascending. If CHECK_REGS is false, the register numbers
14251 are stored in the order they are found in the operands. */
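/* For illustration only (a hypothetical peephole window, not taken from the
   machine description): given the two loads

       ldr r1, [r5]
       ldr r2, [r5, #4]

   with operands[0..1] holding r1/r2 and operands[2..3] the two mems, a call
   load_multiple_sequence (operands, 2, regs, NULL, &base, &offset, true)
   returns 1 (the ldmia case) with regs = {1, 2}, base = 5 and offset = 0,
   so -- on a core where two separate loads are not preferred by the checks
   above -- the pair can be rewritten as "ldmia r5, {r1, r2}".  */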
14252 static int
14253 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14254 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14255 {
14256 int unsorted_regs[MAX_LDM_STM_OPS];
14257 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14258 int order[MAX_LDM_STM_OPS];
14259 int base_reg = -1;
14260 int i, ldm_case;
14261
14262 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14263 easily extended if required. */
14264 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14265
14266 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14267
14268 /* Loop over the operands and check that the memory references are
14269 suitable (i.e. immediate offsets from the same base register). At
14270 the same time, extract the target register, and the memory
14271 offsets. */
14272 for (i = 0; i < nops; i++)
14273 {
14274 rtx reg;
14275 rtx offset;
14276
14277 /* Convert a subreg of a mem into the mem itself. */
14278 if (GET_CODE (operands[nops + i]) == SUBREG)
14279 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14280
14281 gcc_assert (MEM_P (operands[nops + i]));
14282
14283 /* Don't reorder volatile memory references; it doesn't seem worth
14284 looking for the case where the order is ok anyway. */
14285 if (MEM_VOLATILE_P (operands[nops + i]))
14286 return 0;
14287
14288 offset = const0_rtx;
14289
14290 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14291 || (GET_CODE (reg) == SUBREG
14292 && REG_P (reg = SUBREG_REG (reg))))
14293 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14294 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14295 || (GET_CODE (reg) == SUBREG
14296 && REG_P (reg = SUBREG_REG (reg))))
14297 && (CONST_INT_P (offset
14298 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14299 {
14300 if (i == 0)
14301 {
14302 base_reg = REGNO (reg);
14303 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14304 return 0;
14305 }
14306 else if (base_reg != (int) REGNO (reg))
14307 /* Not addressed from the same base register. */
14308 return 0;
14309
14310 unsorted_regs[i] = (REG_P (operands[i])
14311 ? REGNO (operands[i])
14312 : REGNO (SUBREG_REG (operands[i])));
14313
14314 /* If it isn't an integer register, or if it overwrites the
14315 base register but isn't the last insn in the list, then
14316 we can't do this. */
14317 if (unsorted_regs[i] < 0
14318 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14319 || unsorted_regs[i] > 14
14320 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14321 return 0;
14322
14323 /* Don't allow SP to be loaded unless it is also the base
14324 register. It guarantees that SP is reset correctly when
14325 an LDM instruction is interrupted. Otherwise, we might
14326 end up with a corrupt stack. */
14327 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14328 return 0;
14329
14330 unsorted_offsets[i] = INTVAL (offset);
14331 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14332 order[0] = i;
14333 }
14334 else
14335 /* Not a suitable memory address. */
14336 return 0;
14337 }
14338
14339 /* All the useful information has now been extracted from the
14340 operands into unsorted_regs and unsorted_offsets; additionally,
14341 order[0] has been set to the lowest offset in the list. Sort
14342 the offsets into order, verifying that they are adjacent, and
14343 check that the register numbers are ascending. */
14344 if (!compute_offset_order (nops, unsorted_offsets, order,
14345 check_regs ? unsorted_regs : NULL))
14346 return 0;
14347
14348 if (saved_order)
14349 memcpy (saved_order, order, sizeof order);
14350
14351 if (base)
14352 {
14353 *base = base_reg;
14354
14355 for (i = 0; i < nops; i++)
14356 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14357
14358 *load_offset = unsorted_offsets[order[0]];
14359 }
14360
14361 if (unsorted_offsets[order[0]] == 0)
14362 ldm_case = 1; /* ldmia */
14363 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14364 ldm_case = 2; /* ldmib */
14365 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14366 ldm_case = 3; /* ldmda */
14367 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14368 ldm_case = 4; /* ldmdb */
14369 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14370 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14371 ldm_case = 5;
14372 else
14373 return 0;
14374
14375 if (!multiple_operation_profitable_p (false, nops,
14376 ldm_case == 5
14377 ? unsorted_offsets[order[0]] : 0))
14378 return 0;
14379
14380 return ldm_case;
14381 }
14382
14383 /* Used to determine in a peephole whether a sequence of store instructions can
14384 be changed into a store-multiple instruction.
14385 NOPS is the number of separate store instructions we are examining.
14386 NOPS_TOTAL is the total number of instructions recognized by the peephole
14387 pattern.
14388 The first NOPS entries in OPERANDS are the source registers, the next
14389 NOPS entries are memory operands. If this function is successful, *BASE is
14390 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14391 to the first memory location's offset from that base register. REGS is an
14392 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14393 likewise filled with the corresponding rtx's.
14394 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14395 numbers to an ascending order of stores.
14396 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14397 from ascending memory locations, and the function verifies that the register
14398 numbers are themselves ascending. If CHECK_REGS is false, the register
14399 numbers are stored in the order they are found in the operands. */
14400 static int
14401 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14402 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14403 HOST_WIDE_INT *load_offset, bool check_regs)
14404 {
14405 int unsorted_regs[MAX_LDM_STM_OPS];
14406 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14407 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14408 int order[MAX_LDM_STM_OPS];
14409 int base_reg = -1;
14410 rtx base_reg_rtx = NULL;
14411 int i, stm_case;
14412
14413 /* Write back of base register is currently only supported for Thumb 1. */
14414 int base_writeback = TARGET_THUMB1;
14415
14416 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14417 easily extended if required. */
14418 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14419
14420 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14421
14422 /* Loop over the operands and check that the memory references are
14423 suitable (i.e. immediate offsets from the same base register). At
14424 the same time, extract the target register, and the memory
14425 offsets. */
14426 for (i = 0; i < nops; i++)
14427 {
14428 rtx reg;
14429 rtx offset;
14430
14431 /* Convert a subreg of a mem into the mem itself. */
14432 if (GET_CODE (operands[nops + i]) == SUBREG)
14433 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14434
14435 gcc_assert (MEM_P (operands[nops + i]));
14436
14437 /* Don't reorder volatile memory references; it doesn't seem worth
14438 looking for the case where the order is ok anyway. */
14439 if (MEM_VOLATILE_P (operands[nops + i]))
14440 return 0;
14441
14442 offset = const0_rtx;
14443
14444 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14445 || (GET_CODE (reg) == SUBREG
14446 && REG_P (reg = SUBREG_REG (reg))))
14447 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14448 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14449 || (GET_CODE (reg) == SUBREG
14450 && REG_P (reg = SUBREG_REG (reg))))
14451 && (CONST_INT_P (offset
14452 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14453 {
14454 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14455 ? operands[i] : SUBREG_REG (operands[i]));
14456 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14457
14458 if (i == 0)
14459 {
14460 base_reg = REGNO (reg);
14461 base_reg_rtx = reg;
14462 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14463 return 0;
14464 }
14465 else if (base_reg != (int) REGNO (reg))
14466 /* Not addressed from the same base register. */
14467 return 0;
14468
14469 /* If it isn't an integer register, then we can't do this. */
14470 if (unsorted_regs[i] < 0
14471 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14472 /* The effects are unpredictable if the base register is
14473 both updated and stored. */
14474 || (base_writeback && unsorted_regs[i] == base_reg)
14475 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14476 || unsorted_regs[i] > 14)
14477 return 0;
14478
14479 unsorted_offsets[i] = INTVAL (offset);
14480 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14481 order[0] = i;
14482 }
14483 else
14484 /* Not a suitable memory address. */
14485 return 0;
14486 }
14487
14488 /* All the useful information has now been extracted from the
14489 operands into unsorted_regs and unsorted_offsets; additionally,
14490 order[0] has been set to the lowest offset in the list. Sort
14491 the offsets into order, verifying that they are adjacent, and
14492 check that the register numbers are ascending. */
14493 if (!compute_offset_order (nops, unsorted_offsets, order,
14494 check_regs ? unsorted_regs : NULL))
14495 return 0;
14496
14497 if (saved_order)
14498 memcpy (saved_order, order, sizeof order);
14499
14500 if (base)
14501 {
14502 *base = base_reg;
14503
14504 for (i = 0; i < nops; i++)
14505 {
14506 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14507 if (reg_rtxs)
14508 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14509 }
14510
14511 *load_offset = unsorted_offsets[order[0]];
14512 }
14513
14514 if (TARGET_THUMB1
14515 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14516 return 0;
14517
14518 if (unsorted_offsets[order[0]] == 0)
14519 stm_case = 1; /* stmia */
14520 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14521 stm_case = 2; /* stmib */
14522 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14523 stm_case = 3; /* stmda */
14524 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14525 stm_case = 4; /* stmdb */
14526 else
14527 return 0;
14528
14529 if (!multiple_operation_profitable_p (false, nops, 0))
14530 return 0;
14531
14532 return stm_case;
14533 }
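/* As a rough illustration of the case numbers returned above (assuming all
   other checks pass): stores at [rn], [rn, #4], ... give case 1 (stmia);
   stores starting at [rn, #4] give case 2 (stmib, ARM state only); a run
   whose highest offset is 0, e.g. [rn, #-8], [rn, #-4], [rn], gives case 3
   (stmda, ARM state only); and one whose highest offset is -4 gives case 4
   (stmdb).  Unlike the load counterpart, this function never returns a
   case 5 for an arbitrary base offset.  */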
14534 \f
14535 /* Routines for use in generating RTL. */
14536
14537 /* Generate a load-multiple instruction. COUNT is the number of loads in
14538 the instruction; REGS and MEMS are arrays containing the operands.
14539 BASEREG is the base register to be used in addressing the memory operands.
14540 WBACK_OFFSET is nonzero if the instruction should update the base
14541 register. */
14542
14543 static rtx
14544 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14545 HOST_WIDE_INT wback_offset)
14546 {
14547 int i = 0, j;
14548 rtx result;
14549
14550 if (!multiple_operation_profitable_p (false, count, 0))
14551 {
14552 rtx seq;
14553
14554 start_sequence ();
14555
14556 for (i = 0; i < count; i++)
14557 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14558
14559 if (wback_offset != 0)
14560 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14561
14562 seq = get_insns ();
14563 end_sequence ();
14564
14565 return seq;
14566 }
14567
14568 result = gen_rtx_PARALLEL (VOIDmode,
14569 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14570 if (wback_offset != 0)
14571 {
14572 XVECEXP (result, 0, 0)
14573 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14574 i = 1;
14575 count++;
14576 }
14577
14578 for (j = 0; i < count; i++, j++)
14579 XVECEXP (result, 0, i)
14580 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14581
14582 return result;
14583 }
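/* A sketch of the RTL produced above for COUNT = 2, REGS = {4, 5} and a
   writeback offset of 8 (values invented for the example):

     (parallel [(set (reg:SI basereg)
                     (plus:SI (reg:SI basereg) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   When the profitability check fails, the same arguments instead yield a
   plain sequence of two register moves plus the base-register update.  */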
14584
14585 /* Generate a store-multiple instruction. COUNT is the number of stores in
14586 the instruction; REGS and MEMS are arrays containing the operands.
14587 BASEREG is the base register to be used in addressing the memory operands.
14588 WBACK_OFFSET is nonzero if the instruction should update the base
14589 register. */
14590
14591 static rtx
14592 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14593 HOST_WIDE_INT wback_offset)
14594 {
14595 int i = 0, j;
14596 rtx result;
14597
14598 if (GET_CODE (basereg) == PLUS)
14599 basereg = XEXP (basereg, 0);
14600
14601 if (!multiple_operation_profitable_p (false, count, 0))
14602 {
14603 rtx seq;
14604
14605 start_sequence ();
14606
14607 for (i = 0; i < count; i++)
14608 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14609
14610 if (wback_offset != 0)
14611 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14612
14613 seq = get_insns ();
14614 end_sequence ();
14615
14616 return seq;
14617 }
14618
14619 result = gen_rtx_PARALLEL (VOIDmode,
14620 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14621 if (wback_offset != 0)
14622 {
14623 XVECEXP (result, 0, 0)
14624 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14625 i = 1;
14626 count++;
14627 }
14628
14629 for (j = 0; i < count; i++, j++)
14630 XVECEXP (result, 0, i)
14631 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14632
14633 return result;
14634 }
14635
14636 /* Generate either a load-multiple or a store-multiple instruction. This
14637 function can be used in situations where we can start with a single MEM
14638 rtx and adjust its address upwards.
14639 COUNT is the number of operations in the instruction, not counting a
14640 possible update of the base register. REGS is an array containing the
14641 register operands.
14642 BASEREG is the base register to be used in addressing the memory operands,
14643 which are constructed from BASEMEM.
14644 WRITE_BACK specifies whether the generated instruction should include an
14645 update of the base register.
14646 OFFSETP is used to pass an offset to and from this function; this offset
14647 is not used when constructing the address (instead BASEMEM should have an
14648 appropriate offset in its address); it is used only for setting
14649 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14650
14651 static rtx
14652 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14653 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14654 {
14655 rtx mems[MAX_LDM_STM_OPS];
14656 HOST_WIDE_INT offset = *offsetp;
14657 int i;
14658
14659 gcc_assert (count <= MAX_LDM_STM_OPS);
14660
14661 if (GET_CODE (basereg) == PLUS)
14662 basereg = XEXP (basereg, 0);
14663
14664 for (i = 0; i < count; i++)
14665 {
14666 rtx addr = plus_constant (Pmode, basereg, i * 4);
14667 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14668 offset += 4;
14669 }
14670
14671 if (write_back)
14672 *offsetp = offset;
14673
14674 if (is_load)
14675 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14676 write_back ? 4 * count : 0);
14677 else
14678 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14679 write_back ? 4 * count : 0);
14680 }
14681
14682 rtx
14683 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14684 rtx basemem, HOST_WIDE_INT *offsetp)
14685 {
14686 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14687 offsetp);
14688 }
14689
14690 rtx
14691 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14692 rtx basemem, HOST_WIDE_INT *offsetp)
14693 {
14694 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14695 offsetp);
14696 }
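/* Typical use, as in the block-copy code later in this file (assuming
   arm_regs_in_sequence lists the core registers in ascending order):

     emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                       TRUE, srcbase, &srcoffset));

   loads four words into r0-r3, makes the instruction write SRC back with an
   increment of 16, and advances SRCOFFSET by 16 so that any later scalar
   copies know how far the copy has progressed.  */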
14697
14698 /* Called from a peephole2 expander to turn a sequence of loads into an
14699 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14700 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14701 is true if we can reorder the registers because they are used commutatively
14702 subsequently.
14703 Returns true iff we could generate a new instruction. */
14704
14705 bool
14706 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14707 {
14708 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14709 rtx mems[MAX_LDM_STM_OPS];
14710 int i, j, base_reg;
14711 rtx base_reg_rtx;
14712 HOST_WIDE_INT offset;
14713 int write_back = FALSE;
14714 int ldm_case;
14715 rtx addr;
14716
14717 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14718 &base_reg, &offset, !sort_regs);
14719
14720 if (ldm_case == 0)
14721 return false;
14722
14723 if (sort_regs)
14724 for (i = 0; i < nops - 1; i++)
14725 for (j = i + 1; j < nops; j++)
14726 if (regs[i] > regs[j])
14727 {
14728 int t = regs[i];
14729 regs[i] = regs[j];
14730 regs[j] = t;
14731 }
14732 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14733
14734 if (TARGET_THUMB1)
14735 {
14736 gcc_assert (ldm_case == 1 || ldm_case == 5);
14737
14738 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14739 write_back = true;
14740 for (i = 0; i < nops; i++)
14741 if (base_reg == regs[i])
14742 write_back = false;
14743
14744 /* Ensure the base is dead if it is updated. */
14745 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14746 return false;
14747 }
14748
14749 if (ldm_case == 5)
14750 {
14751 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14752 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14753 offset = 0;
14754 base_reg_rtx = newbase;
14755 }
14756
14757 for (i = 0; i < nops; i++)
14758 {
14759 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14760 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14761 SImode, addr, 0);
14762 }
14763 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14764 write_back ? offset + i * 4 : 0));
14765 return true;
14766 }
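/* For example (registers invented, and assuming the cost checks accept the
   transformation), a peephole window containing

       ldr r0, [r4, #8]
       ldr r1, [r4, #12]

   hits ldm_case 5 and is rewritten as

       add r0, r4, #8
       ldmia r0, {r0, r1}

   reusing the first destination register to hold the adjusted base.  */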
14767
14768 /* Called from a peephole2 expander to turn a sequence of stores into an
14769 STM instruction. OPERANDS are the operands found by the peephole matcher;
14770 NOPS indicates how many separate stores we are trying to combine.
14771 Returns true iff we could generate a new instruction. */
14772
14773 bool
14774 gen_stm_seq (rtx *operands, int nops)
14775 {
14776 int i;
14777 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14778 rtx mems[MAX_LDM_STM_OPS];
14779 int base_reg;
14780 rtx base_reg_rtx;
14781 HOST_WIDE_INT offset;
14782 int write_back = FALSE;
14783 int stm_case;
14784 rtx addr;
14785 bool base_reg_dies;
14786
14787 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14788 mem_order, &base_reg, &offset, true);
14789
14790 if (stm_case == 0)
14791 return false;
14792
14793 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14794
14795 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14796 if (TARGET_THUMB1)
14797 {
14798 gcc_assert (base_reg_dies);
14799 write_back = TRUE;
14800 }
14801
14802 if (stm_case == 5)
14803 {
14804 gcc_assert (base_reg_dies);
14805 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14806 offset = 0;
14807 }
14808
14809 addr = plus_constant (Pmode, base_reg_rtx, offset);
14810
14811 for (i = 0; i < nops; i++)
14812 {
14813 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14814 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14815 SImode, addr, 0);
14816 }
14817 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14818 write_back ? offset + i * 4 : 0));
14819 return true;
14820 }
14821
14822 /* Called from a peephole2 expander to turn a sequence of stores that are
14823 preceded by constant loads into an STM instruction. OPERANDS are the
14824 operands found by the peephole matcher; NOPS indicates how many
14825 separate stores we are trying to combine; there are 2 * NOPS
14826 instructions in the peephole.
14827 Returns true iff we could generate a new instruction. */
14828
14829 bool
14830 gen_const_stm_seq (rtx *operands, int nops)
14831 {
14832 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14833 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14834 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14835 rtx mems[MAX_LDM_STM_OPS];
14836 int base_reg;
14837 rtx base_reg_rtx;
14838 HOST_WIDE_INT offset;
14839 int write_back = FALSE;
14840 int stm_case;
14841 rtx addr;
14842 bool base_reg_dies;
14843 int i, j;
14844 HARD_REG_SET allocated;
14845
14846 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14847 mem_order, &base_reg, &offset, false);
14848
14849 if (stm_case == 0)
14850 return false;
14851
14852 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14853
14854 /* If the same register is used more than once, try to find a free
14855 register. */
14856 CLEAR_HARD_REG_SET (allocated);
14857 for (i = 0; i < nops; i++)
14858 {
14859 for (j = i + 1; j < nops; j++)
14860 if (regs[i] == regs[j])
14861 {
14862 rtx t = peep2_find_free_register (0, nops * 2,
14863 TARGET_THUMB1 ? "l" : "r",
14864 SImode, &allocated);
14865 if (t == NULL_RTX)
14866 return false;
14867 reg_rtxs[i] = t;
14868 regs[i] = REGNO (t);
14869 }
14870 }
14871
14872 /* Compute an ordering that maps the register numbers to an ascending
14873 sequence. */
14874 reg_order[0] = 0;
14875 for (i = 0; i < nops; i++)
14876 if (regs[i] < regs[reg_order[0]])
14877 reg_order[0] = i;
14878
14879 for (i = 1; i < nops; i++)
14880 {
14881 int this_order = reg_order[i - 1];
14882 for (j = 0; j < nops; j++)
14883 if (regs[j] > regs[reg_order[i - 1]]
14884 && (this_order == reg_order[i - 1]
14885 || regs[j] < regs[this_order]))
14886 this_order = j;
14887 reg_order[i] = this_order;
14888 }
14889
14890 /* Ensure that registers that must be live after the instruction end
14891 up with the correct value. */
14892 for (i = 0; i < nops; i++)
14893 {
14894 int this_order = reg_order[i];
14895 if ((this_order != mem_order[i]
14896 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14897 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14898 return false;
14899 }
14900
14901 /* Load the constants. */
14902 for (i = 0; i < nops; i++)
14903 {
14904 rtx op = operands[2 * nops + mem_order[i]];
14905 sorted_regs[i] = regs[reg_order[i]];
14906 emit_move_insn (reg_rtxs[reg_order[i]], op);
14907 }
14908
14909 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14910
14911 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14912 if (TARGET_THUMB1)
14913 {
14914 gcc_assert (base_reg_dies);
14915 write_back = TRUE;
14916 }
14917
14918 if (stm_case == 5)
14919 {
14920 gcc_assert (base_reg_dies);
14921 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14922 offset = 0;
14923 }
14924
14925 addr = plus_constant (Pmode, base_reg_rtx, offset);
14926
14927 for (i = 0; i < nops; i++)
14928 {
14929 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14930 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14931 SImode, addr, 0);
14932 }
14933 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14934 write_back ? offset + i * 4 : 0));
14935 return true;
14936 }
14937
14938 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14939 unaligned copies on processors which support unaligned semantics for those
14940 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14941 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14942 An interleave factor of 1 (the minimum) will perform no interleaving.
14943 Load/store multiple are used for aligned addresses where possible. */
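/* A sketch of the interleaving (register names purely illustrative): with a
   factor of 2 and neither side word-aligned, one 8-byte block is copied as

       ldr r0, [src]       @ unaligned load
       ldr r1, [src, #4]
       str r0, [dst]       @ unaligned store
       str r1, [dst, #4]

   i.e. load/load/store/store, giving the second load time to complete
   before its value is consumed.  When one side is word-aligned, the pair of
   loads or stores on that side collapses into a single ldm or stm.  */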
14944
14945 static void
14946 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14947 HOST_WIDE_INT length,
14948 unsigned int interleave_factor)
14949 {
14950 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14951 int *regnos = XALLOCAVEC (int, interleave_factor);
14952 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14953 HOST_WIDE_INT i, j;
14954 HOST_WIDE_INT remaining = length, words;
14955 rtx halfword_tmp = NULL, byte_tmp = NULL;
14956 rtx dst, src;
14957 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14958 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14959 HOST_WIDE_INT srcoffset, dstoffset;
14960 HOST_WIDE_INT src_autoinc, dst_autoinc;
14961 rtx mem, addr;
14962
14963 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14964
14965 /* Use hard registers if we have aligned source or destination so we can use
14966 load/store multiple with contiguous registers. */
14967 if (dst_aligned || src_aligned)
14968 for (i = 0; i < interleave_factor; i++)
14969 regs[i] = gen_rtx_REG (SImode, i);
14970 else
14971 for (i = 0; i < interleave_factor; i++)
14972 regs[i] = gen_reg_rtx (SImode);
14973
14974 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14975 src = copy_addr_to_reg (XEXP (srcbase, 0));
14976
14977 srcoffset = dstoffset = 0;
14978
14979 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14980 For copying the last bytes we want to subtract this offset again. */
14981 src_autoinc = dst_autoinc = 0;
14982
14983 for (i = 0; i < interleave_factor; i++)
14984 regnos[i] = i;
14985
14986 /* Copy BLOCK_SIZE_BYTES chunks. */
14987
14988 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14989 {
14990 /* Load words. */
14991 if (src_aligned && interleave_factor > 1)
14992 {
14993 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14994 TRUE, srcbase, &srcoffset));
14995 src_autoinc += UNITS_PER_WORD * interleave_factor;
14996 }
14997 else
14998 {
14999 for (j = 0; j < interleave_factor; j++)
15000 {
15001 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15002 - src_autoinc));
15003 mem = adjust_automodify_address (srcbase, SImode, addr,
15004 srcoffset + j * UNITS_PER_WORD);
15005 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15006 }
15007 srcoffset += block_size_bytes;
15008 }
15009
15010 /* Store words. */
15011 if (dst_aligned && interleave_factor > 1)
15012 {
15013 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15014 TRUE, dstbase, &dstoffset));
15015 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15016 }
15017 else
15018 {
15019 for (j = 0; j < interleave_factor; j++)
15020 {
15021 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15022 - dst_autoinc));
15023 mem = adjust_automodify_address (dstbase, SImode, addr,
15024 dstoffset + j * UNITS_PER_WORD);
15025 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15026 }
15027 dstoffset += block_size_bytes;
15028 }
15029
15030 remaining -= block_size_bytes;
15031 }
15032
15033 /* Copy any whole words left (note these aren't interleaved with any
15034 subsequent halfword/byte load/stores in the interests of simplicity). */
15035
15036 words = remaining / UNITS_PER_WORD;
15037
15038 gcc_assert (words < interleave_factor);
15039
15040 if (src_aligned && words > 1)
15041 {
15042 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15043 &srcoffset));
15044 src_autoinc += UNITS_PER_WORD * words;
15045 }
15046 else
15047 {
15048 for (j = 0; j < words; j++)
15049 {
15050 addr = plus_constant (Pmode, src,
15051 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15052 mem = adjust_automodify_address (srcbase, SImode, addr,
15053 srcoffset + j * UNITS_PER_WORD);
15054 if (src_aligned)
15055 emit_move_insn (regs[j], mem);
15056 else
15057 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15058 }
15059 srcoffset += words * UNITS_PER_WORD;
15060 }
15061
15062 if (dst_aligned && words > 1)
15063 {
15064 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15065 &dstoffset));
15066 dst_autoinc += words * UNITS_PER_WORD;
15067 }
15068 else
15069 {
15070 for (j = 0; j < words; j++)
15071 {
15072 addr = plus_constant (Pmode, dst,
15073 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15074 mem = adjust_automodify_address (dstbase, SImode, addr,
15075 dstoffset + j * UNITS_PER_WORD);
15076 if (dst_aligned)
15077 emit_move_insn (mem, regs[j]);
15078 else
15079 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15080 }
15081 dstoffset += words * UNITS_PER_WORD;
15082 }
15083
15084 remaining -= words * UNITS_PER_WORD;
15085
15086 gcc_assert (remaining < 4);
15087
15088 /* Copy a halfword if necessary. */
15089
15090 if (remaining >= 2)
15091 {
15092 halfword_tmp = gen_reg_rtx (SImode);
15093
15094 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15095 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15096 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15097
15098 /* Either write out immediately, or delay until we've loaded the last
15099 byte, depending on interleave factor. */
15100 if (interleave_factor == 1)
15101 {
15102 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15103 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15104 emit_insn (gen_unaligned_storehi (mem,
15105 gen_lowpart (HImode, halfword_tmp)));
15106 halfword_tmp = NULL;
15107 dstoffset += 2;
15108 }
15109
15110 remaining -= 2;
15111 srcoffset += 2;
15112 }
15113
15114 gcc_assert (remaining < 2);
15115
15116 /* Copy last byte. */
15117
15118 if ((remaining & 1) != 0)
15119 {
15120 byte_tmp = gen_reg_rtx (SImode);
15121
15122 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15123 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15124 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15125
15126 if (interleave_factor == 1)
15127 {
15128 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15129 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15130 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15131 byte_tmp = NULL;
15132 dstoffset++;
15133 }
15134
15135 remaining--;
15136 srcoffset++;
15137 }
15138
15139 /* Store last halfword if we haven't done so already. */
15140
15141 if (halfword_tmp)
15142 {
15143 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15144 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15145 emit_insn (gen_unaligned_storehi (mem,
15146 gen_lowpart (HImode, halfword_tmp)));
15147 dstoffset += 2;
15148 }
15149
15150 /* Likewise for last byte. */
15151
15152 if (byte_tmp)
15153 {
15154 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15155 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15156 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15157 dstoffset++;
15158 }
15159
15160 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15161 }
15162
15163 /* From mips_adjust_block_mem:
15164
15165 Helper function for doing a loop-based block operation on memory
15166 reference MEM. Each iteration of the loop will operate on LENGTH
15167 bytes of MEM.
15168
15169 Create a new base register for use within the loop and point it to
15170 the start of MEM. Create a new memory reference that uses this
15171 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15172
15173 static void
15174 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15175 rtx *loop_mem)
15176 {
15177 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15178
15179 /* Although the new mem does not refer to a known location,
15180 it does keep up to LENGTH bytes of alignment. */
15181 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15182 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15183 }
15184
15185 /* From mips_block_move_loop:
15186
15187 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15188 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15189 the memory regions do not overlap. */
15190
15191 static void
15192 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15193 unsigned int interleave_factor,
15194 HOST_WIDE_INT bytes_per_iter)
15195 {
15196 rtx src_reg, dest_reg, final_src, test;
15197 HOST_WIDE_INT leftover;
15198
15199 leftover = length % bytes_per_iter;
15200 length -= leftover;
15201
15202 /* Create registers and memory references for use within the loop. */
15203 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15204 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15205
15206 /* Calculate the value that SRC_REG should have after the last iteration of
15207 the loop. */
15208 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15209 0, 0, OPTAB_WIDEN);
15210
15211 /* Emit the start of the loop. */
15212 rtx_code_label *label = gen_label_rtx ();
15213 emit_label (label);
15214
15215 /* Emit the loop body. */
15216 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15217 interleave_factor);
15218
15219 /* Move on to the next block. */
15220 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15221 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15222
15223 /* Emit the loop condition. */
15224 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15225 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15226
15227 /* Mop up any left-over bytes. */
15228 if (leftover)
15229 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15230 }
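/* Worked example (numbers chosen for illustration): for LENGTH = 100 and
   BYTES_PER_ITER = 16, LEFTOVER is 100 % 16 = 4, so the loop above copies
   96 bytes in six iterations and the final straight-line call mops up the
   remaining 4 bytes.  */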
15231
15232 /* Emit a block move when either the source or destination is unaligned (not
15233 aligned to a four-byte boundary). This may need further tuning depending on
15234 core type, optimize_size setting, etc. */
15235
15236 static int
15237 arm_cpymemqi_unaligned (rtx *operands)
15238 {
15239 HOST_WIDE_INT length = INTVAL (operands[2]);
15240
15241 if (optimize_size)
15242 {
15243 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15244 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15245 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15246 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15247 or dst_aligned though: allow more interleaving in those cases since the
15248 resulting code can be smaller. */
15249 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15250 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15251
15252 if (length > 12)
15253 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15254 interleave_factor, bytes_per_iter);
15255 else
15256 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15257 interleave_factor);
15258 }
15259 else
15260 {
15261 /* Note that the loop created by arm_block_move_unaligned_loop may be
15262 subject to loop unrolling, which makes tuning this condition a little
15263 redundant. */
15264 if (length > 32)
15265 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15266 else
15267 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15268 }
15269
15270 return 1;
15271 }
15272
15273 int
15274 arm_gen_cpymemqi (rtx *operands)
15275 {
15276 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15277 HOST_WIDE_INT srcoffset, dstoffset;
15278 rtx src, dst, srcbase, dstbase;
15279 rtx part_bytes_reg = NULL;
15280 rtx mem;
15281
15282 if (!CONST_INT_P (operands[2])
15283 || !CONST_INT_P (operands[3])
15284 || INTVAL (operands[2]) > 64)
15285 return 0;
15286
15287 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15288 return arm_cpymemqi_unaligned (operands);
15289
15290 if (INTVAL (operands[3]) & 3)
15291 return 0;
15292
15293 dstbase = operands[0];
15294 srcbase = operands[1];
15295
15296 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15297 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15298
15299 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15300 out_words_to_go = INTVAL (operands[2]) / 4;
15301 last_bytes = INTVAL (operands[2]) & 3;
15302 dstoffset = srcoffset = 0;
15303
15304 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15305 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15306
15307 while (in_words_to_go >= 2)
15308 {
15309 if (in_words_to_go > 4)
15310 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15311 TRUE, srcbase, &srcoffset));
15312 else
15313 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15314 src, FALSE, srcbase,
15315 &srcoffset));
15316
15317 if (out_words_to_go)
15318 {
15319 if (out_words_to_go > 4)
15320 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15321 TRUE, dstbase, &dstoffset));
15322 else if (out_words_to_go != 1)
15323 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15324 out_words_to_go, dst,
15325 (last_bytes == 0
15326 ? FALSE : TRUE),
15327 dstbase, &dstoffset));
15328 else
15329 {
15330 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15331 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15332 if (last_bytes != 0)
15333 {
15334 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15335 dstoffset += 4;
15336 }
15337 }
15338 }
15339
15340 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15341 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15342 }
15343
15344 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15345 if (out_words_to_go)
15346 {
15347 rtx sreg;
15348
15349 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15350 sreg = copy_to_reg (mem);
15351
15352 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15353 emit_move_insn (mem, sreg);
15354 in_words_to_go--;
15355
15356 gcc_assert (!in_words_to_go); /* Sanity check */
15357 }
15358
15359 if (in_words_to_go)
15360 {
15361 gcc_assert (in_words_to_go > 0);
15362
15363 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15364 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15365 }
15366
15367 gcc_assert (!last_bytes || part_bytes_reg);
15368
15369 if (BYTES_BIG_ENDIAN && last_bytes)
15370 {
15371 rtx tmp = gen_reg_rtx (SImode);
15372
15373 /* The bytes we want are in the top end of the word. */
15374 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15375 GEN_INT (8 * (4 - last_bytes))));
15376 part_bytes_reg = tmp;
15377
15378 while (last_bytes)
15379 {
15380 mem = adjust_automodify_address (dstbase, QImode,
15381 plus_constant (Pmode, dst,
15382 last_bytes - 1),
15383 dstoffset + last_bytes - 1);
15384 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15385
15386 if (--last_bytes)
15387 {
15388 tmp = gen_reg_rtx (SImode);
15389 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15390 part_bytes_reg = tmp;
15391 }
15392 }
15393
15394 }
15395 else
15396 {
15397 if (last_bytes > 1)
15398 {
15399 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15400 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15401 last_bytes -= 2;
15402 if (last_bytes)
15403 {
15404 rtx tmp = gen_reg_rtx (SImode);
15405 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15406 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15407 part_bytes_reg = tmp;
15408 dstoffset += 2;
15409 }
15410 }
15411
15412 if (last_bytes)
15413 {
15414 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15415 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15416 }
15417 }
15418
15419 return 1;
15420 }
15421
15422 /* Helper for gen_cpymem_ldrd_strd. Return a copy of the memory rtx MEM
15423 with its address advanced by the size of its mode. */
15424 inline static rtx
15425 next_consecutive_mem (rtx mem)
15426 {
15427 machine_mode mode = GET_MODE (mem);
15428 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15429 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15430
15431 return adjust_automodify_address (mem, mode, addr, offset);
15432 }
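/* For instance (illustrative only), if MEM is an SImode reference at
   [r3, #8], the returned rtx refers to [r3, #12] and its recorded offset
   likewise advances by 4; for an HImode MEM the step would be 2.  */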
15433
15434 /* Copy using LDRD/STRD instructions whenever possible.
15435 Returns true upon success. */
15436 bool
15437 gen_cpymem_ldrd_strd (rtx *operands)
15438 {
15439 unsigned HOST_WIDE_INT len;
15440 HOST_WIDE_INT align;
15441 rtx src, dst, base;
15442 rtx reg0;
15443 bool src_aligned, dst_aligned;
15444 bool src_volatile, dst_volatile;
15445
15446 gcc_assert (CONST_INT_P (operands[2]));
15447 gcc_assert (CONST_INT_P (operands[3]));
15448
15449 len = UINTVAL (operands[2]);
15450 if (len > 64)
15451 return false;
15452
15453 /* Maximum alignment we can assume for both src and dst buffers. */
15454 align = INTVAL (operands[3]);
15455
15456 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15457 return false;
15458
15459 /* Place src and dst addresses in registers
15460 and update the corresponding mem rtx. */
15461 dst = operands[0];
15462 dst_volatile = MEM_VOLATILE_P (dst);
15463 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15464 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15465 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15466
15467 src = operands[1];
15468 src_volatile = MEM_VOLATILE_P (src);
15469 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15470 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15471 src = adjust_automodify_address (src, VOIDmode, base, 0);
15472
15473 if (!unaligned_access && !(src_aligned && dst_aligned))
15474 return false;
15475
15476 if (src_volatile || dst_volatile)
15477 return false;
15478
15479 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15480 if (!(dst_aligned || src_aligned))
15481 return arm_gen_cpymemqi (operands);
15482
15483 /* If either src or dst is unaligned we'll be accessing it as pairs
15484 of unaligned SImode accesses. Otherwise we can generate DImode
15485 ldrd/strd instructions. */
15486 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15487 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15488
15489 while (len >= 8)
15490 {
15491 len -= 8;
15492 reg0 = gen_reg_rtx (DImode);
15493 rtx low_reg = NULL_RTX;
15494 rtx hi_reg = NULL_RTX;
15495
15496 if (!src_aligned || !dst_aligned)
15497 {
15498 low_reg = gen_lowpart (SImode, reg0);
15499 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15500 }
15501 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15502 emit_move_insn (reg0, src);
15503 else if (src_aligned)
15504 emit_insn (gen_unaligned_loaddi (reg0, src));
15505 else
15506 {
15507 emit_insn (gen_unaligned_loadsi (low_reg, src));
15508 src = next_consecutive_mem (src);
15509 emit_insn (gen_unaligned_loadsi (hi_reg, src));
15510 }
15511
15512 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15513 emit_move_insn (dst, reg0);
15514 else if (dst_aligned)
15515 emit_insn (gen_unaligned_storedi (dst, reg0));
15516 else
15517 {
15518 emit_insn (gen_unaligned_storesi (dst, low_reg));
15519 dst = next_consecutive_mem (dst);
15520 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15521 }
15522
15523 src = next_consecutive_mem (src);
15524 dst = next_consecutive_mem (dst);
15525 }
15526
15527 gcc_assert (len < 8);
15528 if (len >= 4)
15529 {
15530 /* At least a word but less than a double-word left to copy. Copy a word. */
15531 reg0 = gen_reg_rtx (SImode);
15532 src = adjust_address (src, SImode, 0);
15533 dst = adjust_address (dst, SImode, 0);
15534 if (src_aligned)
15535 emit_move_insn (reg0, src);
15536 else
15537 emit_insn (gen_unaligned_loadsi (reg0, src));
15538
15539 if (dst_aligned)
15540 emit_move_insn (dst, reg0);
15541 else
15542 emit_insn (gen_unaligned_storesi (dst, reg0));
15543
15544 src = next_consecutive_mem (src);
15545 dst = next_consecutive_mem (dst);
15546 len -= 4;
15547 }
15548
15549 if (len == 0)
15550 return true;
15551
15552 /* Copy the remaining bytes. */
15553 if (len >= 2)
15554 {
15555 dst = adjust_address (dst, HImode, 0);
15556 src = adjust_address (src, HImode, 0);
15557 reg0 = gen_reg_rtx (SImode);
15558 if (src_aligned)
15559 emit_insn (gen_zero_extendhisi2 (reg0, src));
15560 else
15561 emit_insn (gen_unaligned_loadhiu (reg0, src));
15562
15563 if (dst_aligned)
15564 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15565 else
15566 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15567
15568 src = next_consecutive_mem (src);
15569 dst = next_consecutive_mem (dst);
15570 if (len == 2)
15571 return true;
15572 }
15573
15574 dst = adjust_address (dst, QImode, 0);
15575 src = adjust_address (src, QImode, 0);
15576 reg0 = gen_reg_rtx (QImode);
15577 emit_move_insn (reg0, src);
15578 emit_move_insn (dst, reg0);
15579 return true;
15580 }
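/* Rough shape of the output for a 14-byte copy with both ends word-aligned
   (registers invented; the exact instructions depend on how the unaligned
   load/store patterns expand): an 8-byte double-word copy, then a word,
   then a half-word, roughly

       ldrd r2, r3, [r0]
       strd r2, r3, [r1]
       ldr  r2, [r0, #8]
       str  r2, [r1, #8]
       ldrh r2, [r0, #12]
       strh r2, [r1, #12]  */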
15581
15582 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15583 into its component 32-bit subregs. OP2 may be an immediate
15584 constant and we want to simplify it in that case. */
15585 void
15586 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15587 rtx *lo_op2, rtx *hi_op2)
15588 {
15589 *lo_op1 = gen_lowpart (SImode, op1);
15590 *hi_op1 = gen_highpart (SImode, op1);
15591 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15592 subreg_lowpart_offset (SImode, DImode));
15593 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15594 subreg_highpart_offset (SImode, DImode));
15595 }
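/* For instance, if OP2 is (const_int 0x100000004) the two subregs simplify
   to *LO_OP2 = (const_int 4) and *HI_OP2 = (const_int 1); register operands
   simply end up as their SImode low and high parts.  */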
15596
15597 /* Select a dominance comparison mode if possible for a test of the general
15598 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15599 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15600 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15601 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15602 In all cases OP will be either EQ or NE, but we don't need to know which
15603 here. If we are unable to support a dominance comparison we return
15604 CCmode. This will then fail to match for the RTL expressions that
15605 generate this call. */
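/* For example (assuming both operands are simple SImode comparisons so the
   early bail-out below does not trigger): two EQ comparisons under
   DOM_CC_X_AND_Y give CC_DEQmode, and an EQ combined with an LE under
   DOM_CC_X_OR_Y gives CC_DLEmode.  A pair such as LT and GT, where neither
   condition dominates the other, falls back to CCmode and the calling
   pattern simply fails to match.  */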
15606 machine_mode
15607 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15608 {
15609 enum rtx_code cond1, cond2;
15610 int swapped = 0;
15611
15612 /* Currently we will probably get the wrong result if the individual
15613 comparisons are not simple. This also ensures that it is safe to
15614 reverse a comparison if necessary. */
15615 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15616 != CCmode)
15617 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15618 != CCmode))
15619 return CCmode;
15620
15621 /* The if_then_else variant of this tests the second condition if the
15622 first passes, but is true if the first fails. Reverse the first
15623 condition to get a true "inclusive-or" expression. */
15624 if (cond_or == DOM_CC_NX_OR_Y)
15625 cond1 = reverse_condition (cond1);
15626
15627 /* If the comparisons are not equal, and one doesn't dominate the other,
15628 then we can't do this. */
15629 if (cond1 != cond2
15630 && !comparison_dominates_p (cond1, cond2)
15631 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15632 return CCmode;
15633
15634 if (swapped)
15635 std::swap (cond1, cond2);
15636
15637 switch (cond1)
15638 {
15639 case EQ:
15640 if (cond_or == DOM_CC_X_AND_Y)
15641 return CC_DEQmode;
15642
15643 switch (cond2)
15644 {
15645 case EQ: return CC_DEQmode;
15646 case LE: return CC_DLEmode;
15647 case LEU: return CC_DLEUmode;
15648 case GE: return CC_DGEmode;
15649 case GEU: return CC_DGEUmode;
15650 default: gcc_unreachable ();
15651 }
15652
15653 case LT:
15654 if (cond_or == DOM_CC_X_AND_Y)
15655 return CC_DLTmode;
15656
15657 switch (cond2)
15658 {
15659 case LT:
15660 return CC_DLTmode;
15661 case LE:
15662 return CC_DLEmode;
15663 case NE:
15664 return CC_DNEmode;
15665 default:
15666 gcc_unreachable ();
15667 }
15668
15669 case GT:
15670 if (cond_or == DOM_CC_X_AND_Y)
15671 return CC_DGTmode;
15672
15673 switch (cond2)
15674 {
15675 case GT:
15676 return CC_DGTmode;
15677 case GE:
15678 return CC_DGEmode;
15679 case NE:
15680 return CC_DNEmode;
15681 default:
15682 gcc_unreachable ();
15683 }
15684
15685 case LTU:
15686 if (cond_or == DOM_CC_X_AND_Y)
15687 return CC_DLTUmode;
15688
15689 switch (cond2)
15690 {
15691 case LTU:
15692 return CC_DLTUmode;
15693 case LEU:
15694 return CC_DLEUmode;
15695 case NE:
15696 return CC_DNEmode;
15697 default:
15698 gcc_unreachable ();
15699 }
15700
15701 case GTU:
15702 if (cond_or == DOM_CC_X_AND_Y)
15703 return CC_DGTUmode;
15704
15705 switch (cond2)
15706 {
15707 case GTU:
15708 return CC_DGTUmode;
15709 case GEU:
15710 return CC_DGEUmode;
15711 case NE:
15712 return CC_DNEmode;
15713 default:
15714 gcc_unreachable ();
15715 }
15716
15717 /* The remaining cases only occur when both comparisons are the
15718 same. */
15719 case NE:
15720 gcc_assert (cond1 == cond2);
15721 return CC_DNEmode;
15722
15723 case LE:
15724 gcc_assert (cond1 == cond2);
15725 return CC_DLEmode;
15726
15727 case GE:
15728 gcc_assert (cond1 == cond2);
15729 return CC_DGEmode;
15730
15731 case LEU:
15732 gcc_assert (cond1 == cond2);
15733 return CC_DLEUmode;
15734
15735 case GEU:
15736 gcc_assert (cond1 == cond2);
15737 return CC_DGEUmode;
15738
15739 default:
15740 gcc_unreachable ();
15741 }
15742 }
15743
15744 machine_mode
15745 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15746 {
15747 /* Floating point compares return CCFPEmode for the ordered LT, LE, GT and
15748 GE comparisons, and CCFPmode for all the others. */
15749 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15750 {
15751 switch (op)
15752 {
15753 case EQ:
15754 case NE:
15755 case UNORDERED:
15756 case ORDERED:
15757 case UNLT:
15758 case UNLE:
15759 case UNGT:
15760 case UNGE:
15761 case UNEQ:
15762 case LTGT:
15763 return CCFPmode;
15764
15765 case LT:
15766 case LE:
15767 case GT:
15768 case GE:
15769 return CCFPEmode;
15770
15771 default:
15772 gcc_unreachable ();
15773 }
15774 }
15775
15776 /* A compare with a shifted operand. Because of canonicalization, the
15777 comparison will have to be swapped when we emit the assembler. */
15778 if (GET_MODE (y) == SImode
15779 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15780 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15781 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15782 || GET_CODE (x) == ROTATERT))
15783 return CC_SWPmode;
15784
15785 /* A widened compare of the sum of a value plus a carry against a
15786 constant. This is a representation of RSC. We want to swap the
15787 result of the comparison at output. Not valid if the Z bit is
15788 needed. */
15789 if (GET_MODE (x) == DImode
15790 && GET_CODE (x) == PLUS
15791 && arm_borrow_operation (XEXP (x, 1), DImode)
15792 && CONST_INT_P (y)
15793 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15794 && (op == LE || op == GT))
15795 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15796 && (op == LEU || op == GTU))))
15797 return CC_SWPmode;
15798
15799 /* If X is a constant we want to use CC_RSBmode. This is
15800 non-canonical, but arm_gen_compare_reg uses this to generate the
15801 correct canonical form. */
15802 if (GET_MODE (y) == SImode
15803 && (REG_P (y) || GET_CODE (y) == SUBREG)
15804 && CONST_INT_P (x))
15805 return CC_RSBmode;
15806
15807 /* This operation is performed swapped, but since we only rely on the Z
15808 flag we don't need an additional mode. */
15809 if (GET_MODE (y) == SImode
15810 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15811 && GET_CODE (x) == NEG
15812 && (op == EQ || op == NE))
15813 return CC_Zmode;
15814
15815 /* This is a special case that is used by combine to allow a
15816 comparison of a shifted byte load to be split into a zero-extend
15817 followed by a comparison of the shifted integer (only valid for
15818 equalities and unsigned inequalities). */
15819 if (GET_MODE (x) == SImode
15820 && GET_CODE (x) == ASHIFT
15821 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15822 && GET_CODE (XEXP (x, 0)) == SUBREG
15823 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15824 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15825 && (op == EQ || op == NE
15826 || op == GEU || op == GTU || op == LTU || op == LEU)
15827 && CONST_INT_P (y))
15828 return CC_Zmode;
15829
15830 /* A construct for a conditional compare: if the false arm contains
15831 0, then both conditions must be true; otherwise either condition
15832 must be true. Not all conditions are possible, so CCmode is
15833 returned if it can't be done. */
15834 if (GET_CODE (x) == IF_THEN_ELSE
15835 && (XEXP (x, 2) == const0_rtx
15836 || XEXP (x, 2) == const1_rtx)
15837 && COMPARISON_P (XEXP (x, 0))
15838 && COMPARISON_P (XEXP (x, 1)))
15839 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15840 INTVAL (XEXP (x, 2)));
15841
15842 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15843 if (GET_CODE (x) == AND
15844 && (op == EQ || op == NE)
15845 && COMPARISON_P (XEXP (x, 0))
15846 && COMPARISON_P (XEXP (x, 1)))
15847 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15848 DOM_CC_X_AND_Y);
15849
15850 if (GET_CODE (x) == IOR
15851 && (op == EQ || op == NE)
15852 && COMPARISON_P (XEXP (x, 0))
15853 && COMPARISON_P (XEXP (x, 1)))
15854 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15855 DOM_CC_X_OR_Y);
15856
15857 /* An operation (on Thumb) where we want to test for a single bit.
15858 This is done by shifting that bit up into the top bit of a
15859 scratch register; we can then branch on the sign bit. */
15860 if (TARGET_THUMB1
15861 && GET_MODE (x) == SImode
15862 && (op == EQ || op == NE)
15863 && GET_CODE (x) == ZERO_EXTRACT
15864 && XEXP (x, 1) == const1_rtx)
15865 return CC_Nmode;
15866
15867 /* For an operation that sets the condition codes as a side-effect, the
15868 V flag is not set correctly, so we can only use comparisons where
15869 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15870 instead.) */
15871 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15872 if (GET_MODE (x) == SImode
15873 && y == const0_rtx
15874 && (op == EQ || op == NE || op == LT || op == GE)
15875 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15876 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15877 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15878 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15879 || GET_CODE (x) == LSHIFTRT
15880 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15881 || GET_CODE (x) == ROTATERT
15882 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15883 return CC_NZmode;
15884
15885 /* A comparison of ~reg with a const is really a special
15886 canonicalization of compare (~const, reg), which is a reverse
15887 subtract operation. We may not get here if CONST is 0, but that
15888 doesn't matter because ~0 isn't a valid immediate for RSB. */
15889 if (GET_MODE (x) == SImode
15890 && GET_CODE (x) == NOT
15891 && CONST_INT_P (y))
15892 return CC_RSBmode;
15893
15894 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15895 return CC_Zmode;
15896
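  /* A comparison of A + B against one of the addends, for LTU or GEU,
     is the idiom for detecting unsigned overflow of an addition:
     (A + B) < A (unsigned) holds iff the addition produced a carry,
     so only the C flag is needed.  */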
15897 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15898 && GET_CODE (x) == PLUS
15899 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15900 return CC_Cmode;
15901
15902 if (GET_MODE (x) == DImode
15903 && GET_CODE (x) == PLUS
15904 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15905 && CONST_INT_P (y)
15906 && UINTVAL (y) == 0x800000000
15907 && (op == GEU || op == LTU))
15908 return CC_ADCmode;
15909
15910 if (GET_MODE (x) == DImode
15911 && (op == GE || op == LT)
15912 && GET_CODE (x) == SIGN_EXTEND
15913 && ((GET_CODE (y) == PLUS
15914 && arm_borrow_operation (XEXP (y, 0), DImode))
15915 || arm_borrow_operation (y, DImode)))
15916 return CC_NVmode;
15917
15918 if (GET_MODE (x) == DImode
15919 && (op == GEU || op == LTU)
15920 && GET_CODE (x) == ZERO_EXTEND
15921 && ((GET_CODE (y) == PLUS
15922 && arm_borrow_operation (XEXP (y, 0), DImode))
15923 || arm_borrow_operation (y, DImode)))
15924 return CC_Bmode;
15925
15926 if (GET_MODE (x) == DImode
15927 && (op == EQ || op == NE)
15928 && (GET_CODE (x) == PLUS
15929 || GET_CODE (x) == MINUS)
15930 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15931 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15932 && GET_CODE (y) == SIGN_EXTEND
15933 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
15934 return CC_Vmode;
15935
15936 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15937 return GET_MODE (x);
15938
15939 return CCmode;
15940 }
15941
15942 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
15943 the sequence of instructions needed to generate a suitable condition
15944 code register. Return the CC register result. */
15945 static rtx
15946 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15947 {
15948 machine_mode mode;
15949 rtx cc_reg;
15950
15951 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
15952 gcc_assert (TARGET_32BIT);
15953 gcc_assert (!CONST_INT_P (x));
15954
15955 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15956 subreg_lowpart_offset (SImode, DImode));
15957 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
15958 subreg_highpart_offset (SImode, DImode));
15959 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
15960 subreg_lowpart_offset (SImode, DImode));
15961 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
15962 subreg_highpart_offset (SImode, DImode));
15963 switch (code)
15964 {
15965 case EQ:
15966 case NE:
15967 {
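	/* If either half of Y is zero, fold the non-zero half (if any)
	   into the corresponding half of X with a subtract, and then
	   test X_LO | X_HI against zero; roughly a single ORRS-style
	   compare.  This is an illustrative summary of the special
	   case handled below.  */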
15968 if (y_lo == const0_rtx || y_hi == const0_rtx)
15969 {
15970 if (y_lo != const0_rtx)
15971 {
15972 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15973
15974 gcc_assert (y_hi == const0_rtx);
15975 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
15976 if (!arm_add_operand (y_lo, SImode))
15977 y_lo = force_reg (SImode, y_lo);
15978 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
15979 x_lo = scratch2;
15980 }
15981 else if (y_hi != const0_rtx)
15982 {
15983 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15984
15985 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
15986 if (!arm_add_operand (y_hi, SImode))
15987 y_hi = force_reg (SImode, y_hi);
15988 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
15989 x_hi = scratch2;
15990 }
15991
15992 if (!scratch)
15993 {
15994 gcc_assert (!reload_completed);
15995 scratch = gen_rtx_SCRATCH (SImode);
15996 }
15997
15998 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15999 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16000
16001 rtx set
16002 = gen_rtx_SET (cc_reg,
16003 gen_rtx_COMPARE (CC_NZmode,
16004 gen_rtx_IOR (SImode, x_lo, x_hi),
16005 const0_rtx));
16006 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16007 clobber)));
16008 return cc_reg;
16009 }
16010
16011 if (!arm_add_operand (y_lo, SImode))
16012 y_lo = force_reg (SImode, y_lo);
16013
16014 if (!arm_add_operand (y_hi, SImode))
16015 y_hi = force_reg (SImode, y_hi);
16016
16017 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16018 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16019 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16020 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16021 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16022
16023 emit_insn (gen_rtx_SET (cc_reg,
16024 gen_rtx_COMPARE (mode, conjunction,
16025 const0_rtx)));
16026 return cc_reg;
16027 }
16028
16029 case LT:
16030 case GE:
16031 {
16032 if (y_lo == const0_rtx)
16033 {
16034 /* If the low word of y is 0, then this is simply a normal
16035 compare of the upper words. */
16036 if (!arm_add_operand (y_hi, SImode))
16037 y_hi = force_reg (SImode, y_hi);
16038
16039 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16040 }
16041
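	/* Otherwise both halves take part.  Roughly (an illustrative
	   sketch only; the exact insns come from the *_carryin_CC_NVout
	   patterns):
	       cmp	x_lo, y_lo
	       sbcs	scratch, x_hi, y_hi
	   with the result judged from the N and V flags.  */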
16042 if (!arm_add_operand (y_lo, SImode))
16043 y_lo = force_reg (SImode, y_lo);
16044
16045 rtx cmp1
16046 = gen_rtx_LTU (DImode,
16047 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16048 const0_rtx);
16049
16050 if (!scratch)
16051 scratch = gen_rtx_SCRATCH (SImode);
16052
16053 if (!arm_not_operand (y_hi, SImode))
16054 y_hi = force_reg (SImode, y_hi);
16055
16056 rtx_insn *insn;
16057 if (y_hi == const0_rtx)
16058 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16059 cmp1));
16060 else if (CONST_INT_P (y_hi))
16061 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16062 y_hi, cmp1));
16063 else
16064 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16065 cmp1));
16066 return SET_DEST (single_set (insn));
16067 }
16068
16069 case LE:
16070 case GT:
16071 {
	/* During expansion, we only expect to get here if Y is a
	   constant that we want to handle; otherwise we should have
	   swapped the operands already.  */
16075 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16076
16077 if (!const_ok_for_arm (INTVAL (y_lo)))
16078 y_lo = force_reg (SImode, y_lo);
16079
16080 /* Perform a reverse subtract and compare. */
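	/* Roughly (an illustrative sketch only):
	       rsbs	scratch, x_lo, y_lo
	       rscs	scratch, x_hi, y_hi
	   i.e. compute Y - X across the double word and set the flags
	   from the high-word result.  */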
16081 rtx cmp1
16082 = gen_rtx_LTU (DImode,
16083 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16084 const0_rtx);
16085 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16086 x_hi, cmp1));
16087 return SET_DEST (single_set (insn));
16088 }
16089
16090 case LTU:
16091 case GEU:
16092 {
16093 if (y_lo == const0_rtx)
16094 {
16095 /* If the low word of y is 0, then this is simply a normal
16096 compare of the upper words. */
16097 if (!arm_add_operand (y_hi, SImode))
16098 y_hi = force_reg (SImode, y_hi);
16099
16100 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16101 }
16102
16103 if (!arm_add_operand (y_lo, SImode))
16104 y_lo = force_reg (SImode, y_lo);
16105
16106 rtx cmp1
16107 = gen_rtx_LTU (DImode,
16108 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16109 const0_rtx);
16110
16111 if (!scratch)
16112 scratch = gen_rtx_SCRATCH (SImode);
16113 if (!arm_not_operand (y_hi, SImode))
16114 y_hi = force_reg (SImode, y_hi);
16115
16116 rtx_insn *insn;
16117 if (y_hi == const0_rtx)
16118 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16119 cmp1));
16120 else if (CONST_INT_P (y_hi))
16121 {
16122 /* Constant is viewed as unsigned when zero-extended. */
16123 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16124 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16125 y_hi, cmp1));
16126 }
16127 else
16128 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16129 cmp1));
16130 return SET_DEST (single_set (insn));
16131 }
16132
16133 case LEU:
16134 case GTU:
16135 {
	/* During expansion, we only expect to get here if Y is a
	   constant that we want to handle; otherwise we should have
	   swapped the operands already.  */
16139 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16140
16141 if (!const_ok_for_arm (INTVAL (y_lo)))
16142 y_lo = force_reg (SImode, y_lo);
16143
16144 /* Perform a reverse subtract and compare. */
16145 rtx cmp1
16146 = gen_rtx_LTU (DImode,
16147 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16148 const0_rtx);
16149 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16150 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16151 x_hi, cmp1));
16152 return SET_DEST (single_set (insn));
16153 }
16154
16155 default:
16156 gcc_unreachable ();
16157 }
16158 }
16159
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the CC register in the proper mode.  */
16162 rtx
16163 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16164 {
16165 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16166 return arm_gen_dicompare_reg (code, x, y, scratch);
16167
16168 machine_mode mode = SELECT_CC_MODE (code, x, y);
16169 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16170 if (mode == CC_RSBmode)
16171 {
16172 if (!scratch)
16173 scratch = gen_rtx_SCRATCH (SImode);
16174 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16175 GEN_INT (~UINTVAL (x)), y));
16176 }
16177 else
16178 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16179
16180 return cc_reg;
16181 }
16182
16183 /* Generate a sequence of insns that will generate the correct return
16184 address mask depending on the physical architecture that the program
16185 is running on. */
16186 rtx
16187 arm_gen_return_addr_mask (void)
16188 {
16189 rtx reg = gen_reg_rtx (Pmode);
16190
16191 emit_insn (gen_return_addr_mask (reg));
16192 return reg;
16193 }
16194
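/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads that are then merged into OPERANDS[0].  OPERANDS[1] is the
   half-word reference (possibly a spilled pseudo) and OPERANDS[2] is a DImode
   scratch register pair.  As an illustrative sketch only, the little-endian
   sequence is roughly:
	ldrb	scratch, [base, #offset]
	ldrb	out, [base, #offset + 1]
	orr	out, scratch, out, lsl #8
   with the roles of the two bytes swapped on big-endian targets.  */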
16195 void
16196 arm_reload_in_hi (rtx *operands)
16197 {
16198 rtx ref = operands[1];
16199 rtx base, scratch;
16200 HOST_WIDE_INT offset = 0;
16201
16202 if (GET_CODE (ref) == SUBREG)
16203 {
16204 offset = SUBREG_BYTE (ref);
16205 ref = SUBREG_REG (ref);
16206 }
16207
16208 if (REG_P (ref))
16209 {
16210 /* We have a pseudo which has been spilt onto the stack; there
16211 are two cases here: the first where there is a simple
16212 stack-slot replacement and a second where the stack-slot is
16213 out of range, or is used as a subreg. */
16214 if (reg_equiv_mem (REGNO (ref)))
16215 {
16216 ref = reg_equiv_mem (REGNO (ref));
16217 base = find_replacement (&XEXP (ref, 0));
16218 }
16219 else
16220 /* The slot is out of range, or was dressed up in a SUBREG. */
16221 base = reg_equiv_address (REGNO (ref));
16222
16223 /* PR 62554: If there is no equivalent memory location then just move
16224 the value as an SImode register move. This happens when the target
16225 architecture variant does not have an HImode register move. */
16226 if (base == NULL)
16227 {
16228 gcc_assert (REG_P (operands[0]));
16229 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16230 gen_rtx_SUBREG (SImode, ref, 0)));
16231 return;
16232 }
16233 }
16234 else
16235 base = find_replacement (&XEXP (ref, 0));
16236
16237 /* Handle the case where the address is too complex to be offset by 1. */
16238 if (GET_CODE (base) == MINUS
16239 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16240 {
16241 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16242
16243 emit_set_insn (base_plus, base);
16244 base = base_plus;
16245 }
16246 else if (GET_CODE (base) == PLUS)
16247 {
16248 /* The addend must be CONST_INT, or we would have dealt with it above. */
16249 HOST_WIDE_INT hi, lo;
16250
16251 offset += INTVAL (XEXP (base, 1));
16252 base = XEXP (base, 0);
16253
16254 /* Rework the address into a legal sequence of insns. */
16255 /* Valid range for lo is -4095 -> 4095 */
16256 lo = (offset >= 0
16257 ? (offset & 0xfff)
16258 : -((-offset) & 0xfff));
16259
      /* Corner case: if LO is the maximum offset, then we would be out of
	 range once we have added the additional 1 below, so bump the msb
	 into the pre-loading insn(s).  */
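      /* For example (illustrative arithmetic only): with offset == 4095,
	 LO would initially be 4095 and the "offset + 1" access below would
	 need 4096, which is out of range; so LO is reduced to
	 4095 & 0x7ff == 2047 and the remaining 2048 is folded into HI.  */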
16263 if (lo == 4095)
16264 lo &= 0x7ff;
16265
16266 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16267 ^ (HOST_WIDE_INT) 0x80000000)
16268 - (HOST_WIDE_INT) 0x80000000);
16269
16270 gcc_assert (hi + lo == offset);
16271
16272 if (hi != 0)
16273 {
16274 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16275
16276 /* Get the base address; addsi3 knows how to handle constants
16277 that require more than one insn. */
16278 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16279 base = base_plus;
16280 offset = lo;
16281 }
16282 }
16283
  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]); that's why we asked for a DImode reg -- so that we
     can use the half that does not overlap.  */
16287 if (REGNO (operands[2]) == REGNO (operands[0]))
16288 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16289 else
16290 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16291
16292 emit_insn (gen_zero_extendqisi2 (scratch,
16293 gen_rtx_MEM (QImode,
16294 plus_constant (Pmode, base,
16295 offset))));
16296 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16297 gen_rtx_MEM (QImode,
16298 plus_constant (Pmode, base,
16299 offset + 1))));
16300 if (!BYTES_BIG_ENDIAN)
16301 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16302 gen_rtx_IOR (SImode,
16303 gen_rtx_ASHIFT
16304 (SImode,
16305 gen_rtx_SUBREG (SImode, operands[0], 0),
16306 GEN_INT (8)),
16307 scratch));
16308 else
16309 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16310 gen_rtx_IOR (SImode,
16311 gen_rtx_ASHIFT (SImode, scratch,
16312 GEN_INT (8)),
16313 gen_rtx_SUBREG (SImode, operands[0], 0)));
16314 }
16315
/* Handle storing a half-word to memory during reload by synthesizing it as
   two byte stores.  Take care not to clobber the input values until after we
16318 have moved them somewhere safe. This code assumes that if the DImode
16319 scratch in operands[2] overlaps either the input value or output address
16320 in some way, then that value must die in this insn (we absolutely need
16321 two scratch registers for some corner cases). */
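/* As an illustrative sketch only, the little-endian sequence emitted below is
   roughly:
	strb	out, [base, #offset]
	lsr	scratch, out, #8
	strb	scratch, [base, #offset + 1]
   with the two byte addresses swapped on big-endian targets.  */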
16322 void
16323 arm_reload_out_hi (rtx *operands)
16324 {
16325 rtx ref = operands[0];
16326 rtx outval = operands[1];
16327 rtx base, scratch;
16328 HOST_WIDE_INT offset = 0;
16329
16330 if (GET_CODE (ref) == SUBREG)
16331 {
16332 offset = SUBREG_BYTE (ref);
16333 ref = SUBREG_REG (ref);
16334 }
16335
16336 if (REG_P (ref))
16337 {
16338 /* We have a pseudo which has been spilt onto the stack; there
16339 are two cases here: the first where there is a simple
16340 stack-slot replacement and a second where the stack-slot is
16341 out of range, or is used as a subreg. */
16342 if (reg_equiv_mem (REGNO (ref)))
16343 {
16344 ref = reg_equiv_mem (REGNO (ref));
16345 base = find_replacement (&XEXP (ref, 0));
16346 }
16347 else
16348 /* The slot is out of range, or was dressed up in a SUBREG. */
16349 base = reg_equiv_address (REGNO (ref));
16350
16351 /* PR 62254: If there is no equivalent memory location then just move
16352 the value as an SImode register move. This happens when the target
16353 architecture variant does not have an HImode register move. */
16354 if (base == NULL)
16355 {
16356 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16357
16358 if (REG_P (outval))
16359 {
16360 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16361 gen_rtx_SUBREG (SImode, outval, 0)));
16362 }
16363 else /* SUBREG_P (outval) */
16364 {
16365 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16366 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16367 SUBREG_REG (outval)));
16368 else
16369 /* FIXME: Handle other cases ? */
16370 gcc_unreachable ();
16371 }
16372 return;
16373 }
16374 }
16375 else
16376 base = find_replacement (&XEXP (ref, 0));
16377
16378 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16379
16380 /* Handle the case where the address is too complex to be offset by 1. */
16381 if (GET_CODE (base) == MINUS
16382 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16383 {
16384 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16385
16386 /* Be careful not to destroy OUTVAL. */
16387 if (reg_overlap_mentioned_p (base_plus, outval))
16388 {
16389 /* Updating base_plus might destroy outval, see if we can
16390 swap the scratch and base_plus. */
16391 if (!reg_overlap_mentioned_p (scratch, outval))
16392 std::swap (scratch, base_plus);
16393 else
16394 {
16395 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16396
	      /* Be conservative and copy OUTVAL into the scratch now;
		 this should only be necessary if OUTVAL is a subreg
		 of something larger than a word.  */
16400 /* XXX Might this clobber base? I can't see how it can,
16401 since scratch is known to overlap with OUTVAL, and
16402 must be wider than a word. */
16403 emit_insn (gen_movhi (scratch_hi, outval));
16404 outval = scratch_hi;
16405 }
16406 }
16407
16408 emit_set_insn (base_plus, base);
16409 base = base_plus;
16410 }
16411 else if (GET_CODE (base) == PLUS)
16412 {
16413 /* The addend must be CONST_INT, or we would have dealt with it above. */
16414 HOST_WIDE_INT hi, lo;
16415
16416 offset += INTVAL (XEXP (base, 1));
16417 base = XEXP (base, 0);
16418
16419 /* Rework the address into a legal sequence of insns. */
16420 /* Valid range for lo is -4095 -> 4095 */
16421 lo = (offset >= 0
16422 ? (offset & 0xfff)
16423 : -((-offset) & 0xfff));
16424
      /* Corner case: if LO is the maximum offset, then we would be out of
	 range once we have added the additional 1 below, so bump the msb
	 into the pre-loading insn(s).  */
16428 if (lo == 4095)
16429 lo &= 0x7ff;
16430
16431 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16432 ^ (HOST_WIDE_INT) 0x80000000)
16433 - (HOST_WIDE_INT) 0x80000000);
16434
16435 gcc_assert (hi + lo == offset);
16436
16437 if (hi != 0)
16438 {
16439 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16440
16441 /* Be careful not to destroy OUTVAL. */
16442 if (reg_overlap_mentioned_p (base_plus, outval))
16443 {
16444 /* Updating base_plus might destroy outval, see if we
16445 can swap the scratch and base_plus. */
16446 if (!reg_overlap_mentioned_p (scratch, outval))
16447 std::swap (scratch, base_plus);
16448 else
16449 {
16450 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16451
		  /* Be conservative and copy OUTVAL into SCRATCH now;
		     this should only be necessary if OUTVAL is a
		     subreg of something larger than a word.  */
16455 /* XXX Might this clobber base? I can't see how it
16456 can, since scratch is known to overlap with
16457 outval. */
16458 emit_insn (gen_movhi (scratch_hi, outval));
16459 outval = scratch_hi;
16460 }
16461 }
16462
16463 /* Get the base address; addsi3 knows how to handle constants
16464 that require more than one insn. */
16465 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16466 base = base_plus;
16467 offset = lo;
16468 }
16469 }
16470
16471 if (BYTES_BIG_ENDIAN)
16472 {
16473 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16474 plus_constant (Pmode, base,
16475 offset + 1)),
16476 gen_lowpart (QImode, outval)));
16477 emit_insn (gen_lshrsi3 (scratch,
16478 gen_rtx_SUBREG (SImode, outval, 0),
16479 GEN_INT (8)));
16480 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16481 offset)),
16482 gen_lowpart (QImode, scratch)));
16483 }
16484 else
16485 {
16486 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16487 offset)),
16488 gen_lowpart (QImode, outval)));
16489 emit_insn (gen_lshrsi3 (scratch,
16490 gen_rtx_SUBREG (SImode, outval, 0),
16491 GEN_INT (8)));
16492 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16493 plus_constant (Pmode, base,
16494 offset + 1)),
16495 gen_lowpart (QImode, scratch)));
16496 }
16497 }
16498
16499 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16500 (padded to the size of a word) should be passed in a register. */
16501
16502 static bool
16503 arm_must_pass_in_stack (const function_arg_info &arg)
16504 {
16505 if (TARGET_AAPCS_BASED)
16506 return must_pass_in_stack_var_size (arg);
16507 else
16508 return must_pass_in_stack_var_size_or_pad (arg);
16509 }
16510
16511
16512 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16513 byte of a stack argument has useful data. For legacy APCS ABIs we use
16514 the default. For AAPCS based ABIs small aggregate types are placed
16515 in the lowest memory address. */
16516
16517 static pad_direction
16518 arm_function_arg_padding (machine_mode mode, const_tree type)
16519 {
16520 if (!TARGET_AAPCS_BASED)
16521 return default_function_arg_padding (mode, type);
16522
16523 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16524 return PAD_DOWNWARD;
16525
16526 return PAD_UPWARD;
16527 }
16528
16529
16530 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16531 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16532 register has useful data, and return the opposite if the most
16533 significant byte does. */
16534
16535 bool
16536 arm_pad_reg_upward (machine_mode mode,
16537 tree type, int first ATTRIBUTE_UNUSED)
16538 {
16539 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16540 {
16541 /* For AAPCS, small aggregates, small fixed-point types,
16542 and small complex types are always padded upwards. */
16543 if (type)
16544 {
16545 if ((AGGREGATE_TYPE_P (type)
16546 || TREE_CODE (type) == COMPLEX_TYPE
16547 || FIXED_POINT_TYPE_P (type))
16548 && int_size_in_bytes (type) <= 4)
16549 return true;
16550 }
16551 else
16552 {
16553 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16554 && GET_MODE_SIZE (mode) <= 4)
16555 return true;
16556 }
16557 }
16558
16559 /* Otherwise, use default padding. */
16560 return !BYTES_BIG_ENDIAN;
16561 }
16562
16563 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16564 assuming that the address in the base register is word aligned. */
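/* (ARM-mode LDRD/STRD encodes an 8-bit byte offset, hence the +/-255 limit
   below; Thumb-2 encodes an 8-bit offset scaled by 4, hence a multiple of 4
   in the range +/-1020.)  */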
16565 bool
16566 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16567 {
16568 HOST_WIDE_INT max_offset;
16569
16570 /* Offset must be a multiple of 4 in Thumb mode. */
16571 if (TARGET_THUMB2 && ((offset & 3) != 0))
16572 return false;
16573
16574 if (TARGET_THUMB2)
16575 max_offset = 1020;
16576 else if (TARGET_ARM)
16577 max_offset = 255;
16578 else
16579 return false;
16580
16581 return ((offset <= max_offset) && (offset >= -max_offset));
16582 }
16583
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the valid range, and that the gap
   between the offsets is 4.  If reload is complete, also check that the
   registers are legal for the instruction.  WBACK indicates whether the
   address is updated.  LOAD indicates whether the memory access is a load
   or a store.  */
16591 bool
16592 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16593 bool wback, bool load)
16594 {
16595 unsigned int t, t2, n;
16596
16597 if (!reload_completed)
16598 return true;
16599
16600 if (!offset_ok_for_ldrd_strd (offset))
16601 return false;
16602
16603 t = REGNO (rt);
16604 t2 = REGNO (rt2);
16605 n = REGNO (rn);
16606
16607 if ((TARGET_THUMB2)
16608 && ((wback && (n == t || n == t2))
16609 || (t == SP_REGNUM)
16610 || (t == PC_REGNUM)
16611 || (t2 == SP_REGNUM)
16612 || (t2 == PC_REGNUM)
16613 || (!load && (n == PC_REGNUM))
16614 || (load && (t == t2))
16615 /* Triggers Cortex-M3 LDRD errata. */
16616 || (!wback && load && fix_cm3_ldrd && (n == t))))
16617 return false;
16618
16619 if ((TARGET_ARM)
16620 && ((wback && (n == t || n == t2))
16621 || (t2 == PC_REGNUM)
16622 || (t % 2 != 0) /* First destination register is not even. */
16623 || (t2 != t + 1)
	   /* PC can be used as a base register (for offset addressing
	      only), but it is deprecated.  */
16626 || (n == PC_REGNUM)))
16627 return false;
16628
16629 return true;
16630 }
16631
16632 /* Return true if a 64-bit access with alignment ALIGN and with a
16633 constant offset OFFSET from the base pointer is permitted on this
16634 architecture. */
16635 static bool
16636 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16637 {
16638 return (unaligned_access
16639 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16640 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16641 }
16642
16643 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16644 operand MEM's address contains an immediate offset from the base
16645 register and has no side effects, in which case it sets BASE,
16646 OFFSET and ALIGN accordingly. */
16647 static bool
16648 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16649 {
16650 rtx addr;
16651
16652 gcc_assert (base != NULL && offset != NULL);
16653
16654 /* TODO: Handle more general memory operand patterns, such as
16655 PRE_DEC and PRE_INC. */
16656
16657 if (side_effects_p (mem))
16658 return false;
16659
16660 /* Can't deal with subregs. */
16661 if (GET_CODE (mem) == SUBREG)
16662 return false;
16663
16664 gcc_assert (MEM_P (mem));
16665
16666 *offset = const0_rtx;
16667 *align = MEM_ALIGN (mem);
16668
16669 addr = XEXP (mem, 0);
16670
16671 /* If addr isn't valid for DImode, then we can't handle it. */
16672 if (!arm_legitimate_address_p (DImode, addr,
16673 reload_in_progress || reload_completed))
16674 return false;
16675
16676 if (REG_P (addr))
16677 {
16678 *base = addr;
16679 return true;
16680 }
16681 else if (GET_CODE (addr) == PLUS)
16682 {
16683 *base = XEXP (addr, 0);
16684 *offset = XEXP (addr, 1);
16685 return (REG_P (*base) && CONST_INT_P (*offset));
16686 }
16687
16688 return false;
16689 }
16690
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5], and assumes that the pattern
   is 4 insns long, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
16703 bool
16704 gen_operands_ldrd_strd (rtx *operands, bool load,
16705 bool const_store, bool commute)
16706 {
16707 int nops = 2;
16708 HOST_WIDE_INT offsets[2], offset, align[2];
16709 rtx base = NULL_RTX;
16710 rtx cur_base, cur_offset, tmp;
16711 int i, gap;
16712 HARD_REG_SET regset;
16713
16714 gcc_assert (!const_store || !load);
16715 /* Check that the memory references are immediate offsets from the
16716 same base register. Extract the base register, the destination
16717 registers, and the corresponding memory offsets. */
16718 for (i = 0; i < nops; i++)
16719 {
16720 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16721 &align[i]))
16722 return false;
16723
16724 if (i == 0)
16725 base = cur_base;
16726 else if (REGNO (base) != REGNO (cur_base))
16727 return false;
16728
16729 offsets[i] = INTVAL (cur_offset);
16730 if (GET_CODE (operands[i]) == SUBREG)
16731 {
16732 tmp = SUBREG_REG (operands[i]);
16733 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16734 operands[i] = tmp;
16735 }
16736 }
16737
16738 /* Make sure there is no dependency between the individual loads. */
16739 if (load && REGNO (operands[0]) == REGNO (base))
16740 return false; /* RAW */
16741
16742 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16743 return false; /* WAW */
16744
16745 /* If the same input register is used in both stores
16746 when storing different constants, try to find a free register.
16747 For example, the code
16748 mov r0, 0
16749 str r0, [r2]
16750 mov r0, 1
16751 str r0, [r2, #4]
16752 can be transformed into
16753 mov r1, 0
16754 mov r0, 1
16755 strd r1, r0, [r2]
16756 in Thumb mode assuming that r1 is free.
16757 For ARM mode do the same but only if the starting register
16758 can be made to be even. */
16759 if (const_store
16760 && REGNO (operands[0]) == REGNO (operands[1])
16761 && INTVAL (operands[4]) != INTVAL (operands[5]))
16762 {
16763 if (TARGET_THUMB2)
16764 {
16765 CLEAR_HARD_REG_SET (regset);
16766 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16767 if (tmp == NULL_RTX)
16768 return false;
16769
	      /* Use the new register in the first load to ensure that
		 if the original input register is not dead after the
		 peephole, then it will have the correct constant value.  */
16773 operands[0] = tmp;
16774 }
16775 else if (TARGET_ARM)
16776 {
16777 int regno = REGNO (operands[0]);
16778 if (!peep2_reg_dead_p (4, operands[0]))
16779 {
16780 /* When the input register is even and is not dead after the
16781 pattern, it has to hold the second constant but we cannot
16782 form a legal STRD in ARM mode with this register as the second
16783 register. */
16784 if (regno % 2 == 0)
16785 return false;
16786
16787 /* Is regno-1 free? */
16788 SET_HARD_REG_SET (regset);
16789 CLEAR_HARD_REG_BIT(regset, regno - 1);
16790 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16791 if (tmp == NULL_RTX)
16792 return false;
16793
16794 operands[0] = tmp;
16795 }
16796 else
16797 {
16798 /* Find a DImode register. */
16799 CLEAR_HARD_REG_SET (regset);
16800 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16801 if (tmp != NULL_RTX)
16802 {
16803 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16804 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16805 }
16806 else
16807 {
16808 /* Can we use the input register to form a DI register? */
16809 SET_HARD_REG_SET (regset);
16810 CLEAR_HARD_REG_BIT(regset,
16811 regno % 2 == 0 ? regno + 1 : regno - 1);
16812 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16813 if (tmp == NULL_RTX)
16814 return false;
16815 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16816 }
16817 }
16818
16819 gcc_assert (operands[0] != NULL_RTX);
16820 gcc_assert (operands[1] != NULL_RTX);
16821 gcc_assert (REGNO (operands[0]) % 2 == 0);
16822 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16823 }
16824 }
16825
16826 /* Make sure the instructions are ordered with lower memory access first. */
16827 if (offsets[0] > offsets[1])
16828 {
16829 gap = offsets[0] - offsets[1];
16830 offset = offsets[1];
16831
16832 /* Swap the instructions such that lower memory is accessed first. */
16833 std::swap (operands[0], operands[1]);
16834 std::swap (operands[2], operands[3]);
16835 std::swap (align[0], align[1]);
16836 if (const_store)
16837 std::swap (operands[4], operands[5]);
16838 }
16839 else
16840 {
16841 gap = offsets[1] - offsets[0];
16842 offset = offsets[0];
16843 }
16844
16845 /* Make sure accesses are to consecutive memory locations. */
16846 if (gap != GET_MODE_SIZE (SImode))
16847 return false;
16848
16849 if (!align_ok_ldrd_strd (align[0], offset))
16850 return false;
16851
16852 /* Make sure we generate legal instructions. */
16853 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16854 false, load))
16855 return true;
16856
  /* In Thumb state, where the registers are almost unconstrained, there
     is little hope of fixing things up by reordering the registers.  */
16859 if (TARGET_THUMB2)
16860 return false;
16861
16862 if (load && commute)
16863 {
16864 /* Try reordering registers. */
16865 std::swap (operands[0], operands[1]);
16866 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16867 false, load))
16868 return true;
16869 }
16870
16871 if (const_store)
16872 {
16873 /* If input registers are dead after this pattern, they can be
16874 reordered or replaced by other registers that are free in the
16875 current pattern. */
16876 if (!peep2_reg_dead_p (4, operands[0])
16877 || !peep2_reg_dead_p (4, operands[1]))
16878 return false;
16879
16880 /* Try to reorder the input registers. */
16881 /* For example, the code
16882 mov r0, 0
16883 mov r1, 1
16884 str r1, [r2]
16885 str r0, [r2, #4]
16886 can be transformed into
16887 mov r1, 0
16888 mov r0, 1
	 strd r0, r1, [r2]
16890 */
16891 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16892 false, false))
16893 {
16894 std::swap (operands[0], operands[1]);
16895 return true;
16896 }
16897
16898 /* Try to find a free DI register. */
16899 CLEAR_HARD_REG_SET (regset);
16900 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16901 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16902 while (true)
16903 {
16904 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16905 if (tmp == NULL_RTX)
16906 return false;
16907
16908 /* DREG must be an even-numbered register in DImode.
16909 Split it into SI registers. */
16910 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16911 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16912 gcc_assert (operands[0] != NULL_RTX);
16913 gcc_assert (operands[1] != NULL_RTX);
16914 gcc_assert (REGNO (operands[0]) % 2 == 0);
16915 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16916
16917 return (operands_ok_ldrd_strd (operands[0], operands[1],
16918 base, offset,
16919 false, load));
16920 }
16921 }
16922
16923 return false;
16924 }
16925
16926
16927 /* Return true if parallel execution of the two word-size accesses provided
16928 could be satisfied with a single LDRD/STRD instruction. Two word-size
16929 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16930 register operands and OPERANDS[2,3] are the corresponding memory operands.
16931 */
16932 bool
16933 valid_operands_ldrd_strd (rtx *operands, bool load)
16934 {
16935 int nops = 2;
16936 HOST_WIDE_INT offsets[2], offset, align[2];
16937 rtx base = NULL_RTX;
16938 rtx cur_base, cur_offset;
16939 int i, gap;
16940
16941 /* Check that the memory references are immediate offsets from the
16942 same base register. Extract the base register, the destination
16943 registers, and the corresponding memory offsets. */
16944 for (i = 0; i < nops; i++)
16945 {
16946 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16947 &align[i]))
16948 return false;
16949
16950 if (i == 0)
16951 base = cur_base;
16952 else if (REGNO (base) != REGNO (cur_base))
16953 return false;
16954
16955 offsets[i] = INTVAL (cur_offset);
16956 if (GET_CODE (operands[i]) == SUBREG)
16957 return false;
16958 }
16959
16960 if (offsets[0] > offsets[1])
16961 return false;
16962
16963 gap = offsets[1] - offsets[0];
16964 offset = offsets[0];
16965
16966 /* Make sure accesses are to consecutive memory locations. */
16967 if (gap != GET_MODE_SIZE (SImode))
16968 return false;
16969
16970 if (!align_ok_ldrd_strd (align[0], offset))
16971 return false;
16972
16973 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16974 false, load);
16975 }
16976
16977 \f
16978 /* Print a symbolic form of X to the debug file, F. */
16979 static void
16980 arm_print_value (FILE *f, rtx x)
16981 {
16982 switch (GET_CODE (x))
16983 {
16984 case CONST_INT:
16985 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16986 return;
16987
16988 case CONST_DOUBLE:
16989 {
16990 char fpstr[20];
16991 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16992 sizeof (fpstr), 0, 1);
16993 fputs (fpstr, f);
16994 }
16995 return;
16996
16997 case CONST_VECTOR:
16998 {
16999 int i;
17000
17001 fprintf (f, "<");
17002 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17003 {
17004 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17005 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17006 fputc (',', f);
17007 }
17008 fprintf (f, ">");
17009 }
17010 return;
17011
17012 case CONST_STRING:
17013 fprintf (f, "\"%s\"", XSTR (x, 0));
17014 return;
17015
17016 case SYMBOL_REF:
17017 fprintf (f, "`%s'", XSTR (x, 0));
17018 return;
17019
17020 case LABEL_REF:
17021 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17022 return;
17023
17024 case CONST:
17025 arm_print_value (f, XEXP (x, 0));
17026 return;
17027
17028 case PLUS:
17029 arm_print_value (f, XEXP (x, 0));
17030 fprintf (f, "+");
17031 arm_print_value (f, XEXP (x, 1));
17032 return;
17033
17034 case PC:
17035 fprintf (f, "pc");
17036 return;
17037
17038 default:
17039 fprintf (f, "????");
17040 return;
17041 }
17042 }
17043 \f
17044 /* Routines for manipulation of the constant pool. */
17045
17046 /* Arm instructions cannot load a large constant directly into a
17047 register; they have to come from a pc relative load. The constant
17048 must therefore be placed in the addressable range of the pc
17049 relative load. Depending on the precise pc relative load
17050 instruction the range is somewhere between 256 bytes and 4k. This
17051 means that we often have to dump a constant inside a function, and
17052 generate code to branch around it.
17053
17054 It is important to minimize this, since the branches will slow
17055 things down and make the code larger.
17056
17057 Normally we can hide the table after an existing unconditional
17058 branch so that there is no interruption of the flow, but in the
17059 worst case the code looks like this:
17060
17061 ldr rn, L1
17062 ...
17063 b L2
17064 align
17065 L1: .long value
17066 L2:
17067 ...
17068
17069 ldr rn, L3
17070 ...
17071 b L4
17072 align
17073 L3: .long value
17074 L4:
17075 ...
17076
17077 We fix this by performing a scan after scheduling, which notices
17078 which instructions need to have their operands fetched from the
17079 constant table and builds the table.
17080
17081 The algorithm starts by building a table of all the constants that
17082 need fixing up and all the natural barriers in the function (places
17083 where a constant table can be dropped without breaking the flow).
17084 For each fixup we note how far the pc-relative replacement will be
17085 able to reach and the offset of the instruction into the function.
17086
17087 Having built the table we then group the fixes together to form
17088 tables that are as large as possible (subject to addressing
17089 constraints) and emit each table of constants after the last
17090 barrier that is within range of all the instructions in the group.
17091 If a group does not contain a barrier, then we forcibly create one
17092 by inserting a jump instruction into the flow. Once the table has
17093 been inserted, the insns are then modified to reference the
17094 relevant entry in the pool.
17095
17096 Possible enhancements to the algorithm (not implemented) are:
17097
17098 1) For some processors and object formats, there may be benefit in
17099 aligning the pools to the start of cache lines; this alignment
17100 would need to be taken into account when calculating addressability
17101 of a pool. */
17102
17103 /* These typedefs are located at the start of this file, so that
17104 they can be used in the prototypes there. This comment is to
17105 remind readers of that fact so that the following structures
17106 can be understood more easily.
17107
17108 typedef struct minipool_node Mnode;
17109 typedef struct minipool_fixup Mfix; */
17110
17111 struct minipool_node
17112 {
17113 /* Doubly linked chain of entries. */
17114 Mnode * next;
17115 Mnode * prev;
  /* The maximum offset into the code at which this entry can be placed.
     While pushing fixes for forward references, all entries are sorted in
     order of increasing max_address.  */
17119 HOST_WIDE_INT max_address;
17120 /* Similarly for an entry inserted for a backwards ref. */
17121 HOST_WIDE_INT min_address;
17122 /* The number of fixes referencing this entry. This can become zero
17123 if we "unpush" an entry. In this case we ignore the entry when we
17124 come to emit the code. */
17125 int refcount;
17126 /* The offset from the start of the minipool. */
17127 HOST_WIDE_INT offset;
  /* The value in the table.  */
17129 rtx value;
17130 /* The mode of value. */
17131 machine_mode mode;
  /* The size of the value.  With iWMMXt enabled,
     sizes > 4 also imply an alignment of 8 bytes.  */
17134 int fix_size;
17135 };
17136
17137 struct minipool_fixup
17138 {
17139 Mfix * next;
17140 rtx_insn * insn;
17141 HOST_WIDE_INT address;
17142 rtx * loc;
17143 machine_mode mode;
17144 int fix_size;
17145 rtx value;
17146 Mnode * minipool;
17147 HOST_WIDE_INT forwards;
17148 HOST_WIDE_INT backwards;
17149 };
17150
17151 /* Fixes less than a word need padding out to a word boundary. */
17152 #define MINIPOOL_FIX_SIZE(mode) \
17153 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17154
17155 static Mnode * minipool_vector_head;
17156 static Mnode * minipool_vector_tail;
17157 static rtx_code_label *minipool_vector_label;
17158 static int minipool_pad;
17159
17160 /* The linked list of all minipool fixes required for this function. */
17161 Mfix * minipool_fix_head;
17162 Mfix * minipool_fix_tail;
17163 /* The fix entry for the current minipool, once it has been placed. */
17164 Mfix * minipool_barrier;
17165
17166 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17167 #define JUMP_TABLES_IN_TEXT_SECTION 0
17168 #endif
17169
17170 static HOST_WIDE_INT
17171 get_jump_table_size (rtx_jump_table_data *insn)
17172 {
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
17175 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17176 {
17177 rtx body = PATTERN (insn);
17178 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17179 HOST_WIDE_INT size;
17180 HOST_WIDE_INT modesize;
17181
17182 modesize = GET_MODE_SIZE (GET_MODE (body));
17183 size = modesize * XVECLEN (body, elt);
17184 switch (modesize)
17185 {
17186 case 1:
17187 /* Round up size of TBB table to a halfword boundary. */
17188 size = (size + 1) & ~HOST_WIDE_INT_1;
17189 break;
17190 case 2:
17191 /* No padding necessary for TBH. */
17192 break;
17193 case 4:
17194 /* Add two bytes for alignment on Thumb. */
17195 if (TARGET_THUMB)
17196 size += 2;
17197 break;
17198 default:
17199 gcc_unreachable ();
17200 }
17201 return size;
17202 }
17203
17204 return 0;
17205 }
17206
17207 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17208 function descriptor) into a register and the GOT address into the
17209 FDPIC register, returning an rtx for the register holding the
17210 function address. */
17211
17212 rtx
17213 arm_load_function_descriptor (rtx funcdesc)
17214 {
17215 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17216 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17217 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17218 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17219
17220 emit_move_insn (fnaddr_reg, fnaddr);
17221
17222 /* The ABI requires the entry point address to be loaded first, but
17223 since we cannot support lazy binding for lack of atomic load of
17224 two 32-bits values, we do not need to bother to prevent the
17225 previous load from being moved after that of the GOT address. */
17226 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17227
17228 return fnaddr_reg;
17229 }
17230
17231 /* Return the maximum amount of padding that will be inserted before
17232 label LABEL. */
17233 static HOST_WIDE_INT
17234 get_label_padding (rtx label)
17235 {
17236 HOST_WIDE_INT align, min_insn_size;
17237
17238 align = 1 << label_to_alignment (label).levels[0].log;
17239 min_insn_size = TARGET_THUMB ? 2 : 4;
17240 return align > min_insn_size ? align - min_insn_size : 0;
17241 }
17242
17243 /* Move a minipool fix MP from its current location to before MAX_MP.
17244 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17245 constraints may need updating. */
17246 static Mnode *
17247 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17248 HOST_WIDE_INT max_address)
17249 {
17250 /* The code below assumes these are different. */
17251 gcc_assert (mp != max_mp);
17252
17253 if (max_mp == NULL)
17254 {
17255 if (max_address < mp->max_address)
17256 mp->max_address = max_address;
17257 }
17258 else
17259 {
17260 if (max_address > max_mp->max_address - mp->fix_size)
17261 mp->max_address = max_mp->max_address - mp->fix_size;
17262 else
17263 mp->max_address = max_address;
17264
17265 /* Unlink MP from its current position. Since max_mp is non-null,
17266 mp->prev must be non-null. */
17267 mp->prev->next = mp->next;
17268 if (mp->next != NULL)
17269 mp->next->prev = mp->prev;
17270 else
17271 minipool_vector_tail = mp->prev;
17272
17273 /* Re-insert it before MAX_MP. */
17274 mp->next = max_mp;
17275 mp->prev = max_mp->prev;
17276 max_mp->prev = mp;
17277
17278 if (mp->prev != NULL)
17279 mp->prev->next = mp;
17280 else
17281 minipool_vector_head = mp;
17282 }
17283
17284 /* Save the new entry. */
17285 max_mp = mp;
17286
17287 /* Scan over the preceding entries and adjust their addresses as
17288 required. */
17289 while (mp->prev != NULL
17290 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17291 {
17292 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17293 mp = mp->prev;
17294 }
17295
17296 return max_mp;
17297 }
17298
17299 /* Add a constant to the minipool for a forward reference. Returns the
17300 node added or NULL if the constant will not fit in this pool. */
17301 static Mnode *
17302 add_minipool_forward_ref (Mfix *fix)
17303 {
17304 /* If set, max_mp is the first pool_entry that has a lower
17305 constraint than the one we are trying to add. */
17306 Mnode * max_mp = NULL;
17307 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17308 Mnode * mp;
17309
17310 /* If the minipool starts before the end of FIX->INSN then this FIX
17311 cannot be placed into the current pool. Furthermore, adding the
17312 new constant pool entry may cause the pool to start FIX_SIZE bytes
17313 earlier. */
17314 if (minipool_vector_head &&
17315 (fix->address + get_attr_length (fix->insn)
17316 >= minipool_vector_head->max_address - fix->fix_size))
17317 return NULL;
17318
17319 /* Scan the pool to see if a constant with the same value has
17320 already been added. While we are doing this, also note the
17321 location where we must insert the constant if it doesn't already
17322 exist. */
17323 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17324 {
17325 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17326 && fix->mode == mp->mode
17327 && (!LABEL_P (fix->value)
17328 || (CODE_LABEL_NUMBER (fix->value)
17329 == CODE_LABEL_NUMBER (mp->value)))
17330 && rtx_equal_p (fix->value, mp->value))
17331 {
17332 /* More than one fix references this entry. */
17333 mp->refcount++;
17334 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17335 }
17336
17337 /* Note the insertion point if necessary. */
17338 if (max_mp == NULL
17339 && mp->max_address > max_address)
17340 max_mp = mp;
17341
      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
17346 if (ARM_DOUBLEWORD_ALIGN
17347 && max_mp == NULL
17348 && fix->fix_size >= 8
17349 && mp->fix_size < 8)
17350 {
17351 max_mp = mp;
17352 max_address = mp->max_address;
17353 }
17354 }
17355
17356 /* The value is not currently in the minipool, so we need to create
17357 a new entry for it. If MAX_MP is NULL, the entry will be put on
17358 the end of the list since the placement is less constrained than
17359 any existing entry. Otherwise, we insert the new fix before
17360 MAX_MP and, if necessary, adjust the constraints on the other
17361 entries. */
17362 mp = XNEW (Mnode);
17363 mp->fix_size = fix->fix_size;
17364 mp->mode = fix->mode;
17365 mp->value = fix->value;
17366 mp->refcount = 1;
17367 /* Not yet required for a backwards ref. */
17368 mp->min_address = -65536;
17369
17370 if (max_mp == NULL)
17371 {
17372 mp->max_address = max_address;
17373 mp->next = NULL;
17374 mp->prev = minipool_vector_tail;
17375
17376 if (mp->prev == NULL)
17377 {
17378 minipool_vector_head = mp;
17379 minipool_vector_label = gen_label_rtx ();
17380 }
17381 else
17382 mp->prev->next = mp;
17383
17384 minipool_vector_tail = mp;
17385 }
17386 else
17387 {
17388 if (max_address > max_mp->max_address - mp->fix_size)
17389 mp->max_address = max_mp->max_address - mp->fix_size;
17390 else
17391 mp->max_address = max_address;
17392
17393 mp->next = max_mp;
17394 mp->prev = max_mp->prev;
17395 max_mp->prev = mp;
17396 if (mp->prev != NULL)
17397 mp->prev->next = mp;
17398 else
17399 minipool_vector_head = mp;
17400 }
17401
17402 /* Save the new entry. */
17403 max_mp = mp;
17404
17405 /* Scan over the preceding entries and adjust their addresses as
17406 required. */
17407 while (mp->prev != NULL
17408 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17409 {
17410 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17411 mp = mp->prev;
17412 }
17413
17414 return max_mp;
17415 }
17416
17417 static Mnode *
17418 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17419 HOST_WIDE_INT min_address)
17420 {
17421 HOST_WIDE_INT offset;
17422
17423 /* The code below assumes these are different. */
17424 gcc_assert (mp != min_mp);
17425
17426 if (min_mp == NULL)
17427 {
17428 if (min_address > mp->min_address)
17429 mp->min_address = min_address;
17430 }
17431 else
17432 {
17433 /* We will adjust this below if it is too loose. */
17434 mp->min_address = min_address;
17435
17436 /* Unlink MP from its current position. Since min_mp is non-null,
17437 mp->next must be non-null. */
17438 mp->next->prev = mp->prev;
17439 if (mp->prev != NULL)
17440 mp->prev->next = mp->next;
17441 else
17442 minipool_vector_head = mp->next;
17443
17444 /* Reinsert it after MIN_MP. */
17445 mp->prev = min_mp;
17446 mp->next = min_mp->next;
17447 min_mp->next = mp;
17448 if (mp->next != NULL)
17449 mp->next->prev = mp;
17450 else
17451 minipool_vector_tail = mp;
17452 }
17453
17454 min_mp = mp;
17455
17456 offset = 0;
17457 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17458 {
17459 mp->offset = offset;
17460 if (mp->refcount > 0)
17461 offset += mp->fix_size;
17462
17463 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17464 mp->next->min_address = mp->min_address + mp->fix_size;
17465 }
17466
17467 return min_mp;
17468 }
17469
17470 /* Add a constant to the minipool for a backward reference. Returns the
17471 node added or NULL if the constant will not fit in this pool.
17472
   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
17477 static Mnode *
17478 add_minipool_backward_ref (Mfix *fix)
17479 {
17480 /* If set, min_mp is the last pool_entry that has a lower constraint
17481 than the one we are trying to add. */
17482 Mnode *min_mp = NULL;
17483 /* This can be negative, since it is only a constraint. */
17484 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17485 Mnode *mp;
17486
17487 /* If we can't reach the current pool from this insn, or if we can't
17488 insert this entry at the end of the pool without pushing other
17489 fixes out of range, then we don't try. This ensures that we
17490 can't fail later on. */
17491 if (min_address >= minipool_barrier->address
17492 || (minipool_vector_tail->min_address + fix->fix_size
17493 >= minipool_barrier->address))
17494 return NULL;
17495
17496 /* Scan the pool to see if a constant with the same value has
17497 already been added. While we are doing this, also note the
17498 location where we must insert the constant if it doesn't already
17499 exist. */
17500 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17501 {
17502 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17503 && fix->mode == mp->mode
17504 && (!LABEL_P (fix->value)
17505 || (CODE_LABEL_NUMBER (fix->value)
17506 == CODE_LABEL_NUMBER (mp->value)))
17507 && rtx_equal_p (fix->value, mp->value)
17508 /* Check that there is enough slack to move this entry to the
17509 end of the table (this is conservative). */
17510 && (mp->max_address
17511 > (minipool_barrier->address
17512 + minipool_vector_tail->offset
17513 + minipool_vector_tail->fix_size)))
17514 {
17515 mp->refcount++;
17516 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17517 }
17518
17519 if (min_mp != NULL)
17520 mp->min_address += fix->fix_size;
17521 else
17522 {
17523 /* Note the insertion point if necessary. */
17524 if (mp->min_address < min_address)
17525 {
	      /* For now, we do not allow the insertion of nodes requiring
		 8-byte alignment anywhere but at the start of the pool.  */
17528 if (ARM_DOUBLEWORD_ALIGN
17529 && fix->fix_size >= 8 && mp->fix_size < 8)
17530 return NULL;
17531 else
17532 min_mp = mp;
17533 }
17534 else if (mp->max_address
17535 < minipool_barrier->address + mp->offset + fix->fix_size)
17536 {
17537 /* Inserting before this entry would push the fix beyond
17538 its maximum address (which can happen if we have
17539 re-located a forwards fix); force the new fix to come
17540 after it. */
17541 if (ARM_DOUBLEWORD_ALIGN
17542 && fix->fix_size >= 8 && mp->fix_size < 8)
17543 return NULL;
17544 else
17545 {
17546 min_mp = mp;
17547 min_address = mp->min_address + fix->fix_size;
17548 }
17549 }
17550 /* Do not insert a non-8-byte aligned quantity before 8-byte
17551 aligned quantities. */
17552 else if (ARM_DOUBLEWORD_ALIGN
17553 && fix->fix_size < 8
17554 && mp->fix_size >= 8)
17555 {
17556 min_mp = mp;
17557 min_address = mp->min_address + fix->fix_size;
17558 }
17559 }
17560 }
17561
17562 /* We need to create a new entry. */
17563 mp = XNEW (Mnode);
17564 mp->fix_size = fix->fix_size;
17565 mp->mode = fix->mode;
17566 mp->value = fix->value;
17567 mp->refcount = 1;
17568 mp->max_address = minipool_barrier->address + 65536;
17569
17570 mp->min_address = min_address;
17571
17572 if (min_mp == NULL)
17573 {
17574 mp->prev = NULL;
17575 mp->next = minipool_vector_head;
17576
17577 if (mp->next == NULL)
17578 {
17579 minipool_vector_tail = mp;
17580 minipool_vector_label = gen_label_rtx ();
17581 }
17582 else
17583 mp->next->prev = mp;
17584
17585 minipool_vector_head = mp;
17586 }
17587 else
17588 {
17589 mp->next = min_mp->next;
17590 mp->prev = min_mp;
17591 min_mp->next = mp;
17592
17593 if (mp->next != NULL)
17594 mp->next->prev = mp;
17595 else
17596 minipool_vector_tail = mp;
17597 }
17598
17599 /* Save the new entry. */
17600 min_mp = mp;
17601
17602 if (mp->prev)
17603 mp = mp->prev;
17604 else
17605 mp->offset = 0;
17606
17607 /* Scan over the following entries and adjust their offsets. */
17608 while (mp->next != NULL)
17609 {
17610 if (mp->next->min_address < mp->min_address + mp->fix_size)
17611 mp->next->min_address = mp->min_address + mp->fix_size;
17612
17613 if (mp->refcount)
17614 mp->next->offset = mp->offset + mp->fix_size;
17615 else
17616 mp->next->offset = mp->offset;
17617
17618 mp = mp->next;
17619 }
17620
17621 return min_mp;
17622 }
17623
17624 static void
17625 assign_minipool_offsets (Mfix *barrier)
17626 {
17627 HOST_WIDE_INT offset = 0;
17628 Mnode *mp;
17629
17630 minipool_barrier = barrier;
17631
17632 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17633 {
17634 mp->offset = offset;
17635
17636 if (mp->refcount > 0)
17637 offset += mp->fix_size;
17638 }
17639 }
17640
/* Output the literal table.  */
17642 static void
17643 dump_minipool (rtx_insn *scan)
17644 {
17645 Mnode * mp;
17646 Mnode * nmp;
17647 int align64 = 0;
17648
17649 if (ARM_DOUBLEWORD_ALIGN)
17650 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17651 if (mp->refcount > 0 && mp->fix_size >= 8)
17652 {
17653 align64 = 1;
17654 break;
17655 }
17656
17657 if (dump_file)
17658 fprintf (dump_file,
17659 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17660 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17661
17662 scan = emit_label_after (gen_label_rtx (), scan);
17663 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17664 scan = emit_label_after (minipool_vector_label, scan);
17665
17666 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17667 {
17668 if (mp->refcount > 0)
17669 {
17670 if (dump_file)
17671 {
17672 fprintf (dump_file,
17673 ";; Offset %u, min %ld, max %ld ",
17674 (unsigned) mp->offset, (unsigned long) mp->min_address,
17675 (unsigned long) mp->max_address);
17676 arm_print_value (dump_file, mp->value);
17677 fputc ('\n', dump_file);
17678 }
17679
17680 rtx val = copy_rtx (mp->value);
17681
17682 switch (GET_MODE_SIZE (mp->mode))
17683 {
17684 #ifdef HAVE_consttable_1
17685 case 1:
17686 scan = emit_insn_after (gen_consttable_1 (val), scan);
17687 break;
17688
17689 #endif
17690 #ifdef HAVE_consttable_2
17691 case 2:
17692 scan = emit_insn_after (gen_consttable_2 (val), scan);
17693 break;
17694
17695 #endif
17696 #ifdef HAVE_consttable_4
17697 case 4:
17698 scan = emit_insn_after (gen_consttable_4 (val), scan);
17699 break;
17700
17701 #endif
17702 #ifdef HAVE_consttable_8
17703 case 8:
17704 scan = emit_insn_after (gen_consttable_8 (val), scan);
17705 break;
17706
17707 #endif
17708 #ifdef HAVE_consttable_16
17709 case 16:
17710 scan = emit_insn_after (gen_consttable_16 (val), scan);
17711 break;
17712
17713 #endif
17714 default:
17715 gcc_unreachable ();
17716 }
17717 }
17718
17719 nmp = mp->next;
17720 free (mp);
17721 }
17722
17723 minipool_vector_head = minipool_vector_tail = NULL;
17724 scan = emit_insn_after (gen_consttable_end (), scan);
17725 scan = emit_barrier_after (scan);
17726 }
17727
17728 /* Return the cost of forcibly inserting a barrier after INSN. */
17729 static int
17730 arm_barrier_cost (rtx_insn *insn)
17731 {
17732 /* Basing the location of the pool on the loop depth is preferable,
17733 but at the moment, the basic block information seems to be
17734 corrupt by this stage of the compilation. */
17735 int base_cost = 50;
17736 rtx_insn *next = next_nonnote_insn (insn);
17737
17738 if (next != NULL && LABEL_P (next))
17739 base_cost -= 20;
17740
17741 switch (GET_CODE (insn))
17742 {
17743 case CODE_LABEL:
17744 /* It will always be better to place the table before the label, rather
17745 than after it. */
17746 return 50;
17747
17748 case INSN:
17749 case CALL_INSN:
17750 return base_cost;
17751
17752 case JUMP_INSN:
17753 return base_cost - 10;
17754
17755 default:
17756 return base_cost + 10;
17757 }
17758 }
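
/* Hedged worked example of the heuristic above: for a JUMP_INSN whose next
   non-note insn is a label, the cost is (50 - 20) - 10 = 20, making the point
   just after such a branch one of the cheapest places to force a pool
   barrier; a CODE_LABEL itself always costs a flat 50, since the pool should
   go before the label rather than after it.  */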
17759
17760 /* Find the best place in the insn stream in the range
17761 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17762 Create the barrier by inserting a jump and add a new fix entry for
17763 it. */
17764 static Mfix *
17765 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17766 {
17767 HOST_WIDE_INT count = 0;
17768 rtx_barrier *barrier;
17769 rtx_insn *from = fix->insn;
17770 /* The instruction after which we will insert the jump. */
17771 rtx_insn *selected = NULL;
17772 int selected_cost;
17773 /* The address at which the jump instruction will be placed. */
17774 HOST_WIDE_INT selected_address;
17775 Mfix * new_fix;
17776 HOST_WIDE_INT max_count = max_address - fix->address;
17777 rtx_code_label *label = gen_label_rtx ();
17778
17779 selected_cost = arm_barrier_cost (from);
17780 selected_address = fix->address;
17781
17782 while (from && count < max_count)
17783 {
17784 rtx_jump_table_data *tmp;
17785 int new_cost;
17786
17787 /* This code shouldn't have been called if there was a natural barrier
17788 within range. */
17789 gcc_assert (!BARRIER_P (from));
17790
17791 /* Count the length of this insn. This must stay in sync with the
17792 code that pushes minipool fixes. */
17793 if (LABEL_P (from))
17794 count += get_label_padding (from);
17795 else
17796 count += get_attr_length (from);
17797
17798 /* If there is a jump table, add its length. */
17799 if (tablejump_p (from, NULL, &tmp))
17800 {
17801 count += get_jump_table_size (tmp);
17802
17803 /* Jump tables aren't in a basic block, so base the cost on
17804 the dispatch insn. If we select this location, we will
17805 still put the pool after the table. */
17806 new_cost = arm_barrier_cost (from);
17807
17808 if (count < max_count
17809 && (!selected || new_cost <= selected_cost))
17810 {
17811 selected = tmp;
17812 selected_cost = new_cost;
17813 selected_address = fix->address + count;
17814 }
17815
17816 /* Continue after the dispatch table. */
17817 from = NEXT_INSN (tmp);
17818 continue;
17819 }
17820
17821 new_cost = arm_barrier_cost (from);
17822
17823 if (count < max_count
17824 && (!selected || new_cost <= selected_cost))
17825 {
17826 selected = from;
17827 selected_cost = new_cost;
17828 selected_address = fix->address + count;
17829 }
17830
17831 from = NEXT_INSN (from);
17832 }
17833
17834 /* Make sure that we found a place to insert the jump. */
17835 gcc_assert (selected);
17836
17837 /* Create a new JUMP_INSN that branches around a barrier. */
17838 from = emit_jump_insn_after (gen_jump (label), selected);
17839 JUMP_LABEL (from) = label;
17840 barrier = emit_barrier_after (from);
17841 emit_label_after (label, barrier);
17842
17843 /* Create a minipool barrier entry for the new barrier. */
17844 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17845 new_fix->insn = barrier;
17846 new_fix->address = selected_address;
17847 new_fix->next = fix->next;
17848 fix->next = new_fix;
17849
17850 return new_fix;
17851 }
17852
17853 /* Record that there is a natural barrier in the insn stream at
17854 ADDRESS. */
17855 static void
17856 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17857 {
17858 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17859
17860 fix->insn = insn;
17861 fix->address = address;
17862
17863 fix->next = NULL;
17864 if (minipool_fix_head != NULL)
17865 minipool_fix_tail->next = fix;
17866 else
17867 minipool_fix_head = fix;
17868
17869 minipool_fix_tail = fix;
17870 }
17871
17872 /* Record INSN, which will need fixing up to load a value from the
17873 minipool. ADDRESS is the offset of the insn from the start of the
17874 function; LOC is a pointer to the part of the insn which requires
17875 fixing; VALUE is the constant that must be loaded, which is of type
17876 MODE. */
17877 static void
17878 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17879 machine_mode mode, rtx value)
17880 {
17881 gcc_assert (!arm_disable_literal_pool);
17882 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17883
17884 fix->insn = insn;
17885 fix->address = address;
17886 fix->loc = loc;
17887 fix->mode = mode;
17888 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17889 fix->value = value;
17890 fix->forwards = get_attr_pool_range (insn);
17891 fix->backwards = get_attr_neg_pool_range (insn);
17892 fix->minipool = NULL;
17893
17894 /* If an insn doesn't have a range defined for it, then it isn't
17895 expecting to be reworked by this code. Better to stop now than
17896 to generate duff assembly code. */
17897 gcc_assert (fix->forwards || fix->backwards);
17898
17899 /* If an entry requires 8-byte alignment then assume all constant pools
17900 require 4 bytes of padding. Trying to do this later on a per-pool
17901 basis is awkward because existing pool entries have to be modified. */
17902 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17903 minipool_pad = 4;
17904
17905 if (dump_file)
17906 {
17907 fprintf (dump_file,
17908 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17909 GET_MODE_NAME (mode),
17910 INSN_UID (insn), (unsigned long) address,
17911 -1 * (long)fix->backwards, (long)fix->forwards);
17912 arm_print_value (dump_file, fix->value);
17913 fprintf (dump_file, "\n");
17914 }
17915
17916 /* Add it to the chain of fixes. */
17917 fix->next = NULL;
17918
17919 if (minipool_fix_head != NULL)
17920 minipool_fix_tail->next = fix;
17921 else
17922 minipool_fix_head = fix;
17923
17924 minipool_fix_tail = fix;
17925 }
17926
17927 /* Return the maximum cost, in insns, that we are prepared to pay to
17928 synthesize a 64-bit constant inline. Constants more expensive than
17929 this are handled by other means. */
17930 int
17931 arm_max_const_double_inline_cost ()
17932 {
17933 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17934 }
17935
17936 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17937 Returns the number of insns needed, or 99 if we don't know how to
17938 do it. */
17939 int
17940 arm_const_double_inline_cost (rtx val)
17941 {
17942 rtx lowpart, highpart;
17943 machine_mode mode;
17944
17945 mode = GET_MODE (val);
17946
17947 if (mode == VOIDmode)
17948 mode = DImode;
17949
17950 gcc_assert (GET_MODE_SIZE (mode) == 8);
17951
17952 lowpart = gen_lowpart (SImode, val);
17953 highpart = gen_highpart_mode (SImode, mode, val);
17954
17955 gcc_assert (CONST_INT_P (lowpart));
17956 gcc_assert (CONST_INT_P (highpart));
17957
17958 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17959 NULL_RTX, NULL_RTX, 0, 0)
17960 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17961 NULL_RTX, NULL_RTX, 0, 0));
17962 }
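
/* Illustrative example (a sketch, not part of the build): for the DImode
   constant 0x000000ff00000001 the low part 0x00000001 and the high part
   0x000000ff are both valid ARM immediates, so each half costs a single
   insn and the function returns 2.  */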
17963
17964 /* Cost of loading a SImode constant. */
17965 static inline int
17966 arm_const_inline_cost (enum rtx_code code, rtx val)
17967 {
17968 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17969 NULL_RTX, NULL_RTX, 1, 0);
17970 }
17971
17972 /* Return true if it is worthwhile to split a 64-bit constant into two
17973 32-bit operations. This is the case if optimizing for size, or
17974 if we have load delay slots, or if one 32-bit part can be done with
17975 a single data operation. */
17976 bool
17977 arm_const_double_by_parts (rtx val)
17978 {
17979 machine_mode mode = GET_MODE (val);
17980 rtx part;
17981
17982 if (optimize_size || arm_ld_sched)
17983 return true;
17984
17985 if (mode == VOIDmode)
17986 mode = DImode;
17987
17988 part = gen_highpart_mode (SImode, mode, val);
17989
17990 gcc_assert (CONST_INT_P (part));
17991
17992 if (const_ok_for_arm (INTVAL (part))
17993 || const_ok_for_arm (~INTVAL (part)))
17994 return true;
17995
17996 part = gen_lowpart (SImode, val);
17997
17998 gcc_assert (CONST_INT_P (part));
17999
18000 if (const_ok_for_arm (INTVAL (part))
18001 || const_ok_for_arm (~INTVAL (part)))
18002 return true;
18003
18004 return false;
18005 }
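
/* Example of the test above (illustrative only): when neither optimizing for
   size nor scheduling for load delay slots, 0xff00000012345678 is still worth
   splitting because its high part 0xff000000 is a valid ARM immediate, even
   though the low part 0x12345678 is not.  */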
18006
18007 /* Return true if it is possible to inline both the high and low parts
18008 of a 64-bit constant into 32-bit data processing instructions. */
18009 bool
18010 arm_const_double_by_immediates (rtx val)
18011 {
18012 machine_mode mode = GET_MODE (val);
18013 rtx part;
18014
18015 if (mode == VOIDmode)
18016 mode = DImode;
18017
18018 part = gen_highpart_mode (SImode, mode, val);
18019
18020 gcc_assert (CONST_INT_P (part));
18021
18022 if (!const_ok_for_arm (INTVAL (part)))
18023 return false;
18024
18025 part = gen_lowpart (SImode, val);
18026
18027 gcc_assert (CONST_INT_P (part));
18028
18029 if (!const_ok_for_arm (INTVAL (part)))
18030 return false;
18031
18032 return true;
18033 }
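
/* Illustrative example: 0x000000ff00000001 satisfies this test because both
   halves (0xff and 0x1) are valid ARM immediates, whereas 0x0000000112345678
   does not, since 0x12345678 cannot be encoded as an 8-bit value rotated by
   an even amount.  */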
18034
18035 /* Scan INSN and note any of its operands that need fixing.
18036 If DO_PUSHES is false we do not actually push any of the fixups
18037 needed. */
18038 static void
18039 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18040 {
18041 int opno;
18042
18043 extract_constrain_insn (insn);
18044
18045 if (recog_data.n_alternatives == 0)
18046 return;
18047
18048 /* Fill in recog_op_alt with information about the constraints of
18049 this insn. */
18050 preprocess_constraints (insn);
18051
18052 const operand_alternative *op_alt = which_op_alt ();
18053 for (opno = 0; opno < recog_data.n_operands; opno++)
18054 {
18055 /* Things we need to fix can only occur in inputs. */
18056 if (recog_data.operand_type[opno] != OP_IN)
18057 continue;
18058
18059 /* If this alternative is a memory reference, then any mention
18060 of constants in this alternative is really to fool reload
18061 into allowing us to accept one there. We need to fix them up
18062 now so that we output the right code. */
18063 if (op_alt[opno].memory_ok)
18064 {
18065 rtx op = recog_data.operand[opno];
18066
18067 if (CONSTANT_P (op))
18068 {
18069 if (do_pushes)
18070 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18071 recog_data.operand_mode[opno], op);
18072 }
18073 else if (MEM_P (op)
18074 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18075 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18076 {
18077 if (do_pushes)
18078 {
18079 rtx cop = avoid_constant_pool_reference (op);
18080
18081 /* Casting the address of something to a mode narrower
18082 than a word can cause avoid_constant_pool_reference()
18083 to return the pool reference itself. That's no good to
18084 us here. Let's just hope that we can use the
18085 constant pool value directly. */
18086 if (op == cop)
18087 cop = get_pool_constant (XEXP (op, 0));
18088
18089 push_minipool_fix (insn, address,
18090 recog_data.operand_loc[opno],
18091 recog_data.operand_mode[opno], cop);
18092 }
18093
18094 }
18095 }
18096 }
18097
18098 return;
18099 }
18100
18101 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18102 and unions in the context of ARMv8-M Security Extensions. It is used as a
18103 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18104 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18105 or four masks, depending on whether it is being computed for a
18106 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18107 respectively. The tree for the type of the argument or a field within an
18108 argument is passed in ARG_TYPE, the current register this argument or field
18109 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18110 argument or field starts at is passed in STARTING_BIT and the last used bit
18111 is kept in LAST_USED_BIT which is also updated accordingly. */
18112
18113 static unsigned HOST_WIDE_INT
18114 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18115 uint32_t * padding_bits_to_clear,
18116 unsigned starting_bit, int * last_used_bit)
18117
18118 {
18119 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18120
18121 if (TREE_CODE (arg_type) == RECORD_TYPE)
18122 {
18123 unsigned current_bit = starting_bit;
18124 tree field;
18125 long int offset, size;
18126
18127
18128 field = TYPE_FIELDS (arg_type);
18129 while (field)
18130 {
18131 /* The offset within a structure is always an offset from
18132 the start of that structure. Make sure we take that into account
18133 in the calculation of the register-based offset that we use here. */
18134 offset = starting_bit;
18135 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18136 offset %= 32;
18137
18138 /* This is the actual size of the field, for bitfields this is the
18139 bitfield width and not the container size. */
18140 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18141
18142 if (*last_used_bit != offset)
18143 {
18144 if (offset < *last_used_bit)
18145 {
18146 /* This field's offset is before the 'last_used_bit', that
18147 means this field goes on the next register. So we need to
18148 pad the rest of the current register and increase the
18149 register number. */
18150 uint32_t mask;
18151 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18152 mask++;
18153
18154 padding_bits_to_clear[*regno] |= mask;
18155 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18156 (*regno)++;
18157 }
18158 else
18159 {
18160 /* Otherwise we pad the bits between the last field's end and
18161 the start of the new field. */
18162 uint32_t mask;
18163
18164 mask = ((uint32_t)-1) >> (32 - offset);
18165 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18166 padding_bits_to_clear[*regno] |= mask;
18167 }
18168 current_bit = offset;
18169 }
18170
18171 /* Calculate further padding bits for inner structs/unions too. */
18172 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18173 {
18174 *last_used_bit = current_bit;
18175 not_to_clear_reg_mask
18176 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18177 padding_bits_to_clear, offset,
18178 last_used_bit);
18179 }
18180 else
18181 {
18182 /* Update 'current_bit' with this field's size. If the
18183 'current_bit' lies in a subsequent register, update 'regno' and
18184 reset 'current_bit' to point to the current bit in that new
18185 register. */
18186 current_bit += size;
18187 while (current_bit >= 32)
18188 {
18189 current_bit-=32;
18190 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18191 (*regno)++;
18192 }
18193 *last_used_bit = current_bit;
18194 }
18195
18196 field = TREE_CHAIN (field);
18197 }
18198 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18199 }
18200 else if (TREE_CODE (arg_type) == UNION_TYPE)
18201 {
18202 tree field, field_t;
18203 int i, regno_t, field_size;
18204 int max_reg = -1;
18205 int max_bit = -1;
18206 uint32_t mask;
18207 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18208 = {-1, -1, -1, -1};
18209
18210 /* To compute the padding bits in a union we only consider bits as
18211 padding bits if they are always either a padding bit or fall outside a
18212 field's size for all fields in the union. */
18213 field = TYPE_FIELDS (arg_type);
18214 while (field)
18215 {
18216 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18217 = {0U, 0U, 0U, 0U};
18218 int last_used_bit_t = *last_used_bit;
18219 regno_t = *regno;
18220 field_t = TREE_TYPE (field);
18221
18222 /* If the field's type is either a record or a union make sure to
18223 compute their padding bits too. */
18224 if (RECORD_OR_UNION_TYPE_P (field_t))
18225 not_to_clear_reg_mask
18226 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18227 &padding_bits_to_clear_t[0],
18228 starting_bit, &last_used_bit_t);
18229 else
18230 {
18231 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18232 regno_t = (field_size / 32) + *regno;
18233 last_used_bit_t = (starting_bit + field_size) % 32;
18234 }
18235
18236 for (i = *regno; i < regno_t; i++)
18237 {
18238 /* For all but the last register used by this field only keep the
18239 padding bits that were padding bits in this field. */
18240 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18241 }
18242
18243 /* For the last register, keep all padding bits that were padding
18244 bits in this field and any padding bits that are still valid
18245 as padding bits but fall outside of this field's size. */
18246 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18247 padding_bits_to_clear_res[regno_t]
18248 &= padding_bits_to_clear_t[regno_t] | mask;
18249
18250 /* Update the maximum size of the fields in terms of registers used
18251 ('max_reg') and the 'last_used_bit' in said register. */
18252 if (max_reg < regno_t)
18253 {
18254 max_reg = regno_t;
18255 max_bit = last_used_bit_t;
18256 }
18257 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18258 max_bit = last_used_bit_t;
18259
18260 field = TREE_CHAIN (field);
18261 }
18262
18263 /* Update the current padding_bits_to_clear using the intersection of the
18264 padding bits of all the fields. */
18265 for (i = *regno; i < max_reg; i++)
18266 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18267
18268 /* Do not keep trailing padding bits, we do not know yet whether this
18269 is the end of the argument. */
18270 mask = ((uint32_t) 1 << max_bit) - 1;
18271 padding_bits_to_clear[max_reg]
18272 |= padding_bits_to_clear_res[max_reg] & mask;
18273
18274 *regno = max_reg;
18275 *last_used_bit = max_bit;
18276 }
18277 else
18278 /* This function should only be used for structs and unions. */
18279 gcc_unreachable ();
18280
18281 return not_to_clear_reg_mask;
18282 }
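
/* Hedged worked example (hypothetical argument type, not part of the build):
   for a struct { uint8_t a; uint16_t b; } starting in r0 with STARTING_BIT 0,
   the walk above records 0x0000ff00 in padding_bits_to_clear[0] for the pad
   byte between the two fields and returns a mask with bits 0 and 1 set; the
   caller (compute_not_to_clear_mask) then drops bit 1 again because
   LAST_USED_BIT comes back as 0.  */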
18283
18284 /* In the context of ARMv8-M Security Extensions, this function is used for both
18285 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18286 registers are used when returning or passing arguments, which is then
18287 returned as a mask. It will also compute a mask to indicate padding/unused
18288 bits for each of these registers, and pass this back through the
18289 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18290 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18291 the starting register used to pass this argument or return value is passed
18292 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18293 for struct and union types. */
18294
18295 static unsigned HOST_WIDE_INT
18296 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18297 uint32_t * padding_bits_to_clear)
18298
18299 {
18300 int last_used_bit = 0;
18301 unsigned HOST_WIDE_INT not_to_clear_mask;
18302
18303 if (RECORD_OR_UNION_TYPE_P (arg_type))
18304 {
18305 not_to_clear_mask
18306 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18307 padding_bits_to_clear, 0,
18308 &last_used_bit);
18309
18310
18311 /* If the 'last_used_bit' is not zero, that means we are still using a
18312 part of the last 'regno'. In such cases we must clear the trailing
18313 bits. Otherwise we are not using regno at all and should mark it
18314 to be cleared. */
18315 if (last_used_bit != 0)
18316 padding_bits_to_clear[regno]
18317 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18318 else
18319 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18320 }
18321 else
18322 {
18323 not_to_clear_mask = 0;
18324 /* We are not dealing with structs nor unions. So these arguments may be
18325 passed in floating point registers too. In some cases a BLKmode is
18326 used when returning or passing arguments in multiple VFP registers. */
18327 if (GET_MODE (arg_rtx) == BLKmode)
18328 {
18329 int i, arg_regs;
18330 rtx reg;
18331
18332 /* This should really only occur when dealing with the hard-float
18333 ABI. */
18334 gcc_assert (TARGET_HARD_FLOAT_ABI);
18335
18336 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18337 {
18338 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18339 gcc_assert (REG_P (reg));
18340
18341 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18342
18343 /* If we are dealing with DF mode, make sure we don't
18344 clear either of the registers it addresses. */
18345 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18346 if (arg_regs > 1)
18347 {
18348 unsigned HOST_WIDE_INT mask;
18349 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18350 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18351 not_to_clear_mask |= mask;
18352 }
18353 }
18354 }
18355 else
18356 {
18357 /* Otherwise we can rely on the MODE to determine how many registers
18358 are being used by this argument. */
18359 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18360 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18361 if (arg_regs > 1)
18362 {
18363 unsigned HOST_WIDE_INT
18364 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18365 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18366 not_to_clear_mask |= mask;
18367 }
18368 }
18369 }
18370
18371 return not_to_clear_mask;
18372 }
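
/* Illustrative example (a sketch under the hard-float ABI): for a lone
   "double" argument passed in d0, ARG_RTX is a DFmode register whose mode
   spans two registers, so the returned mask has the bits for s0 and s1 set
   and no padding bits are recorded.  */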
18373
18374 /* Clear secret values from registers before doing a cmse_nonsecure_call or
18375 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18376 which registers are to be fully cleared, using the value in register
18377 CLEARING_REG if that is more efficient. The PADDING_BITS_LEN-entry array
18378 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18379 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18380
18381 NOTE: one of the three following conditions must hold:
18382 - SCRATCH_REG is a low register
18383 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18384 in TO_CLEAR_BITMAP)
18385 - CLEARING_REG is a low register. */
18386
18387 static void
18388 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18389 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18390 {
18391 bool saved_clearing = false;
18392 rtx saved_clearing_reg = NULL_RTX;
18393 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18394
18395 gcc_assert (arm_arch_cmse);
18396
18397 if (!bitmap_empty_p (to_clear_bitmap))
18398 {
18399 minregno = bitmap_first_set_bit (to_clear_bitmap);
18400 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18401 }
18402 clearing_regno = REGNO (clearing_reg);
18403
18404 /* Clear padding bits. */
18405 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18406 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18407 {
18408 uint64_t mask;
18409 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18410
18411 if (padding_bits_to_clear[i] == 0)
18412 continue;
18413
18414 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18415 CLEARING_REG as scratch. */
18416 if (TARGET_THUMB1
18417 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18418 {
18419 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18420 such that we can use clearing_reg to clear the unused bits in the
18421 arguments. */
18422 if ((clearing_regno > maxregno
18423 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18424 && !saved_clearing)
18425 {
18426 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18427 emit_move_insn (scratch_reg, clearing_reg);
18428 saved_clearing = true;
18429 saved_clearing_reg = scratch_reg;
18430 }
18431 scratch_reg = clearing_reg;
18432 }
18433
18434 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18435 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18436 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18437
18438 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18439 mask = (~padding_bits_to_clear[i]) >> 16;
18440 rtx16 = gen_int_mode (16, SImode);
18441 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18442 if (mask)
18443 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18444
18445 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18446 }
18447 if (saved_clearing)
18448 emit_move_insn (clearing_reg, saved_clearing_reg);
18449
18450
18451 /* Clear full registers. */
18452
18453 if (TARGET_HAVE_FPCXT_CMSE)
18454 {
18455 rtvec vunspec_vec;
18456 int i, j, k, nb_regs;
18457 rtx use_seq, par, reg, set, vunspec;
18458 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18459 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18460 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18461
18462 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18463 {
18464 /* Find next register to clear and exit if none. */
18465 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18466 if (i > maxregno)
18467 break;
18468
18469 /* Compute number of consecutive registers to clear. */
18470 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18471 j++);
18472 nb_regs = j - i;
18473
18474 /* Create VSCCLRM RTX pattern. */
18475 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18476 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18477 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18478 VUNSPEC_VSCCLRM_VPR);
18479 XVECEXP (par, 0, 0) = vunspec;
18480
18481 /* Insert VFP register clearing RTX in the pattern. */
18482 start_sequence ();
18483 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18484 {
18485 if (!bitmap_bit_p (to_clear_bitmap, j))
18486 continue;
18487
18488 reg = gen_rtx_REG (SFmode, j);
18489 set = gen_rtx_SET (reg, const0_rtx);
18490 XVECEXP (par, 0, k++) = set;
18491 emit_use (reg);
18492 }
18493 use_seq = get_insns ();
18494 end_sequence ();
18495
18496 emit_insn_after (use_seq, emit_insn (par));
18497 }
18498
18499 /* Get set of core registers to clear. */
18500 bitmap_clear (core_regs_bitmap);
18501 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18502 IP_REGNUM - R0_REGNUM + 1);
18503 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18504 core_regs_bitmap);
18505 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18506
18507 if (bitmap_empty_p (to_clear_core_bitmap))
18508 return;
18509
18510 /* Create clrm RTX pattern. */
18511 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18512 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18513
18514 /* Insert core register clearing RTX in the pattern. */
18515 start_sequence ();
18516 for (j = 0, i = minregno; j < nb_regs; i++)
18517 {
18518 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18519 continue;
18520
18521 reg = gen_rtx_REG (SImode, i);
18522 set = gen_rtx_SET (reg, const0_rtx);
18523 XVECEXP (par, 0, j++) = set;
18524 emit_use (reg);
18525 }
18526
18527 /* Insert APSR register clearing RTX in the pattern
18528 * along with clobbering CC. */
18529 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18530 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18531 VUNSPEC_CLRM_APSR);
18532
18533 XVECEXP (par, 0, j++) = vunspec;
18534
18535 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18536 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18537 XVECEXP (par, 0, j) = clobber;
18538
18539 use_seq = get_insns ();
18540 end_sequence ();
18541
18542 emit_insn_after (use_seq, emit_insn (par));
18543 }
18544 else
18545 {
18546 /* If not marked for clearing, clearing_reg already does not contain
18547 any secret. */
18548 if (clearing_regno <= maxregno
18549 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18550 {
18551 emit_move_insn (clearing_reg, const0_rtx);
18552 emit_use (clearing_reg);
18553 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18554 }
18555
18556 for (regno = minregno; regno <= maxregno; regno++)
18557 {
18558 if (!bitmap_bit_p (to_clear_bitmap, regno))
18559 continue;
18560
18561 if (IS_VFP_REGNUM (regno))
18562 {
18563 /* If regno is an even vfp register and its successor is also to
18564 be cleared, use vmov. */
18565 if (TARGET_VFP_DOUBLE
18566 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18567 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18568 {
18569 emit_move_insn (gen_rtx_REG (DFmode, regno),
18570 CONST1_RTX (DFmode));
18571 emit_use (gen_rtx_REG (DFmode, regno));
18572 regno++;
18573 }
18574 else
18575 {
18576 emit_move_insn (gen_rtx_REG (SFmode, regno),
18577 CONST1_RTX (SFmode));
18578 emit_use (gen_rtx_REG (SFmode, regno));
18579 }
18580 }
18581 else
18582 {
18583 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18584 emit_use (gen_rtx_REG (SImode, regno));
18585 }
18586 }
18587 }
18588 }
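
/* Rough sketch of the padding-bit clearing above (illustrative, not the exact
   RTL): with padding_bits_to_clear[0] == 0x0000ff00 and IP as the scratch
   register on an ARM/Thumb-2 target, the emitted sequence is approximately

       mov     ip, #0x00ff      @ low half of ~0x0000ff00
       movt    ip, #0xffff      @ high half of ~0x0000ff00
       and     r0, r0, ip

   i.e. the unused byte of r0 is zeroed while the argument bytes are kept.  */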
18589
18590 /* Clear core and caller-saved VFP registers not used to pass arguments before
18591 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18592 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18593 libgcc/config/arm/cmse_nonsecure_call.S. */
18594
18595 static void
18596 cmse_nonsecure_call_inline_register_clear (void)
18597 {
18598 basic_block bb;
18599
18600 FOR_EACH_BB_FN (bb, cfun)
18601 {
18602 rtx_insn *insn;
18603
18604 FOR_BB_INSNS (bb, insn)
18605 {
18606 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18607 /* frame = VFP regs + FPSCR + VPR. */
18608 unsigned lazy_store_stack_frame_size
18609 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18610 unsigned long callee_saved_mask
18611 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18612 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18613 unsigned address_regnum, regno;
18614 unsigned max_int_regno
18615 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18616 unsigned max_fp_regno
18617 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18618 unsigned maxregno
18619 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18620 auto_sbitmap to_clear_bitmap (maxregno + 1);
18621 rtx_insn *seq;
18622 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18623 rtx address;
18624 CUMULATIVE_ARGS args_so_far_v;
18625 cumulative_args_t args_so_far;
18626 tree arg_type, fntype;
18627 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18628 function_args_iterator args_iter;
18629 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18630
18631 if (!NONDEBUG_INSN_P (insn))
18632 continue;
18633
18634 if (!CALL_P (insn))
18635 continue;
18636
18637 pat = PATTERN (insn);
18638 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18639 call = XVECEXP (pat, 0, 0);
18640
18641 /* Get the real call RTX if the insn sets a value, ie. returns. */
18642 if (GET_CODE (call) == SET)
18643 call = SET_SRC (call);
18644
18645 /* Check if it is a cmse_nonsecure_call. */
18646 unspec = XEXP (call, 0);
18647 if (GET_CODE (unspec) != UNSPEC
18648 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18649 continue;
18650
18651 /* Mark registers that need to be cleared. Those that hold a
18652 parameter are removed from the set further below. */
18653 bitmap_clear (to_clear_bitmap);
18654 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18655 max_int_regno - R0_REGNUM + 1);
18656
18657 /* Only look at the caller-saved floating point registers in case of
18658 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18659 lazy store and loads which clear both caller- and callee-saved
18660 registers. */
18661 if (!lazy_fpclear)
18662 {
18663 auto_sbitmap float_bitmap (maxregno + 1);
18664
18665 bitmap_clear (float_bitmap);
18666 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18667 max_fp_regno - FIRST_VFP_REGNUM + 1);
18668 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18669 }
18670
18671 /* Make sure the register used to hold the function address is not
18672 cleared. */
18673 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18674 gcc_assert (MEM_P (address));
18675 gcc_assert (REG_P (XEXP (address, 0)));
18676 address_regnum = REGNO (XEXP (address, 0));
18677 if (address_regnum <= max_int_regno)
18678 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18679
18680 /* Set basic block of call insn so that df rescan is performed on
18681 insns inserted here. */
18682 set_block_for_insn (insn, bb);
18683 df_set_flags (DF_DEFER_INSN_RESCAN);
18684 start_sequence ();
18685
18686 /* Make sure the scheduler doesn't schedule other insns beyond
18687 here. */
18688 emit_insn (gen_blockage ());
18689
18690 /* Walk through all arguments and clear registers
18691 appropriately. */
18692 fntype = TREE_TYPE (MEM_EXPR (address));
18693 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18694 NULL_TREE);
18695 args_so_far = pack_cumulative_args (&args_so_far_v);
18696 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18697 {
18698 rtx arg_rtx;
18699 uint64_t to_clear_args_mask;
18700
18701 if (VOID_TYPE_P (arg_type))
18702 continue;
18703
18704 function_arg_info arg (arg_type, /*named=*/true);
18705 if (!first_param)
18706 /* ??? We should advance after processing the argument and pass
18707 the argument we're advancing past. */
18708 arm_function_arg_advance (args_so_far, arg);
18709
18710 arg_rtx = arm_function_arg (args_so_far, arg);
18711 gcc_assert (REG_P (arg_rtx));
18712 to_clear_args_mask
18713 = compute_not_to_clear_mask (arg_type, arg_rtx,
18714 REGNO (arg_rtx),
18715 &padding_bits_to_clear[0]);
18716 if (to_clear_args_mask)
18717 {
18718 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18719 {
18720 if (to_clear_args_mask & (1ULL << regno))
18721 bitmap_clear_bit (to_clear_bitmap, regno);
18722 }
18723 }
18724
18725 first_param = false;
18726 }
18727
18728 /* We use right shift and left shift to clear the LSB of the address
18729 we jump to instead of using bic, to avoid having to use an extra
18730 register on Thumb-1. */
18731 clearing_reg = XEXP (address, 0);
18732 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18733 emit_insn (gen_rtx_SET (clearing_reg, shift));
18734 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18735 emit_insn (gen_rtx_SET (clearing_reg, shift));
18736
18737 if (clear_callee_saved)
18738 {
18739 rtx push_insn =
18740 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18741 /* Disable frame debug info in push because it needs to be
18742 disabled for pop (see below). */
18743 RTX_FRAME_RELATED_P (push_insn) = 0;
18744
18745 /* Lazy store multiple. */
18746 if (lazy_fpclear)
18747 {
18748 rtx imm;
18749 rtx_insn *add_insn;
18750
18751 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
18752 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18753 stack_pointer_rtx, imm));
18754 arm_add_cfa_adjust_cfa_note (add_insn,
18755 - lazy_store_stack_frame_size,
18756 stack_pointer_rtx,
18757 stack_pointer_rtx);
18758 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
18759 }
18760 /* Save VFP callee-saved registers. */
18761 else
18762 {
18763 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
18764 (max_fp_regno - D7_VFP_REGNUM) / 2);
18765 /* Disable frame debug info in push because it needs to be
18766 disabled for vpop (see below). */
18767 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18768 }
18769 }
18770
18771 /* Clear caller-saved registers that leak before doing a non-secure
18772 call. */
18773 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18774 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18775 NUM_ARG_REGS, ip_reg, clearing_reg);
18776
18777 seq = get_insns ();
18778 end_sequence ();
18779 emit_insn_before (seq, insn);
18780
18781 if (TARGET_HAVE_FPCXT_CMSE)
18782 {
18783 rtx_insn *last, *pop_insn, *after = insn;
18784
18785 start_sequence ();
18786
18787 /* Lazy load multiple done as part of libcall in Armv8-M. */
18788 if (lazy_fpclear)
18789 {
18790 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
18791 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
18792 rtx_insn *add_insn =
18793 emit_insn (gen_addsi3 (stack_pointer_rtx,
18794 stack_pointer_rtx, imm));
18795 arm_add_cfa_adjust_cfa_note (add_insn,
18796 lazy_store_stack_frame_size,
18797 stack_pointer_rtx,
18798 stack_pointer_rtx);
18799 }
18800 /* Restore VFP callee-saved registers. */
18801 else
18802 {
18803 int nb_callee_saved_vfp_regs =
18804 (max_fp_regno - D7_VFP_REGNUM) / 2;
18805 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
18806 nb_callee_saved_vfp_regs,
18807 stack_pointer_rtx);
18808 /* Disable frame debug info in vpop because the SP adjustment
18809 is made using a CFA adjustment note while CFA used is
18810 sometimes R7. This then causes an assert failure in the
18811 CFI note creation code. */
18812 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18813 }
18814
18815 arm_emit_multi_reg_pop (callee_saved_mask);
18816 pop_insn = get_last_insn ();
18817
18818 /* Disable frame debug info in pop because its CFI notes reset the state
18819 of popped registers to what it was at the beginning of the
18820 function, before the prologue. This leads to incorrect state
18821 when doing the pop after the nonsecure call for registers that
18822 are pushed both in prologue and before the nonsecure call.
18823
18824 It also occasionally triggers an assert failure in CFI note
18825 creation code when there are two codepaths to the epilogue,
18826 one of which does not go through the nonsecure call.
18827 Obviously this means that debugging between the push and pop is
18828 not reliable. */
18829 RTX_FRAME_RELATED_P (pop_insn) = 0;
18830
18831 seq = get_insns ();
18832 last = get_last_insn ();
18833 end_sequence ();
18834
18835 emit_insn_after (seq, after);
18836
18837 /* Skip the pop we have just inserted after the nonsecure call; we
18838 know it does not contain a nonsecure call. */
18839 insn = last;
18840 }
18841 }
18842 }
18843 }
18844
18845 /* Rewrite move insn into subtract of 0 if the condition codes will
18846 be useful in the next conditional jump insn. */
18847
18848 static void
18849 thumb1_reorg (void)
18850 {
18851 basic_block bb;
18852
18853 FOR_EACH_BB_FN (bb, cfun)
18854 {
18855 rtx dest, src;
18856 rtx cmp, op0, op1, set = NULL;
18857 rtx_insn *prev, *insn = BB_END (bb);
18858 bool insn_clobbered = false;
18859
18860 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18861 insn = PREV_INSN (insn);
18862
18863 /* Find the last cbranchsi4_insn in basic block BB. */
18864 if (insn == BB_HEAD (bb)
18865 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18866 continue;
18867
18868 /* Get the register with which we are comparing. */
18869 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18870 op0 = XEXP (cmp, 0);
18871 op1 = XEXP (cmp, 1);
18872
18873 /* Check that comparison is against ZERO. */
18874 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18875 continue;
18876
18877 /* Find the first flag setting insn before INSN in basic block BB. */
18878 gcc_assert (insn != BB_HEAD (bb));
18879 for (prev = PREV_INSN (insn);
18880 (!insn_clobbered
18881 && prev != BB_HEAD (bb)
18882 && (NOTE_P (prev)
18883 || DEBUG_INSN_P (prev)
18884 || ((set = single_set (prev)) != NULL
18885 && get_attr_conds (prev) == CONDS_NOCOND)));
18886 prev = PREV_INSN (prev))
18887 {
18888 if (reg_set_p (op0, prev))
18889 insn_clobbered = true;
18890 }
18891
18892 /* Skip if op0 is clobbered by insn other than prev. */
18893 if (insn_clobbered)
18894 continue;
18895
18896 if (!set)
18897 continue;
18898
18899 dest = SET_DEST (set);
18900 src = SET_SRC (set);
18901 if (!low_register_operand (dest, SImode)
18902 || !low_register_operand (src, SImode))
18903 continue;
18904
18905 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18906 in INSN. Both src and dest of the move insn are checked. */
18907 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18908 {
18909 dest = copy_rtx (dest);
18910 src = copy_rtx (src);
18911 src = gen_rtx_MINUS (SImode, src, const0_rtx);
18912 PATTERN (prev) = gen_rtx_SET (dest, src);
18913 INSN_CODE (prev) = -1;
18914 /* Set test register in INSN to dest. */
18915 XEXP (cmp, 0) = copy_rtx (dest);
18916 INSN_CODE (insn) = -1;
18917 }
18918 }
18919 }
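
/* Illustrative before/after sketch (schematic RTL): given

       (set (reg r3) (reg r2))
       ...
       cbranchsi4_insn comparing (reg r2) with 0

   the move is rewritten as (set (reg r3) (minus (reg r2) (const_int 0))) and
   the comparison operand is redirected to r3, so the flag-setting SUBS can
   feed the conditional branch and the separate compare can be dropped when
   the branch is output.  */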
18920
18921 /* Convert instructions to their cc-clobbering variant if possible, since
18922 that allows us to use smaller encodings. */
18923
18924 static void
18925 thumb2_reorg (void)
18926 {
18927 basic_block bb;
18928 regset_head live;
18929
18930 INIT_REG_SET (&live);
18931
18932 /* We are freeing block_for_insn in the toplev to keep compatibility
18933 with old MDEP_REORGS that are not CFG based. Recompute it now. */
18934 compute_bb_for_insn ();
18935 df_analyze ();
18936
18937 enum Convert_Action {SKIP, CONV, SWAP_CONV};
18938
18939 FOR_EACH_BB_FN (bb, cfun)
18940 {
18941 if ((current_tune->disparage_flag_setting_t16_encodings
18942 == tune_params::DISPARAGE_FLAGS_ALL)
18943 && optimize_bb_for_speed_p (bb))
18944 continue;
18945
18946 rtx_insn *insn;
18947 Convert_Action action = SKIP;
18948 Convert_Action action_for_partial_flag_setting
18949 = ((current_tune->disparage_flag_setting_t16_encodings
18950 != tune_params::DISPARAGE_FLAGS_NEITHER)
18951 && optimize_bb_for_speed_p (bb))
18952 ? SKIP : CONV;
18953
18954 COPY_REG_SET (&live, DF_LR_OUT (bb));
18955 df_simulate_initialize_backwards (bb, &live);
18956 FOR_BB_INSNS_REVERSE (bb, insn)
18957 {
18958 if (NONJUMP_INSN_P (insn)
18959 && !REGNO_REG_SET_P (&live, CC_REGNUM)
18960 && GET_CODE (PATTERN (insn)) == SET)
18961 {
18962 action = SKIP;
18963 rtx pat = PATTERN (insn);
18964 rtx dst = XEXP (pat, 0);
18965 rtx src = XEXP (pat, 1);
18966 rtx op0 = NULL_RTX, op1 = NULL_RTX;
18967
18968 if (UNARY_P (src) || BINARY_P (src))
18969 op0 = XEXP (src, 0);
18970
18971 if (BINARY_P (src))
18972 op1 = XEXP (src, 1);
18973
18974 if (low_register_operand (dst, SImode))
18975 {
18976 switch (GET_CODE (src))
18977 {
18978 case PLUS:
18979 /* Adding two registers and storing the result
18980 in the first source is already a 16-bit
18981 operation. */
18982 if (rtx_equal_p (dst, op0)
18983 && register_operand (op1, SImode))
18984 break;
18985
18986 if (low_register_operand (op0, SImode))
18987 {
18988 /* ADDS <Rd>,<Rn>,<Rm> */
18989 if (low_register_operand (op1, SImode))
18990 action = CONV;
18991 /* ADDS <Rdn>,#<imm8> */
18992 /* SUBS <Rdn>,#<imm8> */
18993 else if (rtx_equal_p (dst, op0)
18994 && CONST_INT_P (op1)
18995 && IN_RANGE (INTVAL (op1), -255, 255))
18996 action = CONV;
18997 /* ADDS <Rd>,<Rn>,#<imm3> */
18998 /* SUBS <Rd>,<Rn>,#<imm3> */
18999 else if (CONST_INT_P (op1)
19000 && IN_RANGE (INTVAL (op1), -7, 7))
19001 action = CONV;
19002 }
19003 /* ADCS <Rd>, <Rn> */
19004 else if (GET_CODE (XEXP (src, 0)) == PLUS
19005 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19006 && low_register_operand (XEXP (XEXP (src, 0), 1),
19007 SImode)
19008 && COMPARISON_P (op1)
19009 && cc_register (XEXP (op1, 0), VOIDmode)
19010 && maybe_get_arm_condition_code (op1) == ARM_CS
19011 && XEXP (op1, 1) == const0_rtx)
19012 action = CONV;
19013 break;
19014
19015 case MINUS:
19016 /* RSBS <Rd>,<Rn>,#0
19017 Not handled here: see NEG below. */
19018 /* SUBS <Rd>,<Rn>,#<imm3>
19019 SUBS <Rdn>,#<imm8>
19020 Not handled here: see PLUS above. */
19021 /* SUBS <Rd>,<Rn>,<Rm> */
19022 if (low_register_operand (op0, SImode)
19023 && low_register_operand (op1, SImode))
19024 action = CONV;
19025 break;
19026
19027 case MULT:
19028 /* MULS <Rdm>,<Rn>,<Rdm>
19029 As an exception to the rule, this is only used
19030 when optimizing for size since MULS is slow on all
19031 known implementations. We do not even want to use
19032 MULS in cold code, if optimizing for speed, so we
19033 test the global flag here. */
19034 if (!optimize_size)
19035 break;
19036 /* Fall through. */
19037 case AND:
19038 case IOR:
19039 case XOR:
19040 /* ANDS <Rdn>,<Rm> */
19041 if (rtx_equal_p (dst, op0)
19042 && low_register_operand (op1, SImode))
19043 action = action_for_partial_flag_setting;
19044 else if (rtx_equal_p (dst, op1)
19045 && low_register_operand (op0, SImode))
19046 action = action_for_partial_flag_setting == SKIP
19047 ? SKIP : SWAP_CONV;
19048 break;
19049
19050 case ASHIFTRT:
19051 case ASHIFT:
19052 case LSHIFTRT:
19053 /* ASRS <Rdn>,<Rm> */
19054 /* LSRS <Rdn>,<Rm> */
19055 /* LSLS <Rdn>,<Rm> */
19056 if (rtx_equal_p (dst, op0)
19057 && low_register_operand (op1, SImode))
19058 action = action_for_partial_flag_setting;
19059 /* ASRS <Rd>,<Rm>,#<imm5> */
19060 /* LSRS <Rd>,<Rm>,#<imm5> */
19061 /* LSLS <Rd>,<Rm>,#<imm5> */
19062 else if (low_register_operand (op0, SImode)
19063 && CONST_INT_P (op1)
19064 && IN_RANGE (INTVAL (op1), 0, 31))
19065 action = action_for_partial_flag_setting;
19066 break;
19067
19068 case ROTATERT:
19069 /* RORS <Rdn>,<Rm> */
19070 if (rtx_equal_p (dst, op0)
19071 && low_register_operand (op1, SImode))
19072 action = action_for_partial_flag_setting;
19073 break;
19074
19075 case NOT:
19076 /* MVNS <Rd>,<Rm> */
19077 if (low_register_operand (op0, SImode))
19078 action = action_for_partial_flag_setting;
19079 break;
19080
19081 case NEG:
19082 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19083 if (low_register_operand (op0, SImode))
19084 action = CONV;
19085 break;
19086
19087 case CONST_INT:
19088 /* MOVS <Rd>,#<imm8> */
19089 if (CONST_INT_P (src)
19090 && IN_RANGE (INTVAL (src), 0, 255))
19091 action = action_for_partial_flag_setting;
19092 break;
19093
19094 case REG:
19095 /* MOVS and MOV<c> with registers have different
19096 encodings, so are not relevant here. */
19097 break;
19098
19099 default:
19100 break;
19101 }
19102 }
19103
19104 if (action != SKIP)
19105 {
19106 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19107 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19108 rtvec vec;
19109
19110 if (action == SWAP_CONV)
19111 {
19112 src = copy_rtx (src);
19113 XEXP (src, 0) = op1;
19114 XEXP (src, 1) = op0;
19115 pat = gen_rtx_SET (dst, src);
19116 vec = gen_rtvec (2, pat, clobber);
19117 }
19118 else /* action == CONV */
19119 vec = gen_rtvec (2, pat, clobber);
19120
19121 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19122 INSN_CODE (insn) = -1;
19123 }
19124 }
19125
19126 if (NONDEBUG_INSN_P (insn))
19127 df_simulate_one_insn_backwards (bb, insn, &live);
19128 }
19129 }
19130
19131 CLEAR_REG_SET (&live);
19132 }
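
/* Illustrative example: in a block where the condition codes are dead, a set
   such as (set (reg r0) (plus (reg r1) (reg r2))) is wrapped in a PARALLEL
   with (clobber (reg CC)), which allows the 16-bit flag-setting encoding
   "adds r0, r1, r2" to be used instead of the 32-bit "add.w r0, r1, r2".  */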
19133
19134 /* GCC puts the pool in the wrong place for ARM, since we can only
19135 load addresses a limited distance around the pc. We do some
19136 special munging to move the constant pool values to the correct
19137 point in the code. */
19138 static void
19139 arm_reorg (void)
19140 {
19141 rtx_insn *insn;
19142 HOST_WIDE_INT address = 0;
19143 Mfix * fix;
19144
19145 if (use_cmse)
19146 cmse_nonsecure_call_inline_register_clear ();
19147
19148 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19149 if (cfun->is_thunk)
19150 ;
19151 else if (TARGET_THUMB1)
19152 thumb1_reorg ();
19153 else if (TARGET_THUMB2)
19154 thumb2_reorg ();
19155
19156 /* Ensure all insns that must be split have been split at this point.
19157 Otherwise, the pool placement code below may compute incorrect
19158 insn lengths. Note that when optimizing, all insns have already
19159 been split at this point. */
19160 if (!optimize)
19161 split_all_insns_noflow ();
19162
19163 /* When the literal pool is disabled there should be nothing left to fix
19164 up; make sure we never attempt to create one. */
19165 if (arm_disable_literal_pool)
19166 return;
19167
19168 minipool_fix_head = minipool_fix_tail = NULL;
19169
19170 /* The first insn must always be a note, or the code below won't
19171 scan it properly. */
19172 insn = get_insns ();
19173 gcc_assert (NOTE_P (insn));
19174 minipool_pad = 0;
19175
19176 /* Scan all the insns and record the operands that will need fixing. */
19177 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19178 {
19179 if (BARRIER_P (insn))
19180 push_minipool_barrier (insn, address);
19181 else if (INSN_P (insn))
19182 {
19183 rtx_jump_table_data *table;
19184
19185 note_invalid_constants (insn, address, true);
19186 address += get_attr_length (insn);
19187
19188 /* If the insn is a vector jump, add the size of the table
19189 and skip the table. */
19190 if (tablejump_p (insn, NULL, &table))
19191 {
19192 address += get_jump_table_size (table);
19193 insn = table;
19194 }
19195 }
19196 else if (LABEL_P (insn))
19197 /* Add the worst-case padding due to alignment. We don't add
19198 the _current_ padding because the minipool insertions
19199 themselves might change it. */
19200 address += get_label_padding (insn);
19201 }
19202
19203 fix = minipool_fix_head;
19204
19205 /* Now scan the fixups and perform the required changes. */
19206 while (fix)
19207 {
19208 Mfix * ftmp;
19209 Mfix * fdel;
19210 Mfix * last_added_fix;
19211 Mfix * last_barrier = NULL;
19212 Mfix * this_fix;
19213
19214 /* Skip any further barriers before the next fix. */
19215 while (fix && BARRIER_P (fix->insn))
19216 fix = fix->next;
19217
19218 /* No more fixes. */
19219 if (fix == NULL)
19220 break;
19221
19222 last_added_fix = NULL;
19223
19224 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19225 {
19226 if (BARRIER_P (ftmp->insn))
19227 {
19228 if (ftmp->address >= minipool_vector_head->max_address)
19229 break;
19230
19231 last_barrier = ftmp;
19232 }
19233 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19234 break;
19235
19236 last_added_fix = ftmp; /* Keep track of the last fix added. */
19237 }
19238
19239 /* If we found a barrier, drop back to that; any fixes that we
19240 could have reached but come after the barrier will now go in
19241 the next mini-pool. */
19242 if (last_barrier != NULL)
19243 {
19244 /* Reduce the refcount for those fixes that won't go into this
19245 pool after all. */
19246 for (fdel = last_barrier->next;
19247 fdel && fdel != ftmp;
19248 fdel = fdel->next)
19249 {
19250 fdel->minipool->refcount--;
19251 fdel->minipool = NULL;
19252 }
19253
19254 ftmp = last_barrier;
19255 }
19256 else
19257 {
19258 /* ftmp is the first fix that we can't fit into this pool and
19259 there are no natural barriers that we could use. Insert a
19260 new barrier in the code somewhere between the previous
19261 fix and this one, and arrange to jump around it. */
19262 HOST_WIDE_INT max_address;
19263
19264 /* The last item on the list of fixes must be a barrier, so
19265 we can never run off the end of the list of fixes without
19266 last_barrier being set. */
19267 gcc_assert (ftmp);
19268
19269 max_address = minipool_vector_head->max_address;
19270 /* Check that there isn't another fix that is in range that
19271 we couldn't fit into this pool because the pool was
19272 already too large: we need to put the pool before such an
19273 instruction. The pool itself may come just after the
19274 fix because create_fix_barrier also allows space for a
19275 jump instruction. */
19276 if (ftmp->address < max_address)
19277 max_address = ftmp->address + 1;
19278
19279 last_barrier = create_fix_barrier (last_added_fix, max_address);
19280 }
19281
19282 assign_minipool_offsets (last_barrier);
19283
19284 while (ftmp)
19285 {
19286 if (!BARRIER_P (ftmp->insn)
19287 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19288 == NULL))
19289 break;
19290
19291 ftmp = ftmp->next;
19292 }
19293
19294 /* Scan over the fixes we have identified for this pool, fixing them
19295 up and adding the constants to the pool itself. */
19296 for (this_fix = fix; this_fix && ftmp != this_fix;
19297 this_fix = this_fix->next)
19298 if (!BARRIER_P (this_fix->insn))
19299 {
19300 rtx addr
19301 = plus_constant (Pmode,
19302 gen_rtx_LABEL_REF (VOIDmode,
19303 minipool_vector_label),
19304 this_fix->minipool->offset);
19305 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19306 }
19307
19308 dump_minipool (last_barrier->insn);
19309 fix = ftmp;
19310 }
19311
19312 /* From now on we must synthesize any constants that we can't handle
19313 directly. This can happen if the RTL gets split during final
19314 instruction generation. */
19315 cfun->machine->after_arm_reorg = 1;
19316
19317 /* Free the minipool memory. */
19318 obstack_free (&minipool_obstack, minipool_startobj);
19319 }
19320 \f
19321 /* Routines to output assembly language. */
19322
19323 /* Return string representation of passed in real value. */
19324 static const char *
19325 fp_const_from_val (REAL_VALUE_TYPE *r)
19326 {
19327 if (!fp_consts_inited)
19328 init_fp_table ();
19329
19330 gcc_assert (real_equal (r, &value_fp0));
19331 return "0";
19332 }
19333
19334 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19335 OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
19336 is in the list, and UPDATE is true iff the list contains an explicit
19337 update of the base register. */
19338 void
19339 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19340 bool update)
19341 {
19342 int i;
19343 char pattern[100];
19344 int offset;
19345 const char *conditional;
19346 int num_saves = XVECLEN (operands[0], 0);
19347 unsigned int regno;
19348 unsigned int regno_base = REGNO (operands[1]);
19349 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19350
19351 offset = 0;
19352 offset += update ? 1 : 0;
19353 offset += return_pc ? 1 : 0;
19354
19355 /* Is the base register in the list? */
19356 for (i = offset; i < num_saves; i++)
19357 {
19358 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19359 /* If SP is in the list, then the base register must be SP. */
19360 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19361 /* If base register is in the list, there must be no explicit update. */
19362 if (regno == regno_base)
19363 gcc_assert (!update);
19364 }
19365
19366 conditional = reverse ? "%?%D0" : "%?%d0";
19367 /* Can't use POP if returning from an interrupt. */
19368 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19369 sprintf (pattern, "pop%s\t{", conditional);
19370 else
19371 {
19372 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19373 It's just a convention, their semantics are identical. */
19374 if (regno_base == SP_REGNUM)
19375 sprintf (pattern, "ldmfd%s\t", conditional);
19376 else if (update)
19377 sprintf (pattern, "ldmia%s\t", conditional);
19378 else
19379 sprintf (pattern, "ldm%s\t", conditional);
19380
19381 strcat (pattern, reg_names[regno_base]);
19382 if (update)
19383 strcat (pattern, "!, {");
19384 else
19385 strcat (pattern, ", {");
19386 }
19387
19388 /* Output the first destination register. */
19389 strcat (pattern,
19390 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19391
19392 /* Output the rest of the destination registers. */
19393 for (i = offset + 1; i < num_saves; i++)
19394 {
19395 strcat (pattern, ", ");
19396 strcat (pattern,
19397 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19398 }
19399
19400 strcat (pattern, "}");
19401
19402 if (interrupt_p && return_pc)
19403 strcat (pattern, "^");
19404
19405 output_asm_insn (pattern, &cond);
19406 }
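
/* Example outputs (illustrative): with SP as the base register, writeback and
   no interrupt return this emits e.g. "pop {r4, r5, pc}"; when returning from
   an interrupt handler the same registers come out as
   "ldmfd sp!, {r4, r5, pc}^".  */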
19407
19408
19409 /* Output the assembly for a store multiple. */
19410
19411 const char *
19412 vfp_output_vstmd (rtx * operands)
19413 {
19414 char pattern[100];
19415 int p;
19416 int base;
19417 int i;
19418 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19419 ? XEXP (operands[0], 0)
19420 : XEXP (XEXP (operands[0], 0), 0);
19421 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19422
19423 if (push_p)
19424 strcpy (pattern, "vpush%?.64\t{%P1");
19425 else
19426 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19427
19428 p = strlen (pattern);
19429
19430 gcc_assert (REG_P (operands[1]));
19431
19432 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19433 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19434 {
19435 p += sprintf (&pattern[p], ", d%d", base + i);
19436 }
19437 strcpy (&pattern[p], "}");
19438
19439 output_asm_insn (pattern, operands);
19440 return "";
19441 }
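
/* Example (illustrative): storing d8 and d9 with SP as the address register
   produces "vpush.64 {d8, d9}"; with any other base register the same
   operands produce "vstmdb.64 rN!, {d8, d9}".  */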
19442
19443
19444 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19445 number of bytes pushed. */
19446
19447 static int
19448 vfp_emit_fstmd (int base_reg, int count)
19449 {
19450 rtx par;
19451 rtx dwarf;
19452 rtx tmp, reg;
19453 int i;
19454
19455 /* Work around an ARM10 VFPr1 bug: data corruption can occur when exactly two
19456 register pairs are stored by a store multiple insn. We avoid this
19457 by pushing an extra pair. */
19458 if (count == 2 && !arm_arch6)
19459 {
19460 if (base_reg == LAST_VFP_REGNUM - 3)
19461 base_reg -= 2;
19462 count++;
19463 }
19464
19465 /* FSTMD may not store more than 16 doubleword registers at once. Split
19466 larger stores into multiple parts (up to a maximum of two, in
19467 practice). */
19468 if (count > 16)
19469 {
19470 int saved;
19471 /* NOTE: base_reg is an internal register number, so each D register
19472 counts as 2. */
19473 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19474 saved += vfp_emit_fstmd (base_reg, 16);
19475 return saved;
19476 }
19477
19478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19479 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19480
19481 reg = gen_rtx_REG (DFmode, base_reg);
19482 base_reg += 2;
19483
19484 XVECEXP (par, 0, 0)
19485 = gen_rtx_SET (gen_frame_mem
19486 (BLKmode,
19487 gen_rtx_PRE_MODIFY (Pmode,
19488 stack_pointer_rtx,
19489 plus_constant
19490 (Pmode, stack_pointer_rtx,
19491 - (count * 8)))
19492 ),
19493 gen_rtx_UNSPEC (BLKmode,
19494 gen_rtvec (1, reg),
19495 UNSPEC_PUSH_MULT));
19496
19497 tmp = gen_rtx_SET (stack_pointer_rtx,
19498 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19499 RTX_FRAME_RELATED_P (tmp) = 1;
19500 XVECEXP (dwarf, 0, 0) = tmp;
19501
19502 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19503 RTX_FRAME_RELATED_P (tmp) = 1;
19504 XVECEXP (dwarf, 0, 1) = tmp;
19505
19506 for (i = 1; i < count; i++)
19507 {
19508 reg = gen_rtx_REG (DFmode, base_reg);
19509 base_reg += 2;
19510 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19511
19512 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19513 plus_constant (Pmode,
19514 stack_pointer_rtx,
19515 i * 8)),
19516 reg);
19517 RTX_FRAME_RELATED_P (tmp) = 1;
19518 XVECEXP (dwarf, 0, i + 1) = tmp;
19519 }
19520
19521 par = emit_insn (par);
19522 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19523 RTX_FRAME_RELATED_P (par) = 1;
19524
19525 return count * 8;
19526 }
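
/* Illustrative effect of the above: on a pre-Armv6 core a call such as
   vfp_emit_fstmd (base, 2) actually pushes three D registers (24 bytes)
   because of the erratum workaround, and the REG_FRAME_RELATED_EXPR note
   records the SP adjustment and each store individually, so the unwinder
   sees every saved D register at its correct offset from the new SP.  */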
19527
19528 /* Return TRUE if -mcmse has been passed and the function pointed to by ADDR
19529 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
19530
19531 bool
19532 detect_cmse_nonsecure_call (tree addr)
19533 {
19534 if (!addr)
19535 return FALSE;
19536
19537 tree fntype = TREE_TYPE (addr);
19538 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19539 TYPE_ATTRIBUTES (fntype)))
19540 return TRUE;
19541 return FALSE;
19542 }
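
/* Illustrative source-level usage (assumed, not taken from this file):
   with -mcmse, a declaration along the lines of

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
       ns_fn_t *fp;

   puts the attribute on the function type, so calls made through FP are
   the kind of call this predicate is intended to recognise.  */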
19543
19544
19545 /* Emit a call instruction with pattern PAT. ADDR is the address of
19546 the call target. */
19547
19548 void
19549 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19550 {
19551 rtx insn;
19552
19553 insn = emit_call_insn (pat);
19554
19555 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19556 If the call might use such an entry, add a use of the PIC register
19557 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19558 if (TARGET_VXWORKS_RTP
19559 && flag_pic
19560 && !sibcall
19561 && GET_CODE (addr) == SYMBOL_REF
19562 && (SYMBOL_REF_DECL (addr)
19563 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19564 : !SYMBOL_REF_LOCAL_P (addr)))
19565 {
19566 require_pic_register (NULL_RTX, false /*compute_now*/);
19567 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19568 }
19569
19570 if (TARGET_FDPIC)
19571 {
19572 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19573 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19574 }
19575
19576 if (TARGET_AAPCS_BASED)
19577 {
19578 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19579 linker. We need to add an IP clobber to allow setting
19580 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19581 is not needed since it's a fixed register. */
19582 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19583 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19584 }
19585 }
19586
19587 /* Output a 'call' insn. */
19588 const char *
19589 output_call (rtx *operands)
19590 {
19591 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19592
19593 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19594 if (REGNO (operands[0]) == LR_REGNUM)
19595 {
19596 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19597 output_asm_insn ("mov%?\t%0, %|lr", operands);
19598 }
19599
19600 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19601
19602 if (TARGET_INTERWORK || arm_arch4t)
19603 output_asm_insn ("bx%?\t%0", operands);
19604 else
19605 output_asm_insn ("mov%?\t%|pc, %0", operands);
19606
19607 return "";
19608 }
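
/* Illustrative output for a call through r2 on a target without BLX:

       mov     lr, pc
       bx      r2              @ with interworking or ARMv4T

   or, on still older cores,

       mov     lr, pc
       mov     pc, r2

   Calls through LR are first copied into IP, as handled above.  */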
19609
19610 /* Output a move from arm registers to arm registers of a long double
19611 OPERANDS[0] is the destination.
19612 OPERANDS[1] is the source. */
19613 const char *
19614 output_mov_long_double_arm_from_arm (rtx *operands)
19615 {
19616 /* We have to be careful here because the two might overlap. */
19617 int dest_start = REGNO (operands[0]);
19618 int src_start = REGNO (operands[1]);
19619 rtx ops[2];
19620 int i;
19621
19622 if (dest_start < src_start)
19623 {
19624 for (i = 0; i < 3; i++)
19625 {
19626 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19627 ops[1] = gen_rtx_REG (SImode, src_start + i);
19628 output_asm_insn ("mov%?\t%0, %1", ops);
19629 }
19630 }
19631 else
19632 {
19633 for (i = 2; i >= 0; i--)
19634 {
19635 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19636 ops[1] = gen_rtx_REG (SImode, src_start + i);
19637 output_asm_insn ("mov%?\t%0, %1", ops);
19638 }
19639 }
19640
19641 return "";
19642 }
19643
19644 void
19645 arm_emit_movpair (rtx dest, rtx src)
19646 {
19647 /* If the src is an immediate, simplify it. */
19648 if (CONST_INT_P (src))
19649 {
19650 HOST_WIDE_INT val = INTVAL (src);
19651 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19652 if ((val >> 16) & 0x0000ffff)
19653 {
19654 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19655 GEN_INT (16)),
19656 GEN_INT ((val >> 16) & 0x0000ffff));
19657 rtx_insn *insn = get_last_insn ();
19658 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19659 }
19660 return;
19661 }
19662 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19663 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19664 rtx_insn *insn = get_last_insn ();
19665 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19666 }
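
/* Illustrative expansion (register choice is arbitrary): for a constant
   source of 0x12345678 the two sets emitted above are typically output
   as a movw/movt pair,

       movw    r0, #0x5678
       movt    r0, #0x1234

   and only the first insn is needed when the upper halfword is zero.  */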
19667
19668 /* Output a move between double words. It must be REG<-MEM
19669 or MEM<-REG. */
19670 const char *
19671 output_move_double (rtx *operands, bool emit, int *count)
19672 {
19673 enum rtx_code code0 = GET_CODE (operands[0]);
19674 enum rtx_code code1 = GET_CODE (operands[1]);
19675 rtx otherops[3];
19676 if (count)
19677 *count = 1;
19678
19679 /* The only case when this might happen is when
19680 you are looking at the length of a DImode instruction
19681 that has an invalid constant in it. */
19682 if (code0 == REG && code1 != MEM)
19683 {
19684 gcc_assert (!emit);
19685 *count = 2;
19686 return "";
19687 }
19688
19689 if (code0 == REG)
19690 {
19691 unsigned int reg0 = REGNO (operands[0]);
19692 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19693
19694 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19695
19696 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19697
19698 switch (GET_CODE (XEXP (operands[1], 0)))
19699 {
19700 case REG:
19701
19702 if (emit)
19703 {
19704 if (can_ldrd
19705 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19706 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19707 else
19708 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19709 }
19710 break;
19711
19712 case PRE_INC:
19713 gcc_assert (can_ldrd);
19714 if (emit)
19715 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19716 break;
19717
19718 case PRE_DEC:
19719 if (emit)
19720 {
19721 if (can_ldrd)
19722 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19723 else
19724 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19725 }
19726 break;
19727
19728 case POST_INC:
19729 if (emit)
19730 {
19731 if (can_ldrd)
19732 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19733 else
19734 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19735 }
19736 break;
19737
19738 case POST_DEC:
19739 gcc_assert (can_ldrd);
19740 if (emit)
19741 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19742 break;
19743
19744 case PRE_MODIFY:
19745 case POST_MODIFY:
19746 /* Autoincrement addressing modes should never have overlapping
19747 base and destination registers, and overlapping index registers
19748 are already prohibited, so this doesn't need to worry about
19749 fix_cm3_ldrd. */
19750 otherops[0] = operands[0];
19751 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19752 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19753
19754 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19755 {
19756 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19757 {
19758 /* Registers overlap so split out the increment. */
19759 if (emit)
19760 {
19761 gcc_assert (can_ldrd);
19762 output_asm_insn ("add%?\t%1, %1, %2", otherops);
19763 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19764 }
19765 if (count)
19766 *count = 2;
19767 }
19768 else
19769 {
19770 /* Use a single insn if we can.
19771 FIXME: IWMMXT allows offsets larger than ldrd can
19772 handle, fix these up with a pair of ldr. */
19773 if (can_ldrd
19774 && (TARGET_THUMB2
19775 || !CONST_INT_P (otherops[2])
19776 || (INTVAL (otherops[2]) > -256
19777 && INTVAL (otherops[2]) < 256)))
19778 {
19779 if (emit)
19780 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19781 }
19782 else
19783 {
19784 if (emit)
19785 {
19786 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19787 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19788 }
19789 if (count)
19790 *count = 2;
19791
19792 }
19793 }
19794 }
19795 else
19796 {
19797 /* Use a single insn if we can.
19798 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19799 fix these up with a pair of ldr. */
19800 if (can_ldrd
19801 && (TARGET_THUMB2
19802 || !CONST_INT_P (otherops[2])
19803 || (INTVAL (otherops[2]) > -256
19804 && INTVAL (otherops[2]) < 256)))
19805 {
19806 if (emit)
19807 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19808 }
19809 else
19810 {
19811 if (emit)
19812 {
19813 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19814 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19815 }
19816 if (count)
19817 *count = 2;
19818 }
19819 }
19820 break;
19821
19822 case LABEL_REF:
19823 case CONST:
19824 /* We might be able to use ldrd %0, %1 here. However the range is
19825 different to ldr/adr, and it is broken on some ARMv7-M
19826 implementations. */
19827 /* Use the second register of the pair to avoid problematic
19828 overlap. */
19829 otherops[1] = operands[1];
19830 if (emit)
19831 output_asm_insn ("adr%?\t%0, %1", otherops);
19832 operands[1] = otherops[0];
19833 if (emit)
19834 {
19835 if (can_ldrd)
19836 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19837 else
19838 output_asm_insn ("ldmia%?\t%1, %M0", operands);
19839 }
19840
19841 if (count)
19842 *count = 2;
19843 break;
19844
19845 /* ??? This needs checking for thumb2. */
19846 default:
19847 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19848 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19849 {
19850 otherops[0] = operands[0];
19851 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19852 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19853
19854 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19855 {
19856 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19857 {
19858 switch ((int) INTVAL (otherops[2]))
19859 {
19860 case -8:
19861 if (emit)
19862 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19863 return "";
19864 case -4:
19865 if (TARGET_THUMB2)
19866 break;
19867 if (emit)
19868 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19869 return "";
19870 case 4:
19871 if (TARGET_THUMB2)
19872 break;
19873 if (emit)
19874 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19875 return "";
19876 }
19877 }
19878 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
19879 operands[1] = otherops[0];
19880 if (can_ldrd
19881 && (REG_P (otherops[2])
19882 || TARGET_THUMB2
19883 || (CONST_INT_P (otherops[2])
19884 && INTVAL (otherops[2]) > -256
19885 && INTVAL (otherops[2]) < 256)))
19886 {
19887 if (reg_overlap_mentioned_p (operands[0],
19888 otherops[2]))
19889 {
19890 /* Swap base and index registers over to
19891 avoid a conflict. */
19892 std::swap (otherops[1], otherops[2]);
19893 }
19894 /* If both registers conflict, it will usually
19895 have been fixed by a splitter. */
19896 if (reg_overlap_mentioned_p (operands[0], otherops[2])
19897 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19898 {
19899 if (emit)
19900 {
19901 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19902 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19903 }
19904 if (count)
19905 *count = 2;
19906 }
19907 else
19908 {
19909 otherops[0] = operands[0];
19910 if (emit)
19911 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19912 }
19913 return "";
19914 }
19915
19916 if (CONST_INT_P (otherops[2]))
19917 {
19918 if (emit)
19919 {
19920 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19921 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19922 else
19923 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19924 }
19925 }
19926 else
19927 {
19928 if (emit)
19929 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19930 }
19931 }
19932 else
19933 {
19934 if (emit)
19935 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19936 }
19937
19938 if (count)
19939 *count = 2;
19940
19941 if (can_ldrd)
19942 return "ldrd%?\t%0, [%1]";
19943
19944 return "ldmia%?\t%1, %M0";
19945 }
19946 else
19947 {
19948 otherops[1] = adjust_address (operands[1], SImode, 4);
19949 /* Take care of overlapping base/data reg. */
19950 if (reg_mentioned_p (operands[0], operands[1]))
19951 {
19952 if (emit)
19953 {
19954 output_asm_insn ("ldr%?\t%0, %1", otherops);
19955 output_asm_insn ("ldr%?\t%0, %1", operands);
19956 }
19957 if (count)
19958 *count = 2;
19959
19960 }
19961 else
19962 {
19963 if (emit)
19964 {
19965 output_asm_insn ("ldr%?\t%0, %1", operands);
19966 output_asm_insn ("ldr%?\t%0, %1", otherops);
19967 }
19968 if (count)
19969 *count = 2;
19970 }
19971 }
19972 }
19973 }
19974 else
19975 {
19976 /* Constraints should ensure this. */
19977 gcc_assert (code0 == MEM && code1 == REG);
19978 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
19979 || (TARGET_ARM && TARGET_LDRD));
19980
19981 /* For TARGET_ARM the first source register of an STRD
19982 must be even. This is usually the case for double-word
19983 values but user assembly constraints can force an odd
19984 starting register. */
19985 bool allow_strd = TARGET_LDRD
19986 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
19987 switch (GET_CODE (XEXP (operands[0], 0)))
19988 {
19989 case REG:
19990 if (emit)
19991 {
19992 if (allow_strd)
19993 output_asm_insn ("strd%?\t%1, [%m0]", operands);
19994 else
19995 output_asm_insn ("stm%?\t%m0, %M1", operands);
19996 }
19997 break;
19998
19999 case PRE_INC:
20000 gcc_assert (allow_strd);
20001 if (emit)
20002 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20003 break;
20004
20005 case PRE_DEC:
20006 if (emit)
20007 {
20008 if (allow_strd)
20009 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20010 else
20011 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20012 }
20013 break;
20014
20015 case POST_INC:
20016 if (emit)
20017 {
20018 if (allow_strd)
20019 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20020 else
20021 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20022 }
20023 break;
20024
20025 case POST_DEC:
20026 gcc_assert (allow_strd);
20027 if (emit)
20028 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20029 break;
20030
20031 case PRE_MODIFY:
20032 case POST_MODIFY:
20033 otherops[0] = operands[1];
20034 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20035 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20036
20037 /* IWMMXT allows offsets larger than strd can handle,
20038 fix these up with a pair of str. */
20039 if (!TARGET_THUMB2
20040 && CONST_INT_P (otherops[2])
20041 && (INTVAL(otherops[2]) <= -256
20042 || INTVAL(otherops[2]) >= 256))
20043 {
20044 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20045 {
20046 if (emit)
20047 {
20048 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20049 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20050 }
20051 if (count)
20052 *count = 2;
20053 }
20054 else
20055 {
20056 if (emit)
20057 {
20058 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20059 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20060 }
20061 if (count)
20062 *count = 2;
20063 }
20064 }
20065 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20066 {
20067 if (emit)
20068 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20069 }
20070 else
20071 {
20072 if (emit)
20073 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20074 }
20075 break;
20076
20077 case PLUS:
20078 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20079 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20080 {
20081 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20082 {
20083 case -8:
20084 if (emit)
20085 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20086 return "";
20087
20088 case -4:
20089 if (TARGET_THUMB2)
20090 break;
20091 if (emit)
20092 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20093 return "";
20094
20095 case 4:
20096 if (TARGET_THUMB2)
20097 break;
20098 if (emit)
20099 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20100 return "";
20101 }
20102 }
20103 if (allow_strd
20104 && (REG_P (otherops[2])
20105 || TARGET_THUMB2
20106 || (CONST_INT_P (otherops[2])
20107 && INTVAL (otherops[2]) > -256
20108 && INTVAL (otherops[2]) < 256)))
20109 {
20110 otherops[0] = operands[1];
20111 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20112 if (emit)
20113 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20114 return "";
20115 }
20116 /* Fall through */
20117
20118 default:
20119 otherops[0] = adjust_address (operands[0], SImode, 4);
20120 otherops[1] = operands[1];
20121 if (emit)
20122 {
20123 output_asm_insn ("str%?\t%1, %0", operands);
20124 output_asm_insn ("str%?\t%H1, %0", otherops);
20125 }
20126 if (count)
20127 *count = 2;
20128 }
20129 }
20130
20131 return "";
20132 }
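
/* Illustrative examples of the simplest cases above: a DImode load from
   a plain register address is emitted as either

       ldrd    r0, [r2]                @ when LDRD is usable
       ldmia   r2, {r0, r1}            @ otherwise

   while the pre/post-modify and large-offset cases fall back to a pair
   of LDR or STR instructions, with *COUNT set to 2.  */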
20133
20134 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20135 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20136
20137 const char *
20138 output_move_quad (rtx *operands)
20139 {
20140 if (REG_P (operands[0]))
20141 {
20142 /* Load, or reg->reg move. */
20143
20144 if (MEM_P (operands[1]))
20145 {
20146 switch (GET_CODE (XEXP (operands[1], 0)))
20147 {
20148 case REG:
20149 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20150 break;
20151
20152 case LABEL_REF:
20153 case CONST:
20154 output_asm_insn ("adr%?\t%0, %1", operands);
20155 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20156 break;
20157
20158 default:
20159 gcc_unreachable ();
20160 }
20161 }
20162 else
20163 {
20164 rtx ops[2];
20165 int dest, src, i;
20166
20167 gcc_assert (REG_P (operands[1]));
20168
20169 dest = REGNO (operands[0]);
20170 src = REGNO (operands[1]);
20171
20172 /* This seems pretty dumb, but hopefully GCC won't try to do it
20173 very often. */
20174 if (dest < src)
20175 for (i = 0; i < 4; i++)
20176 {
20177 ops[0] = gen_rtx_REG (SImode, dest + i);
20178 ops[1] = gen_rtx_REG (SImode, src + i);
20179 output_asm_insn ("mov%?\t%0, %1", ops);
20180 }
20181 else
20182 for (i = 3; i >= 0; i--)
20183 {
20184 ops[0] = gen_rtx_REG (SImode, dest + i);
20185 ops[1] = gen_rtx_REG (SImode, src + i);
20186 output_asm_insn ("mov%?\t%0, %1", ops);
20187 }
20188 }
20189 }
20190 else
20191 {
20192 gcc_assert (MEM_P (operands[0]));
20193 gcc_assert (REG_P (operands[1]));
20194 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20195
20196 switch (GET_CODE (XEXP (operands[0], 0)))
20197 {
20198 case REG:
20199 output_asm_insn ("stm%?\t%m0, %M1", operands);
20200 break;
20201
20202 default:
20203 gcc_unreachable ();
20204 }
20205 }
20206
20207 return "";
20208 }
20209
20210 /* Output a VFP load or store instruction. */
20211
20212 const char *
20213 output_move_vfp (rtx *operands)
20214 {
20215 rtx reg, mem, addr, ops[2];
20216 int load = REG_P (operands[0]);
20217 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20218 int sp = (!TARGET_VFP_FP16INST
20219 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20220 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20221 const char *templ;
20222 char buff[50];
20223 machine_mode mode;
20224
20225 reg = operands[!load];
20226 mem = operands[load];
20227
20228 mode = GET_MODE (reg);
20229
20230 gcc_assert (REG_P (reg));
20231 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20232 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20233 || mode == SFmode
20234 || mode == DFmode
20235 || mode == HImode
20236 || mode == SImode
20237 || mode == DImode
20238 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20239 gcc_assert (MEM_P (mem));
20240
20241 addr = XEXP (mem, 0);
20242
20243 switch (GET_CODE (addr))
20244 {
20245 case PRE_DEC:
20246 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20247 ops[0] = XEXP (addr, 0);
20248 ops[1] = reg;
20249 break;
20250
20251 case POST_INC:
20252 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20253 ops[0] = XEXP (addr, 0);
20254 ops[1] = reg;
20255 break;
20256
20257 default:
20258 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20259 ops[0] = reg;
20260 ops[1] = mem;
20261 break;
20262 }
20263
20264 sprintf (buff, templ,
20265 load ? "ld" : "st",
20266 dp ? "64" : sp ? "32" : "16",
20267 dp ? "P" : "",
20268 integer_p ? "\t%@ int" : "");
20269 output_asm_insn (buff, ops);
20270
20271 return "";
20272 }
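
/* Sketch of how the template above expands (operands are illustrative):
   a double-precision load from [r1] becomes

       vldr.64 d0, [r1]

   and a single-precision store with post-increment becomes

       vstmia.32       r1!, {s0}

   The "%@ int" comment suffix is appended for integer modes only.  */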
20273
20274 /* Output a Neon double-word or quad-word load or store, or a load
20275 or store for larger structure modes.
20276
20277 WARNING: The ordering of elements is weird in big-endian mode,
20278 because the EABI requires that vectors stored in memory appear
20279 as though they were stored by a VSTM instruction.
20280 GCC RTL defines element ordering based on in-memory order.
20281 This can be different from the architectural ordering of elements
20282 within a NEON register. The intrinsics defined in arm_neon.h use the
20283 NEON register element ordering, not the GCC RTL element ordering.
20284
20285 For example, the in-memory ordering of a big-endian quadword
20286 vector with 16-bit elements when stored from register pair {d0,d1}
20287 will be (lowest address first, d0[N] is NEON register element N):
20288
20289 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20290
20291 When necessary, quadword registers (dN, dN+1) are moved to ARM
20292 registers from rN in the order:
20293
20294 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20295
20296 So that STM/LDM can be used on vectors in ARM registers, and the
20297 same memory layout will result as if VSTM/VLDM were used.
20298
20299 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20300 possible, which allows use of appropriate alignment tags.
20301 Note that the choice of "64" is independent of the actual vector
20302 element size; this size simply ensures that the behavior is
20303 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20304
20305 Due to limitations of those instructions, use of VST1.64/VLD1.64
20306 is not possible if:
20307 - the address contains PRE_DEC, or
20308 - the mode refers to more than 4 double-word registers
20309
20310 In those cases, it would be possible to replace VSTM/VLDM by a
20311 sequence of instructions; this is not currently implemented since
20312 this is not certain to actually improve performance. */
20313
20314 const char *
20315 output_move_neon (rtx *operands)
20316 {
20317 rtx reg, mem, addr, ops[2];
20318 int regno, nregs, load = REG_P (operands[0]);
20319 const char *templ;
20320 char buff[50];
20321 machine_mode mode;
20322
20323 reg = operands[!load];
20324 mem = operands[load];
20325
20326 mode = GET_MODE (reg);
20327
20328 gcc_assert (REG_P (reg));
20329 regno = REGNO (reg);
20330 nregs = REG_NREGS (reg) / 2;
20331 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20332 || NEON_REGNO_OK_FOR_QUAD (regno));
20333 gcc_assert (VALID_NEON_DREG_MODE (mode)
20334 || VALID_NEON_QREG_MODE (mode)
20335 || VALID_NEON_STRUCT_MODE (mode));
20336 gcc_assert (MEM_P (mem));
20337
20338 addr = XEXP (mem, 0);
20339
20340 /* Strip off const from addresses like (const (plus (...))). */
20341 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20342 addr = XEXP (addr, 0);
20343
20344 switch (GET_CODE (addr))
20345 {
20346 case POST_INC:
20347 /* We have to use vldm / vstm for too-large modes. */
20348 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20349 {
20350 templ = "v%smia%%?\t%%0!, %%h1";
20351 ops[0] = XEXP (addr, 0);
20352 }
20353 else
20354 {
20355 templ = "v%s1.64\t%%h1, %%A0";
20356 ops[0] = mem;
20357 }
20358 ops[1] = reg;
20359 break;
20360
20361 case PRE_DEC:
20362 /* We have to use vldm / vstm in this case, since there is no
20363 pre-decrement form of the vld1 / vst1 instructions. */
20364 templ = "v%smdb%%?\t%%0!, %%h1";
20365 ops[0] = XEXP (addr, 0);
20366 ops[1] = reg;
20367 break;
20368
20369 case POST_MODIFY:
20370 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20371 gcc_unreachable ();
20372
20373 case REG:
20374 /* We have to use vldm / vstm for too-large modes. */
20375 if (nregs > 1)
20376 {
20377 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20378 templ = "v%smia%%?\t%%m0, %%h1";
20379 else
20380 templ = "v%s1.64\t%%h1, %%A0";
20381
20382 ops[0] = mem;
20383 ops[1] = reg;
20384 break;
20385 }
20386 /* Fall through. */
20387 case PLUS:
20388 if (GET_CODE (addr) == PLUS)
20389 addr = XEXP (addr, 0);
20390 /* Fall through. */
20391 case LABEL_REF:
20392 {
20393 int i;
20394 int overlap = -1;
20395 for (i = 0; i < nregs; i++)
20396 {
20397 /* We're only using DImode here because it's a convenient
20398 size. */
20399 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20400 ops[1] = adjust_address (mem, DImode, 8 * i);
20401 if (reg_overlap_mentioned_p (ops[0], mem))
20402 {
20403 gcc_assert (overlap == -1);
20404 overlap = i;
20405 }
20406 else
20407 {
20408 if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20409 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20410 else
20411 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20412 output_asm_insn (buff, ops);
20413 }
20414 }
20415 if (overlap != -1)
20416 {
20417 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20418 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20419 if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20420 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20421 else
20422 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20423 output_asm_insn (buff, ops);
20424 }
20425
20426 return "";
20427 }
20428
20429 default:
20430 gcc_unreachable ();
20431 }
20432
20433 sprintf (buff, templ, load ? "ld" : "st");
20434 output_asm_insn (buff, ops);
20435
20436 return "";
20437 }
20438
20439 /* Compute and return the length of neon_mov<mode>, where <mode> is
20440 one of VSTRUCT modes: EI, OI, CI or XI. */
20441 int
20442 arm_attr_length_move_neon (rtx_insn *insn)
20443 {
20444 rtx reg, mem, addr;
20445 int load;
20446 machine_mode mode;
20447
20448 extract_insn_cached (insn);
20449
20450 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20451 {
20452 mode = GET_MODE (recog_data.operand[0]);
20453 switch (mode)
20454 {
20455 case E_EImode:
20456 case E_OImode:
20457 return 8;
20458 case E_CImode:
20459 return 12;
20460 case E_XImode:
20461 return 16;
20462 default:
20463 gcc_unreachable ();
20464 }
20465 }
20466
20467 load = REG_P (recog_data.operand[0]);
20468 reg = recog_data.operand[!load];
20469 mem = recog_data.operand[load];
20470
20471 gcc_assert (MEM_P (mem));
20472
20473 addr = XEXP (mem, 0);
20474
20475 /* Strip off const from addresses like (const (plus (...))). */
20476 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20477 addr = XEXP (addr, 0);
20478
20479 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
20480 {
20481 int insns = REG_NREGS (reg) / 2;
20482 return insns * 4;
20483 }
20484 else
20485 return 4;
20486 }
20487
20488 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20489 return zero. */
20490
20491 int
20492 arm_address_offset_is_imm (rtx_insn *insn)
20493 {
20494 rtx mem, addr;
20495
20496 extract_insn_cached (insn);
20497
20498 if (REG_P (recog_data.operand[0]))
20499 return 0;
20500
20501 mem = recog_data.operand[0];
20502
20503 gcc_assert (MEM_P (mem));
20504
20505 addr = XEXP (mem, 0);
20506
20507 if (REG_P (addr)
20508 || (GET_CODE (addr) == PLUS
20509 && REG_P (XEXP (addr, 0))
20510 && CONST_INT_P (XEXP (addr, 1))))
20511 return 1;
20512 else
20513 return 0;
20514 }
20515
20516 /* Output an ADD r, s, #n where n may be too big for one instruction.
20517 If adding zero to one register, output nothing. */
20518 const char *
20519 output_add_immediate (rtx *operands)
20520 {
20521 HOST_WIDE_INT n = INTVAL (operands[2]);
20522
20523 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20524 {
20525 if (n < 0)
20526 output_multi_immediate (operands,
20527 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20528 -n);
20529 else
20530 output_multi_immediate (operands,
20531 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20532 n);
20533 }
20534
20535 return "";
20536 }
20537
20538 /* Output a multiple immediate operation.
20539 OPERANDS is the vector of operands referred to in the output patterns.
20540 INSTR1 is the output pattern to use for the first constant.
20541 INSTR2 is the output pattern to use for subsequent constants.
20542 IMMED_OP is the index of the constant slot in OPERANDS.
20543 N is the constant value. */
20544 static const char *
20545 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20546 int immed_op, HOST_WIDE_INT n)
20547 {
20548 #if HOST_BITS_PER_WIDE_INT > 32
20549 n &= 0xffffffff;
20550 #endif
20551
20552 if (n == 0)
20553 {
20554 /* Quick and easy output. */
20555 operands[immed_op] = const0_rtx;
20556 output_asm_insn (instr1, operands);
20557 }
20558 else
20559 {
20560 int i;
20561 const char * instr = instr1;
20562
20563 /* Note that n is never zero here (which would give no output). */
20564 for (i = 0; i < 32; i += 2)
20565 {
20566 if (n & (3 << i))
20567 {
20568 operands[immed_op] = GEN_INT (n & (255 << i));
20569 output_asm_insn (instr, operands);
20570 instr = instr2;
20571 i += 6;
20572 }
20573 }
20574 }
20575
20576 return "";
20577 }
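
/* Worked example of the splitting loop (constant chosen for
   illustration): for N = 0x10004 an add is emitted as

       add     r0, r1, #4
       add     r0, r0, #65536

   because 0x10004 is not a valid single rotated 8-bit immediate, but
   each 8-bit chunk selected by the scan above is.  */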
20578
20579 /* Return the name of a shifter operation. */
20580 static const char *
20581 arm_shift_nmem(enum rtx_code code)
20582 {
20583 switch (code)
20584 {
20585 case ASHIFT:
20586 return ARM_LSL_NAME;
20587
20588 case ASHIFTRT:
20589 return "asr";
20590
20591 case LSHIFTRT:
20592 return "lsr";
20593
20594 case ROTATERT:
20595 return "ror";
20596
20597 default:
20598 abort();
20599 }
20600 }
20601
20602 /* Return the appropriate ARM instruction for the operation code.
20603 The returned result should not be overwritten. OP is the rtx of the
20604 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20605 was shifted. */
20606 const char *
20607 arithmetic_instr (rtx op, int shift_first_arg)
20608 {
20609 switch (GET_CODE (op))
20610 {
20611 case PLUS:
20612 return "add";
20613
20614 case MINUS:
20615 return shift_first_arg ? "rsb" : "sub";
20616
20617 case IOR:
20618 return "orr";
20619
20620 case XOR:
20621 return "eor";
20622
20623 case AND:
20624 return "and";
20625
20626 case ASHIFT:
20627 case ASHIFTRT:
20628 case LSHIFTRT:
20629 case ROTATERT:
20630 return arm_shift_nmem(GET_CODE(op));
20631
20632 default:
20633 gcc_unreachable ();
20634 }
20635 }
20636
20637 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20638 for the operation code. The returned result should not be overwritten.
20639 OP is the rtx code of the shift.
20640 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
20641 will be the constant shift amount. */
20642 static const char *
20643 shift_op (rtx op, HOST_WIDE_INT *amountp)
20644 {
20645 const char * mnem;
20646 enum rtx_code code = GET_CODE (op);
20647
20648 switch (code)
20649 {
20650 case ROTATE:
20651 if (!CONST_INT_P (XEXP (op, 1)))
20652 {
20653 output_operand_lossage ("invalid shift operand");
20654 return NULL;
20655 }
20656
20657 code = ROTATERT;
20658 *amountp = 32 - INTVAL (XEXP (op, 1));
20659 mnem = "ror";
20660 break;
20661
20662 case ASHIFT:
20663 case ASHIFTRT:
20664 case LSHIFTRT:
20665 case ROTATERT:
20666 mnem = arm_shift_nmem(code);
20667 if (CONST_INT_P (XEXP (op, 1)))
20668 {
20669 *amountp = INTVAL (XEXP (op, 1));
20670 }
20671 else if (REG_P (XEXP (op, 1)))
20672 {
20673 *amountp = -1;
20674 return mnem;
20675 }
20676 else
20677 {
20678 output_operand_lossage ("invalid shift operand");
20679 return NULL;
20680 }
20681 break;
20682
20683 case MULT:
20684 /* We never have to worry about the amount being other than a
20685 power of 2, since this case can never be reloaded from a reg. */
20686 if (!CONST_INT_P (XEXP (op, 1)))
20687 {
20688 output_operand_lossage ("invalid shift operand");
20689 return NULL;
20690 }
20691
20692 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20693
20694 /* Amount must be a power of two. */
20695 if (*amountp & (*amountp - 1))
20696 {
20697 output_operand_lossage ("invalid shift operand");
20698 return NULL;
20699 }
20700
20701 *amountp = exact_log2 (*amountp);
20702 gcc_assert (IN_RANGE (*amountp, 0, 31));
20703 return ARM_LSL_NAME;
20704
20705 default:
20706 output_operand_lossage ("invalid shift operand");
20707 return NULL;
20708 }
20709
20710 /* This is not 100% correct, but follows from the desire to merge
20711 multiplication by a power of 2 with the recognizer for a
20712 shift. >=32 is not a valid shift for "lsl", so we must try and
20713 output a shift that produces the correct arithmetical result.
20714 Using lsr #32 is identical except for the fact that the carry bit
20715 is not set correctly if we set the flags; but we never use the
20716 carry bit from such an operation, so we can ignore that. */
20717 if (code == ROTATERT)
20718 /* Rotate is just modulo 32. */
20719 *amountp &= 31;
20720 else if (*amountp != (*amountp & 31))
20721 {
20722 if (code == ASHIFT)
20723 mnem = "lsr";
20724 *amountp = 32;
20725 }
20726
20727 /* Shifts of 0 are no-ops. */
20728 if (*amountp == 0)
20729 return NULL;
20730
20731 return mnem;
20732 }
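
/* Illustrative mapping performed by shift_op: the RTL for x * 8, i.e.
   (mult (reg) (const_int 8)), is printed as a left shift by 3, so a
   combined operation can be output as, for example,

       add     r0, r0, r1, lsl #3

   (the register allocation shown is arbitrary).  */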
20733
20734 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20735 because /bin/as is horribly restrictive. The judgement about
20736 whether or not each character is 'printable' (and can be output as
20737 is) or not (and must be printed with an octal escape) must be made
20738 with reference to the *host* character set -- the situation is
20739 similar to that discussed in the comments above pp_c_char in
20740 c-pretty-print.c. */
20741
20742 #define MAX_ASCII_LEN 51
20743
20744 void
20745 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20746 {
20747 int i;
20748 int len_so_far = 0;
20749
20750 fputs ("\t.ascii\t\"", stream);
20751
20752 for (i = 0; i < len; i++)
20753 {
20754 int c = p[i];
20755
20756 if (len_so_far >= MAX_ASCII_LEN)
20757 {
20758 fputs ("\"\n\t.ascii\t\"", stream);
20759 len_so_far = 0;
20760 }
20761
20762 if (ISPRINT (c))
20763 {
20764 if (c == '\\' || c == '\"')
20765 {
20766 putc ('\\', stream);
20767 len_so_far++;
20768 }
20769 putc (c, stream);
20770 len_so_far++;
20771 }
20772 else
20773 {
20774 fprintf (stream, "\\%03o", c);
20775 len_so_far += 4;
20776 }
20777 }
20778
20779 fputs ("\"\n", stream);
20780 }
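
/* Example of the escaping above (input bytes chosen for illustration):
   the characters 'H', 'i', '"' and a newline are emitted as

       .ascii  "Hi\"\012"

   and a fresh .ascii directive is started once MAX_ASCII_LEN characters
   have been written on the current line.  */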
20781 \f
20782
20783 /* Compute the register save mask for registers 0 through 12
20784 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
20785
20786 static unsigned long
20787 arm_compute_save_reg0_reg12_mask (void)
20788 {
20789 unsigned long func_type = arm_current_func_type ();
20790 unsigned long save_reg_mask = 0;
20791 unsigned int reg;
20792
20793 if (IS_INTERRUPT (func_type))
20794 {
20795 unsigned int max_reg;
20796 /* Interrupt functions must not corrupt any registers,
20797 even call clobbered ones. If this is a leaf function
20798 we can just examine the registers used by the RTL, but
20799 otherwise we have to assume that whatever function is
20800 called might clobber anything, and so we have to save
20801 all the call-clobbered registers as well. */
20802 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20803 /* FIQ handlers have registers r8 - r12 banked, so
20804 we only need to check r0 - r7. Normal ISRs only
20805 bank r14 and r15, so we must check up to r12.
20806 r13 is the stack pointer which is always preserved,
20807 so we do not need to consider it here. */
20808 max_reg = 7;
20809 else
20810 max_reg = 12;
20811
20812 for (reg = 0; reg <= max_reg; reg++)
20813 if (reg_needs_saving_p (reg))
20814 save_reg_mask |= (1 << reg);
20815
20816 /* Also save the pic base register if necessary. */
20817 if (PIC_REGISTER_MAY_NEED_SAVING
20818 && crtl->uses_pic_offset_table)
20819 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20820 }
20821 else if (IS_VOLATILE(func_type))
20822 {
20823 /* For noreturn functions we historically omitted register saves
20824 altogether. However this really messes up debugging. As a
20825 compromise save just the frame pointers. Combined with the link
20826 register saved elsewhere this should be sufficient to get
20827 a backtrace. */
20828 if (frame_pointer_needed)
20829 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20830 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20831 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20832 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20833 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20834 }
20835 else
20836 {
20837 /* In the normal case we only need to save those registers
20838 which are call saved and which are used by this function. */
20839 for (reg = 0; reg <= 11; reg++)
20840 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20841 save_reg_mask |= (1 << reg);
20842
20843 /* Handle the frame pointer as a special case. */
20844 if (frame_pointer_needed)
20845 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20846
20847 /* If we aren't loading the PIC register,
20848 don't stack it even though it may be live. */
20849 if (PIC_REGISTER_MAY_NEED_SAVING
20850 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20851 || crtl->uses_pic_offset_table))
20852 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20853
20854 /* The prologue will copy SP into R0, so save it. */
20855 if (IS_STACKALIGN (func_type))
20856 save_reg_mask |= 1;
20857 }
20858
20859 /* Save registers so the exception handler can modify them. */
20860 if (crtl->calls_eh_return)
20861 {
20862 unsigned int i;
20863
20864 for (i = 0; ; i++)
20865 {
20866 reg = EH_RETURN_DATA_REGNO (i);
20867 if (reg == INVALID_REGNUM)
20868 break;
20869 save_reg_mask |= 1 << reg;
20870 }
20871 }
20872
20873 return save_reg_mask;
20874 }
20875
20876 /* Return true if r3 is live at the start of the function. */
20877
20878 static bool
20879 arm_r3_live_at_start_p (void)
20880 {
20881 /* Just look at cfg info, which is still close enough to correct at this
20882 point. This gives false positives for broken functions that might use
20883 uninitialized data that happens to be allocated in r3, but who cares? */
20884 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20885 }
20886
20887 /* Compute the number of bytes used to store the static chain register on the
20888 stack, above the stack frame. We need to know this accurately to get the
20889 alignment of the rest of the stack frame correct. */
20890
20891 static int
20892 arm_compute_static_chain_stack_bytes (void)
20893 {
20894 /* Once the value is updated from the init value of -1, do not
20895 re-compute. */
20896 if (cfun->machine->static_chain_stack_bytes != -1)
20897 return cfun->machine->static_chain_stack_bytes;
20898
20899 /* See the defining assertion in arm_expand_prologue. */
20900 if (IS_NESTED (arm_current_func_type ())
20901 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20902 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20903 || flag_stack_clash_protection)
20904 && !df_regs_ever_live_p (LR_REGNUM)))
20905 && arm_r3_live_at_start_p ()
20906 && crtl->args.pretend_args_size == 0)
20907 return 4;
20908
20909 return 0;
20910 }
20911
20912 /* Compute a bit mask of which core registers need to be
20913 saved on the stack for the current function.
20914 This is used by arm_compute_frame_layout, which may add extra registers. */
20915
20916 static unsigned long
20917 arm_compute_save_core_reg_mask (void)
20918 {
20919 unsigned int save_reg_mask = 0;
20920 unsigned long func_type = arm_current_func_type ();
20921 unsigned int reg;
20922
20923 if (IS_NAKED (func_type))
20924 /* This should never really happen. */
20925 return 0;
20926
20927 /* If we are creating a stack frame, then we must save the frame pointer,
20928 IP (which will hold the old stack pointer), LR and the PC. */
20929 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20930 save_reg_mask |=
20931 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20932 | (1 << IP_REGNUM)
20933 | (1 << LR_REGNUM)
20934 | (1 << PC_REGNUM);
20935
20936 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20937
20938 /* Decide if we need to save the link register.
20939 Interrupt routines have their own banked link register,
20940 so they never need to save it.
20941 Otherwise if we do not use the link register we do not need to save
20942 it. If we are pushing other registers onto the stack however, we
20943 can save an instruction in the epilogue by pushing the link register
20944 now and then popping it back into the PC. This incurs extra memory
20945 accesses though, so we only do it when optimizing for size, and only
20946 if we know that we will not need a fancy return sequence. */
20947 if (df_regs_ever_live_p (LR_REGNUM)
20948 || (save_reg_mask
20949 && optimize_size
20950 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20951 && !crtl->tail_call_emit
20952 && !crtl->calls_eh_return))
20953 save_reg_mask |= 1 << LR_REGNUM;
20954
20955 if (cfun->machine->lr_save_eliminated)
20956 save_reg_mask &= ~ (1 << LR_REGNUM);
20957
20958 if (TARGET_REALLY_IWMMXT
20959 && ((bit_count (save_reg_mask)
20960 + ARM_NUM_INTS (crtl->args.pretend_args_size +
20961 arm_compute_static_chain_stack_bytes())
20962 ) % 2) != 0)
20963 {
20964 /* The total number of registers that are going to be pushed
20965 onto the stack is odd. We need to ensure that the stack
20966 is 64-bit aligned before we start to save iWMMXt registers,
20967 and also before we start to create locals. (A local variable
20968 might be a double or long long which we will load/store using
20969 an iWMMXt instruction). Therefore we need to push another
20970 ARM register, so that the stack will be 64-bit aligned. We
20971 try to avoid using the arg registers (r0 - r3) as they might be
20972 used to pass values in a tail call. */
20973 for (reg = 4; reg <= 12; reg++)
20974 if ((save_reg_mask & (1 << reg)) == 0)
20975 break;
20976
20977 if (reg <= 12)
20978 save_reg_mask |= (1 << reg);
20979 else
20980 {
20981 cfun->machine->sibcall_blocked = 1;
20982 save_reg_mask |= (1 << 3);
20983 }
20984 }
20985
20986 /* We may need to push an additional register for use initializing the
20987 PIC base register. */
20988 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
20989 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
20990 {
20991 reg = thumb_find_work_register (1 << 4);
20992 if (!call_used_or_fixed_reg_p (reg))
20993 save_reg_mask |= (1 << reg);
20994 }
20995
20996 return save_reg_mask;
20997 }
20998
20999 /* Compute a bit mask of which core registers need to be
21000 saved on the stack for the current function. */
21001 static unsigned long
21002 thumb1_compute_save_core_reg_mask (void)
21003 {
21004 unsigned long mask;
21005 unsigned reg;
21006
21007 mask = 0;
21008 for (reg = 0; reg < 12; reg ++)
21009 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21010 mask |= 1 << reg;
21011
21012 /* Handle the frame pointer as a special case. */
21013 if (frame_pointer_needed)
21014 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21015
21016 if (flag_pic
21017 && !TARGET_SINGLE_PIC_BASE
21018 && arm_pic_register != INVALID_REGNUM
21019 && crtl->uses_pic_offset_table)
21020 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21021
21022 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21023 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21024 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21025
21026 /* LR will also be pushed if any lo regs are pushed. */
21027 if (mask & 0xff || thumb_force_lr_save ())
21028 mask |= (1 << LR_REGNUM);
21029
21030 bool call_clobbered_scratch
21031 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21032 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21033
21034 /* Make sure we have a low work register if we need one. We will
21035 need one if we are going to push a high register, but we are not
21036 currently intending to push a low register. However if both the
21037 prologue and epilogue have a spare call-clobbered low register,
21038 then we won't need to find an additional work register. It does
21039 not need to be the same register in the prologue and
21040 epilogue. */
21041 if ((mask & 0xff) == 0
21042 && !call_clobbered_scratch
21043 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21044 {
21045 /* Use thumb_find_work_register to choose which register
21046 we will use. If the register is live then we will
21047 have to push it. Use LAST_LO_REGNUM as our fallback
21048 choice for the register to select. */
21049 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21050 /* Make sure the register returned by thumb_find_work_register is
21051 not part of the return value. */
21052 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21053 reg = LAST_LO_REGNUM;
21054
21055 if (callee_saved_reg_p (reg))
21056 mask |= 1 << reg;
21057 }
21058
21059 /* The 504 below is 8 bytes less than 512 because there are two possible
21060 alignment words. We can't tell here if they will be present or not, so we
21061 have to play it safe and assume that they are. */
21062 if ((CALLER_INTERWORKING_SLOT_SIZE +
21063 ROUND_UP_WORD (get_frame_size ()) +
21064 crtl->outgoing_args_size) >= 504)
21065 {
21066 /* This is the same as the code in thumb1_expand_prologue() which
21067 determines which register to use for stack decrement. */
21068 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21069 if (mask & (1 << reg))
21070 break;
21071
21072 if (reg > LAST_LO_REGNUM)
21073 {
21074 /* Make sure we have a register available for stack decrement. */
21075 mask |= 1 << LAST_LO_REGNUM;
21076 }
21077 }
21078
21079 return mask;
21080 }
21081
21082 /* Return the number of bytes required to save VFP registers. */
21083 static int
21084 arm_get_vfp_saved_size (void)
21085 {
21086 unsigned int regno;
21087 int count;
21088 int saved;
21089
21090 saved = 0;
21091 /* Space for saved VFP registers. */
21092 if (TARGET_VFP_BASE)
21093 {
21094 count = 0;
21095 for (regno = FIRST_VFP_REGNUM;
21096 regno < LAST_VFP_REGNUM;
21097 regno += 2)
21098 {
21099 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21100 {
21101 if (count > 0)
21102 {
21103 /* Work around the ARM10 VFPr1 bug. */
21104 if (count == 2 && !arm_arch6)
21105 count++;
21106 saved += count * 8;
21107 }
21108 count = 0;
21109 }
21110 else
21111 count++;
21112 }
21113 if (count > 0)
21114 {
21115 if (count == 2 && !arm_arch6)
21116 count++;
21117 saved += count * 8;
21118 }
21119 }
21120 return saved;
21121 }
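
/* Worked example (illustrative): if d8-d10 are the only VFP registers
   that need saving, COUNT reaches 3 and 24 bytes are returned.  On a
   pre-Armv6 core the ARM10 VFPr1 workaround bumps a run of exactly two
   registers to three, so saving only d8-d9 would also return 24.  */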
21122
21123
21124 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21125 everything bar the final return instruction. If SIMPLE_RETURN is true,
21126 then do not output the epilogue, because it has already been emitted in RTL.
21127
21128 Note: do not forget to update length attribute of corresponding insn pattern
21129 when changing assembly output (e.g. the length attribute of
21130 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21131 register clearing sequences). */
21132 const char *
21133 output_return_instruction (rtx operand, bool really_return, bool reverse,
21134 bool simple_return)
21135 {
21136 char conditional[10];
21137 char instr[100];
21138 unsigned reg;
21139 unsigned long live_regs_mask;
21140 unsigned long func_type;
21141 arm_stack_offsets *offsets;
21142
21143 func_type = arm_current_func_type ();
21144
21145 if (IS_NAKED (func_type))
21146 return "";
21147
21148 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21149 {
21150 /* If this function was declared non-returning, and we have
21151 found a tail call, then we have to trust that the called
21152 function won't return. */
21153 if (really_return)
21154 {
21155 rtx ops[2];
21156
21157 /* Otherwise, trap an attempted return by aborting. */
21158 ops[0] = operand;
21159 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21160 : "abort");
21161 assemble_external_libcall (ops[1]);
21162 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21163 }
21164
21165 return "";
21166 }
21167
21168 gcc_assert (!cfun->calls_alloca || really_return);
21169
21170 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21171
21172 cfun->machine->return_used_this_function = 1;
21173
21174 offsets = arm_get_frame_offsets ();
21175 live_regs_mask = offsets->saved_regs_mask;
21176
21177 if (!simple_return && live_regs_mask)
21178 {
21179 const char * return_reg;
21180
21181 /* If we do not have any special requirements for function exit
21182 (e.g. interworking) then we can load the return address
21183 directly into the PC. Otherwise we must load it into LR. */
21184 if (really_return
21185 && !IS_CMSE_ENTRY (func_type)
21186 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21187 return_reg = reg_names[PC_REGNUM];
21188 else
21189 return_reg = reg_names[LR_REGNUM];
21190
21191 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21192 {
21193 /* There are three possible reasons for the IP register
21194 being saved. 1) a stack frame was created, in which case
21195 IP contains the old stack pointer, or 2) an ISR routine
21196 corrupted it, or 3) it was saved to align the stack on
21197 iWMMXt. In case 1, restore IP into SP, otherwise just
21198 restore IP. */
21199 if (frame_pointer_needed)
21200 {
21201 live_regs_mask &= ~ (1 << IP_REGNUM);
21202 live_regs_mask |= (1 << SP_REGNUM);
21203 }
21204 else
21205 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21206 }
21207
21208 /* On some ARM architectures it is faster to use LDR rather than
21209 LDM to load a single register. On other architectures, the
21210 cost is the same. In 26 bit mode, or for exception handlers,
21211 we have to use LDM to load the PC so that the CPSR is also
21212 restored. */
21213 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21214 if (live_regs_mask == (1U << reg))
21215 break;
21216
21217 if (reg <= LAST_ARM_REGNUM
21218 && (reg != LR_REGNUM
21219 || ! really_return
21220 || ! IS_INTERRUPT (func_type)))
21221 {
21222 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21223 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21224 }
21225 else
21226 {
21227 char *p;
21228 int first = 1;
21229
21230 /* Generate the load multiple instruction to restore the
21231 registers. Note we can get here, even if
21232 frame_pointer_needed is true, but only if sp already
21233 points to the base of the saved core registers. */
21234 if (live_regs_mask & (1 << SP_REGNUM))
21235 {
21236 unsigned HOST_WIDE_INT stack_adjust;
21237
21238 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21239 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21240
21241 if (stack_adjust && arm_arch5t && TARGET_ARM)
21242 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21243 else
21244 {
21245 /* If we can't use ldmib (SA110 bug),
21246 then try to pop r3 instead. */
21247 if (stack_adjust)
21248 live_regs_mask |= 1 << 3;
21249
21250 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21251 }
21252 }
21253 /* For interrupt returns we have to use an LDM rather than
21254 a POP so that we can use the exception return variant. */
21255 else if (IS_INTERRUPT (func_type))
21256 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21257 else
21258 sprintf (instr, "pop%s\t{", conditional);
21259
21260 p = instr + strlen (instr);
21261
21262 for (reg = 0; reg <= SP_REGNUM; reg++)
21263 if (live_regs_mask & (1 << reg))
21264 {
21265 int l = strlen (reg_names[reg]);
21266
21267 if (first)
21268 first = 0;
21269 else
21270 {
21271 memcpy (p, ", ", 2);
21272 p += 2;
21273 }
21274
21275 memcpy (p, "%|", 2);
21276 memcpy (p + 2, reg_names[reg], l);
21277 p += l + 2;
21278 }
21279
21280 if (live_regs_mask & (1 << LR_REGNUM))
21281 {
21282 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21283 /* If returning from an interrupt, restore the CPSR. */
21284 if (IS_INTERRUPT (func_type))
21285 strcat (p, "^");
21286 }
21287 else
21288 strcpy (p, "}");
21289 }
21290
21291 output_asm_insn (instr, & operand);
21292
21293 /* See if we need to generate an extra instruction to
21294 perform the actual function return. */
21295 if (really_return
21296 && func_type != ARM_FT_INTERWORKED
21297 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21298 {
21299 /* The return has already been handled
21300 by loading the LR into the PC. */
21301 return "";
21302 }
21303 }
21304
21305 if (really_return)
21306 {
21307 switch ((int) ARM_FUNC_TYPE (func_type))
21308 {
21309 case ARM_FT_ISR:
21310 case ARM_FT_FIQ:
21311 /* ??? This is wrong for unified assembly syntax. */
21312 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21313 break;
21314
21315 case ARM_FT_INTERWORKED:
21316 gcc_assert (arm_arch5t || arm_arch4t);
21317 sprintf (instr, "bx%s\t%%|lr", conditional);
21318 break;
21319
21320 case ARM_FT_EXCEPTION:
21321 /* ??? This is wrong for unified assembly syntax. */
21322 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21323 break;
21324
21325 default:
21326 if (IS_CMSE_ENTRY (func_type))
21327 {
21328 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21329 emitted by cmse_nonsecure_entry_clear_before_return () and the
21330 VSTR/VLDR instructions in the prologue and epilogue. */
21331 if (!TARGET_HAVE_FPCXT_CMSE)
21332 {
21333 /* Check if we have to clear the 'GE bits' which is only used if
21334 parallel add and subtraction instructions are available. */
21335 if (TARGET_INT_SIMD)
21336 snprintf (instr, sizeof (instr),
21337 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21338 else
21339 snprintf (instr, sizeof (instr),
21340 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21341
21342 output_asm_insn (instr, & operand);
21343 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21344 care of it. */
21345 if (TARGET_HARD_FLOAT)
21346 {
21347 /* Clear the cumulative exception-status bits (0-4,7) and
21348 the condition code bits (28-31) of the FPSCR. We need
21349 to remember to clear the first scratch register used
21350 (IP) and save and restore the second (r4).
21351
21352 Important note: the length of the
21353 thumb2_cmse_entry_return insn pattern must account for
21354 the size of the below instructions. */
21355 output_asm_insn ("push\t{%|r4}", & operand);
21356 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21357 output_asm_insn ("movw\t%|r4, #65376", & operand);
21358 output_asm_insn ("movt\t%|r4, #4095", & operand);
21359 output_asm_insn ("and\t%|ip, %|r4", & operand);
21360 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21361 output_asm_insn ("pop\t{%|r4}", & operand);
21362 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21363 }
21364 }
21365 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21366 }
21367 /* Use bx if it's available. */
21368 else if (arm_arch5t || arm_arch4t)
21369 sprintf (instr, "bx%s\t%%|lr", conditional);
21370 else
21371 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21372 break;
21373 }
21374
21375 output_asm_insn (instr, & operand);
21376 }
21377
21378 return "";
21379 }
21380
21381 /* Output in FILE asm statements needed to declare the NAME of the function
21382 defined by its DECL node. */
21383
21384 void
21385 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21386 {
21387 size_t cmse_name_len;
21388 char *cmse_name = 0;
21389 char cmse_prefix[] = "__acle_se_";
21390
21391 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21392 extra function label for each function with the 'cmse_nonsecure_entry'
21393 attribute. This extra function label should be prepended with
21394 '__acle_se_', telling the linker that it needs to create secure gateway
21395 veneers for this function. */
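/* For illustration (assuming a hypothetical function named 'foo'): the code
   below makes '__acle_se_foo' global, declares it with a function type
   directive, and later emits its label at the same address as the ordinary
   'foo' label, so the linker can build the secure gateway veneer. */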
21396 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21397 DECL_ATTRIBUTES (decl)))
21398 {
21399 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21400 cmse_name = XALLOCAVEC (char, cmse_name_len);
21401 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21402 targetm.asm_out.globalize_label (file, cmse_name);
21403
21404 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21405 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21406 }
21407
21408 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21409 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21410 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21411 ASM_OUTPUT_LABEL (file, name);
21412
21413 if (cmse_name)
21414 ASM_OUTPUT_LABEL (file, cmse_name);
21415
21416 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21417 }
21418
21419 /* Write the function name into the code section, directly preceding
21420 the function prologue.
21421
21422 Code will be output similar to this:
21423 t0
21424 .ascii "arm_poke_function_name", 0
21425 .align
21426 t1
21427 .word 0xff000000 + (t1 - t0)
21428 arm_poke_function_name
21429 mov ip, sp
21430 stmfd sp!, {fp, ip, lr, pc}
21431 sub fp, ip, #4
21432
21433 When performing a stack backtrace, code can inspect the value
21434 of 'pc' stored at 'fp' + 0. If the trace function then looks
21435 at location pc - 12 and the top 8 bits are set, then we know
21436 that there is a function name embedded immediately preceding this
21437 location, and that its length is (pc[-3] & ~0xff000000).
21438
21439 We assume that pc is declared as a pointer to an unsigned long.
21440
21441 It is of no benefit to output the function name if we are assembling
21442 a leaf function. These function types will not contain a stack
21443 backtrace structure, so it is not possible to determine the
21444 function name. */
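/* A worked example of the marker word (the name is arbitrary): for
   NAME == "poke", LENGTH is strlen ("poke") + 1 == 5, ROUND_UP_WORD rounds
   that to 8, and the word emitted after the padded string is
   0xff000000 + 8 == 0xff000008; a backtracer recovers the distance back to
   the start of the string from the low 24 bits of that word. */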
21445 void
21446 arm_poke_function_name (FILE *stream, const char *name)
21447 {
21448 unsigned long alignlength;
21449 unsigned long length;
21450 rtx x;
21451
21452 length = strlen (name) + 1;
21453 alignlength = ROUND_UP_WORD (length);
21454
21455 ASM_OUTPUT_ASCII (stream, name, length);
21456 ASM_OUTPUT_ALIGN (stream, 2);
21457 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21458 assemble_aligned_integer (UNITS_PER_WORD, x);
21459 }
21460
21461 /* Place some comments into the assembler stream
21462 describing the current function. */
21463 static void
21464 arm_output_function_prologue (FILE *f)
21465 {
21466 unsigned long func_type;
21467
21468 /* Sanity check. */
21469 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21470
21471 func_type = arm_current_func_type ();
21472
21473 switch ((int) ARM_FUNC_TYPE (func_type))
21474 {
21475 default:
21476 case ARM_FT_NORMAL:
21477 break;
21478 case ARM_FT_INTERWORKED:
21479 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21480 break;
21481 case ARM_FT_ISR:
21482 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21483 break;
21484 case ARM_FT_FIQ:
21485 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21486 break;
21487 case ARM_FT_EXCEPTION:
21488 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21489 break;
21490 }
21491
21492 if (IS_NAKED (func_type))
21493 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21494
21495 if (IS_VOLATILE (func_type))
21496 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21497
21498 if (IS_NESTED (func_type))
21499 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21500 if (IS_STACKALIGN (func_type))
21501 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21502 if (IS_CMSE_ENTRY (func_type))
21503 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21504
21505 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21506 (HOST_WIDE_INT) crtl->args.size,
21507 crtl->args.pretend_args_size,
21508 (HOST_WIDE_INT) get_frame_size ());
21509
21510 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21511 frame_pointer_needed,
21512 cfun->machine->uses_anonymous_args);
21513
21514 if (cfun->machine->lr_save_eliminated)
21515 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21516
21517 if (crtl->calls_eh_return)
21518 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21519
21520 }
21521
21522 static void
21523 arm_output_function_epilogue (FILE *)
21524 {
21525 arm_stack_offsets *offsets;
21526
21527 if (TARGET_THUMB1)
21528 {
21529 int regno;
21530
21531 /* Emit any call-via-reg trampolines that are needed for v4t support
21532 of call_reg and call_value_reg type insns. */
21533 for (regno = 0; regno < LR_REGNUM; regno++)
21534 {
21535 rtx label = cfun->machine->call_via[regno];
21536
21537 if (label != NULL)
21538 {
21539 switch_to_section (function_section (current_function_decl));
21540 targetm.asm_out.internal_label (asm_out_file, "L",
21541 CODE_LABEL_NUMBER (label));
21542 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21543 }
21544 }
21545
21546 /* ??? Probably not safe to set this here, since it assumes that a
21547 function will be emitted as assembly immediately after we generate
21548 RTL for it. This does not happen for inline functions. */
21549 cfun->machine->return_used_this_function = 0;
21550 }
21551 else /* TARGET_32BIT */
21552 {
21553 /* We need to take into account any stack-frame rounding. */
21554 offsets = arm_get_frame_offsets ();
21555
21556 gcc_assert (!use_return_insn (FALSE, NULL)
21557 || (cfun->machine->return_used_this_function != 0)
21558 || offsets->saved_regs == offsets->outgoing_args
21559 || frame_pointer_needed);
21560 }
21561 }
21562
21563 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21564 STR and STRD. If an even number of registers is being pushed, an
21565 STRD pattern is created for each register pair. If an
21566 odd number of registers is pushed, emit an initial STR followed by
21567 as many STRD instructions as are needed. This works best when the
21568 stack is initially 64-bit aligned (the normal case), since it
21569 ensures that each STRD is also 64-bit aligned. */
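/* For example (a sketch of the resulting assembly; the register choice is
   arbitrary): with SAVED_REGS_MASK covering {r4, r5, r6}, the odd count
   yields an initial writeback store followed by a single STRD, roughly:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */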
21570 static void
21571 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21572 {
21573 int num_regs = 0;
21574 int i;
21575 int regno;
21576 rtx par = NULL_RTX;
21577 rtx dwarf = NULL_RTX;
21578 rtx tmp;
21579 bool first = true;
21580
21581 num_regs = bit_count (saved_regs_mask);
21582
21583 /* Must be at least one register to save, and can't save SP or PC. */
21584 gcc_assert (num_regs > 0 && num_regs <= 14);
21585 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21586 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21587
21588 /* Create sequence for DWARF info. All the frame-related data for
21589 debugging is held in this wrapper. */
21590 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21591
21592 /* Describe the stack adjustment. */
21593 tmp = gen_rtx_SET (stack_pointer_rtx,
21594 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21595 RTX_FRAME_RELATED_P (tmp) = 1;
21596 XVECEXP (dwarf, 0, 0) = tmp;
21597
21598 /* Find the first register. */
21599 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21600 ;
21601
21602 i = 0;
21603
21604 /* If there's an odd number of registers to push, start off by
21605 pushing a single register. This ensures that subsequent STRD
21606 operations are doubleword aligned (assuming that SP was originally
21607 64-bit aligned). */
21608 if ((num_regs & 1) != 0)
21609 {
21610 rtx reg, mem, insn;
21611
21612 reg = gen_rtx_REG (SImode, regno);
21613 if (num_regs == 1)
21614 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21615 stack_pointer_rtx));
21616 else
21617 mem = gen_frame_mem (Pmode,
21618 gen_rtx_PRE_MODIFY
21619 (Pmode, stack_pointer_rtx,
21620 plus_constant (Pmode, stack_pointer_rtx,
21621 -4 * num_regs)));
21622
21623 tmp = gen_rtx_SET (mem, reg);
21624 RTX_FRAME_RELATED_P (tmp) = 1;
21625 insn = emit_insn (tmp);
21626 RTX_FRAME_RELATED_P (insn) = 1;
21627 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21628 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21629 RTX_FRAME_RELATED_P (tmp) = 1;
21630 i++;
21631 regno++;
21632 XVECEXP (dwarf, 0, i) = tmp;
21633 first = false;
21634 }
21635
21636 while (i < num_regs)
21637 if (saved_regs_mask & (1 << regno))
21638 {
21639 rtx reg1, reg2, mem1, mem2;
21640 rtx tmp0, tmp1, tmp2;
21641 int regno2;
21642
21643 /* Find the register to pair with this one. */
21644 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21645 regno2++)
21646 ;
21647
21648 reg1 = gen_rtx_REG (SImode, regno);
21649 reg2 = gen_rtx_REG (SImode, regno2);
21650
21651 if (first)
21652 {
21653 rtx insn;
21654
21655 first = false;
21656 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21657 stack_pointer_rtx,
21658 -4 * num_regs));
21659 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21660 stack_pointer_rtx,
21661 -4 * (num_regs - 1)));
21662 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21663 plus_constant (Pmode, stack_pointer_rtx,
21664 -4 * (num_regs)));
21665 tmp1 = gen_rtx_SET (mem1, reg1);
21666 tmp2 = gen_rtx_SET (mem2, reg2);
21667 RTX_FRAME_RELATED_P (tmp0) = 1;
21668 RTX_FRAME_RELATED_P (tmp1) = 1;
21669 RTX_FRAME_RELATED_P (tmp2) = 1;
21670 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21671 XVECEXP (par, 0, 0) = tmp0;
21672 XVECEXP (par, 0, 1) = tmp1;
21673 XVECEXP (par, 0, 2) = tmp2;
21674 insn = emit_insn (par);
21675 RTX_FRAME_RELATED_P (insn) = 1;
21676 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21677 }
21678 else
21679 {
21680 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21681 stack_pointer_rtx,
21682 4 * i));
21683 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21684 stack_pointer_rtx,
21685 4 * (i + 1)));
21686 tmp1 = gen_rtx_SET (mem1, reg1);
21687 tmp2 = gen_rtx_SET (mem2, reg2);
21688 RTX_FRAME_RELATED_P (tmp1) = 1;
21689 RTX_FRAME_RELATED_P (tmp2) = 1;
21690 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21691 XVECEXP (par, 0, 0) = tmp1;
21692 XVECEXP (par, 0, 1) = tmp2;
21693 emit_insn (par);
21694 }
21695
21696 /* Create unwind information. This is an approximation. */
21697 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21698 plus_constant (Pmode,
21699 stack_pointer_rtx,
21700 4 * i)),
21701 reg1);
21702 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21703 plus_constant (Pmode,
21704 stack_pointer_rtx,
21705 4 * (i + 1))),
21706 reg2);
21707
21708 RTX_FRAME_RELATED_P (tmp1) = 1;
21709 RTX_FRAME_RELATED_P (tmp2) = 1;
21710 XVECEXP (dwarf, 0, i + 1) = tmp1;
21711 XVECEXP (dwarf, 0, i + 2) = tmp2;
21712 i += 2;
21713 regno = regno2 + 1;
21714 }
21715 else
21716 regno++;
21717
21718 return;
21719 }
21720
21721 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21722 whenever possible, otherwise it emits single-word stores. The first store
21723 also allocates stack space for all saved registers, using writeback with
21724 pre-indexed addressing. All other stores use offset addressing. If no STRD
21725 can be emitted, this function emits a sequence of single-word stores,
21726 and not an STM as before, because single-word stores give the scheduler
21727 more freedom and can be turned into an STM by peephole optimizations. */
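/* For example (a sketch; the register choice is arbitrary): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7}, the first STRD also allocates
   the whole area via writeback and the second uses offset addressing,
   roughly:

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]

   A register without a suitable partner would instead be stored with a
   single-word STR at its offset. */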
21728 static void
21729 arm_emit_strd_push (unsigned long saved_regs_mask)
21730 {
21731 int num_regs = 0;
21732 int i, j, dwarf_index = 0;
21733 int offset = 0;
21734 rtx dwarf = NULL_RTX;
21735 rtx insn = NULL_RTX;
21736 rtx tmp, mem;
21737
21738 /* TODO: More efficient code could be emitted by changing the
21739 layout, e.g., first push all pairs that can use STRD to keep the
21740 stack aligned, and then push all other registers. */
21741 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21742 if (saved_regs_mask & (1 << i))
21743 num_regs++;
21744
21745 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21746 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21747 gcc_assert (num_regs > 0);
21748
21749 /* Create sequence for DWARF info. */
21750 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21751
21752 /* For dwarf info, we generate explicit stack update. */
21753 tmp = gen_rtx_SET (stack_pointer_rtx,
21754 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21755 RTX_FRAME_RELATED_P (tmp) = 1;
21756 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21757
21758 /* Save registers. */
21759 offset = - 4 * num_regs;
21760 j = 0;
21761 while (j <= LAST_ARM_REGNUM)
21762 if (saved_regs_mask & (1 << j))
21763 {
21764 if ((j % 2 == 0)
21765 && (saved_regs_mask & (1 << (j + 1))))
21766 {
21767 /* The current register and the next register form a register pair for
21768 which STRD can be generated. */
21769 if (offset < 0)
21770 {
21771 /* Allocate stack space for all saved registers. */
21772 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21773 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21774 mem = gen_frame_mem (DImode, tmp);
21775 offset = 0;
21776 }
21777 else if (offset > 0)
21778 mem = gen_frame_mem (DImode,
21779 plus_constant (Pmode,
21780 stack_pointer_rtx,
21781 offset));
21782 else
21783 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21784
21785 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21786 RTX_FRAME_RELATED_P (tmp) = 1;
21787 tmp = emit_insn (tmp);
21788
21789 /* Record the first store insn. */
21790 if (dwarf_index == 1)
21791 insn = tmp;
21792
21793 /* Generate dwarf info. */
21794 mem = gen_frame_mem (SImode,
21795 plus_constant (Pmode,
21796 stack_pointer_rtx,
21797 offset));
21798 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21799 RTX_FRAME_RELATED_P (tmp) = 1;
21800 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21801
21802 mem = gen_frame_mem (SImode,
21803 plus_constant (Pmode,
21804 stack_pointer_rtx,
21805 offset + 4));
21806 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21807 RTX_FRAME_RELATED_P (tmp) = 1;
21808 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21809
21810 offset += 8;
21811 j += 2;
21812 }
21813 else
21814 {
21815 /* Emit a single word store. */
21816 if (offset < 0)
21817 {
21818 /* Allocate stack space for all saved registers. */
21819 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21820 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21821 mem = gen_frame_mem (SImode, tmp);
21822 offset = 0;
21823 }
21824 else if (offset > 0)
21825 mem = gen_frame_mem (SImode,
21826 plus_constant (Pmode,
21827 stack_pointer_rtx,
21828 offset));
21829 else
21830 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21831
21832 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21833 RTX_FRAME_RELATED_P (tmp) = 1;
21834 tmp = emit_insn (tmp);
21835
21836 /* Record the first store insn. */
21837 if (dwarf_index == 1)
21838 insn = tmp;
21839
21840 /* Generate dwarf info. */
21841 mem = gen_frame_mem (SImode,
21842 plus_constant(Pmode,
21843 stack_pointer_rtx,
21844 offset));
21845 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21846 RTX_FRAME_RELATED_P (tmp) = 1;
21847 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21848
21849 offset += 4;
21850 j += 1;
21851 }
21852 }
21853 else
21854 j++;
21855
21856 /* Attach dwarf info to the first insn we generate. */
21857 gcc_assert (insn != NULL_RTX);
21858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21859 RTX_FRAME_RELATED_P (insn) = 1;
21860 }
21861
21862 /* Generate and emit an insn that we will recognize as a push_multi.
21863 Unfortunately, since this insn does not reflect very well the actual
21864 semantics of the operation, we need to annotate the insn for the benefit
21865 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
21866 MASK for registers that should be annotated for DWARF2 frame unwind
21867 information. */
21868 static rtx
21869 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21870 {
21871 int num_regs = 0;
21872 int num_dwarf_regs = 0;
21873 int i, j;
21874 rtx par;
21875 rtx dwarf;
21876 int dwarf_par_index;
21877 rtx tmp, reg;
21878
21879 /* We don't record the PC in the dwarf frame information. */
21880 dwarf_regs_mask &= ~(1 << PC_REGNUM);
21881
21882 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21883 {
21884 if (mask & (1 << i))
21885 num_regs++;
21886 if (dwarf_regs_mask & (1 << i))
21887 num_dwarf_regs++;
21888 }
21889
21890 gcc_assert (num_regs && num_regs <= 16);
21891 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21892
21893 /* For the body of the insn we are going to generate an UNSPEC in
21894 parallel with several USEs. This allows the insn to be recognized
21895 by the push_multi pattern in the arm.md file.
21896
21897 The body of the insn looks something like this:
21898
21899 (parallel [
21900 (set (mem:BLK (pre_modify:SI (reg:SI sp)
21901 (const_int:SI <num>)))
21902 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21903 (use (reg:SI XX))
21904 (use (reg:SI YY))
21905 ...
21906 ])
21907
21908 For the frame note however, we try to be more explicit and actually
21909 show each register being stored into the stack frame, plus a (single)
21910 decrement of the stack pointer. We do it this way in order to be
21911 friendly to the stack unwinding code, which only wants to see a single
21912 stack decrement per instruction. The RTL we generate for the note looks
21913 something like this:
21914
21915 (sequence [
21916 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21917 (set (mem:SI (reg:SI sp)) (reg:SI r4))
21918 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21919 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21920 ...
21921 ])
21922
21923 FIXME:: In an ideal world the PRE_MODIFY would not exist and
21924 instead we'd have a parallel expression detailing all
21925 the stores to the various memory addresses so that debug
21926 information is more up-to-date. Remember however while writing
21927 this to take care of the constraints with the push instruction.
21928
21929 Note also that this has to be taken care of for the VFP registers.
21930
21931 For more see PR43399. */
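/* As a small concrete illustration (the register choice is arbitrary):
   for MASK == DWARF_REGS_MASK == {r4, r5, lr}, the insn built below is
   recognized as push_multi and prints roughly as "push {r4, r5, lr}",
   while the attached note describes sp = sp - 12 followed by stores of
   r4, r5 and lr at sp, sp + 4 and sp + 8. */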
21932
21933 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21934 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21935 dwarf_par_index = 1;
21936
21937 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21938 {
21939 if (mask & (1 << i))
21940 {
21941 reg = gen_rtx_REG (SImode, i);
21942
21943 XVECEXP (par, 0, 0)
21944 = gen_rtx_SET (gen_frame_mem
21945 (BLKmode,
21946 gen_rtx_PRE_MODIFY (Pmode,
21947 stack_pointer_rtx,
21948 plus_constant
21949 (Pmode, stack_pointer_rtx,
21950 -4 * num_regs))
21951 ),
21952 gen_rtx_UNSPEC (BLKmode,
21953 gen_rtvec (1, reg),
21954 UNSPEC_PUSH_MULT));
21955
21956 if (dwarf_regs_mask & (1 << i))
21957 {
21958 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
21959 reg);
21960 RTX_FRAME_RELATED_P (tmp) = 1;
21961 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21962 }
21963
21964 break;
21965 }
21966 }
21967
21968 for (j = 1, i++; j < num_regs; i++)
21969 {
21970 if (mask & (1 << i))
21971 {
21972 reg = gen_rtx_REG (SImode, i);
21973
21974 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
21975
21976 if (dwarf_regs_mask & (1 << i))
21977 {
21978 tmp
21979 = gen_rtx_SET (gen_frame_mem
21980 (SImode,
21981 plus_constant (Pmode, stack_pointer_rtx,
21982 4 * j)),
21983 reg);
21984 RTX_FRAME_RELATED_P (tmp) = 1;
21985 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21986 }
21987
21988 j++;
21989 }
21990 }
21991
21992 par = emit_insn (par);
21993
21994 tmp = gen_rtx_SET (stack_pointer_rtx,
21995 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21996 RTX_FRAME_RELATED_P (tmp) = 1;
21997 XVECEXP (dwarf, 0, 0) = tmp;
21998
21999 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22000
22001 return par;
22002 }
22003
22004 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22005 SIZE is the offset to be adjusted.
22006 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22007 static void
22008 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22009 {
22010 rtx dwarf;
22011
22012 RTX_FRAME_RELATED_P (insn) = 1;
22013 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22014 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22015 }
22016
22017 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22018 SAVED_REGS_MASK shows which registers need to be restored.
22019
22020 Unfortunately, since this insn does not reflect very well the actual
22021 semantics of the operation, we need to annotate the insn for the benefit
22022 of DWARF2 frame unwind information. */
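/* As a small concrete illustration (the register choice is arbitrary): for
   SAVED_REGS_MASK == {r4, r5, pc} the emitted jump insn is recognized as a
   pop_multi that returns via PC, roughly "pop {r4, r5, pc}"; its PARALLEL
   holds the return, the sp = sp + 12 update and one load per register, and
   REG_CFA_RESTORE notes are attached for r4 and r5 but not for PC. */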
22023 static void
22024 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22025 {
22026 int num_regs = 0;
22027 int i, j;
22028 rtx par;
22029 rtx dwarf = NULL_RTX;
22030 rtx tmp, reg;
22031 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22032 int offset_adj;
22033 int emit_update;
22034
22035 offset_adj = return_in_pc ? 1 : 0;
22036 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22037 if (saved_regs_mask & (1 << i))
22038 num_regs++;
22039
22040 gcc_assert (num_regs && num_regs <= 16);
22041
22042 /* If SP is in the register list, then we don't emit the SP update insn. */
22043 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22044
22045 /* The parallel needs to hold num_regs SETs
22046 and one SET for the stack update. */
22047 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22048
22049 if (return_in_pc)
22050 XVECEXP (par, 0, 0) = ret_rtx;
22051
22052 if (emit_update)
22053 {
22054 /* Increment the stack pointer, based on there being
22055 num_regs 4-byte registers to restore. */
22056 tmp = gen_rtx_SET (stack_pointer_rtx,
22057 plus_constant (Pmode,
22058 stack_pointer_rtx,
22059 4 * num_regs));
22060 RTX_FRAME_RELATED_P (tmp) = 1;
22061 XVECEXP (par, 0, offset_adj) = tmp;
22062 }
22063
22064 /* Now restore every reg, which may include PC. */
22065 for (j = 0, i = 0; j < num_regs; i++)
22066 if (saved_regs_mask & (1 << i))
22067 {
22068 reg = gen_rtx_REG (SImode, i);
22069 if ((num_regs == 1) && emit_update && !return_in_pc)
22070 {
22071 /* Emit single load with writeback. */
22072 tmp = gen_frame_mem (SImode,
22073 gen_rtx_POST_INC (Pmode,
22074 stack_pointer_rtx));
22075 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22076 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22077 return;
22078 }
22079
22080 tmp = gen_rtx_SET (reg,
22081 gen_frame_mem
22082 (SImode,
22083 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22084 RTX_FRAME_RELATED_P (tmp) = 1;
22085 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22086
22087 /* We need to maintain a sequence for DWARF info too. As the DWARF
22088 info should not include PC, skip it. */
22089 if (i != PC_REGNUM)
22090 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22091
22092 j++;
22093 }
22094
22095 if (return_in_pc)
22096 par = emit_jump_insn (par);
22097 else
22098 par = emit_insn (par);
22099
22100 REG_NOTES (par) = dwarf;
22101 if (!return_in_pc)
22102 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22103 stack_pointer_rtx, stack_pointer_rtx);
22104 }
22105
22106 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22107 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22108
22109 Unfortunately, since this insn does not reflect very well the actual
22110 semantics of the operation, we need to annotate the insn for the benefit
22111 of DWARF2 frame unwind information. */
22112 static void
22113 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22114 {
22115 int i, j;
22116 rtx par;
22117 rtx dwarf = NULL_RTX;
22118 rtx tmp, reg;
22119
22120 gcc_assert (num_regs && num_regs <= 32);
22121
22122 /* Work around the ARM10 VFPr1 bug. */
22123 if (num_regs == 2 && !arm_arch6)
22124 {
22125 if (first_reg == 15)
22126 first_reg--;
22127
22128 num_regs++;
22129 }
22130
22131 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22132 there could be up to 32 D-registers to restore.
22133 If there are more than 16 D-registers, make two recursive calls,
22134 each of which emits one pop_multi instruction. */
22135 if (num_regs > 16)
22136 {
22137 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22138 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22139 return;
22140 }
22141
22142 /* The parallel needs to hold num_regs SETs
22143 and one SET for the stack update. */
22144 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22145
22146 /* Increment the stack pointer, based on there being
22147 num_regs 8-byte registers to restore. */
22148 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22149 RTX_FRAME_RELATED_P (tmp) = 1;
22150 XVECEXP (par, 0, 0) = tmp;
22151
22152 /* Now show every reg that will be restored, using a SET for each. */
22153 for (j = 0, i=first_reg; j < num_regs; i += 2)
22154 {
22155 reg = gen_rtx_REG (DFmode, i);
22156
22157 tmp = gen_rtx_SET (reg,
22158 gen_frame_mem
22159 (DFmode,
22160 plus_constant (Pmode, base_reg, 8 * j)));
22161 RTX_FRAME_RELATED_P (tmp) = 1;
22162 XVECEXP (par, 0, j + 1) = tmp;
22163
22164 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22165
22166 j++;
22167 }
22168
22169 par = emit_insn (par);
22170 REG_NOTES (par) = dwarf;
22171
22172 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22173 if (REGNO (base_reg) == IP_REGNUM)
22174 {
22175 RTX_FRAME_RELATED_P (par) = 1;
22176 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22177 }
22178 else
22179 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22180 base_reg, base_reg);
22181 }
22182
22183 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22184 an even number of registers is being popped, multiple LDRD patterns are
22185 created, one for each register pair. If an odd number of registers is
22186 popped, the last register is loaded using an LDR pattern. */
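/* For example (a sketch; the register choice is arbitrary): with
   SAVED_REGS_MASK covering {r4, r5, r6, pc}, PC reduces the pair count to
   one, so a single LDRD is emitted with offset addressing, the stack is
   adjusted past it, and the remaining {r6, pc} are handled by
   arm_emit_multi_reg_pop, roughly:

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}  */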
22187 static void
22188 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22189 {
22190 int num_regs = 0;
22191 int i, j;
22192 rtx par = NULL_RTX;
22193 rtx dwarf = NULL_RTX;
22194 rtx tmp, reg, tmp1;
22195 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22196
22197 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22198 if (saved_regs_mask & (1 << i))
22199 num_regs++;
22200
22201 gcc_assert (num_regs && num_regs <= 16);
22202
22203 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22204 to be popped. So, if num_regs is even, now it will become odd,
22205 and we can generate pop with PC. If num_regs is odd, it will be
22206 even now, and ldr with return can be generated for PC. */
22207 if (return_in_pc)
22208 num_regs--;
22209
22210 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22211
22212 /* Var j iterates over the registers in saved_regs_mask, while var i
22213 gives the index of a saved register within the stack frame.
22214 A PARALLEL RTX of a register pair is created here, so that the pattern
22215 for LDRD can be matched. As PC is always the last register to be
22216 popped, and we have already decremented num_regs if PC is set, we
22217 don't have to worry about PC in this loop. */
22218 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22219 if (saved_regs_mask & (1 << j))
22220 {
22221 /* Create RTX for memory load. */
22222 reg = gen_rtx_REG (SImode, j);
22223 tmp = gen_rtx_SET (reg,
22224 gen_frame_mem (SImode,
22225 plus_constant (Pmode,
22226 stack_pointer_rtx, 4 * i)));
22227 RTX_FRAME_RELATED_P (tmp) = 1;
22228
22229 if (i % 2 == 0)
22230 {
22231 /* When saved-register index (i) is even, the RTX to be emitted is
22232 yet to be created. Hence create it first. The LDRD pattern we
22233 are generating is :
22234 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22235 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22236 where target registers need not be consecutive. */
22237 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22238 dwarf = NULL_RTX;
22239 }
22240
22241 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i
22242 is added as the 0th element, and if i is odd, it is added as the 1st
22243 element of the LDRD pattern shown above. */
22244 XVECEXP (par, 0, (i % 2)) = tmp;
22245 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22246
22247 if ((i % 2) == 1)
22248 {
22249 /* When saved-register index (i) is odd, RTXs for both the registers
22250 to be loaded are generated in above given LDRD pattern, and the
22251 pattern can be emitted now. */
22252 par = emit_insn (par);
22253 REG_NOTES (par) = dwarf;
22254 RTX_FRAME_RELATED_P (par) = 1;
22255 }
22256
22257 i++;
22258 }
22259
22260 /* If the number of registers popped is odd and return_in_pc is false, or
22261 the number of registers is even and return_in_pc is true, the last
22262 register is popped using LDR. It can be PC as well. Hence, adjust the
22263 stack first and then use LDR with post-increment. */
22264
22265 /* Increment the stack pointer, based on there being
22266 num_regs 4-byte registers to restore. */
22267 tmp = gen_rtx_SET (stack_pointer_rtx,
22268 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22269 RTX_FRAME_RELATED_P (tmp) = 1;
22270 tmp = emit_insn (tmp);
22271 if (!return_in_pc)
22272 {
22273 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22274 stack_pointer_rtx, stack_pointer_rtx);
22275 }
22276
22277 dwarf = NULL_RTX;
22278
22279 if (((num_regs % 2) == 1 && !return_in_pc)
22280 || ((num_regs % 2) == 0 && return_in_pc))
22281 {
22282 /* Scan for the single register to be popped. Skip until the saved
22283 register is found. */
22284 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22285
22286 /* Gen LDR with post increment here. */
22287 tmp1 = gen_rtx_MEM (SImode,
22288 gen_rtx_POST_INC (SImode,
22289 stack_pointer_rtx));
22290 set_mem_alias_set (tmp1, get_frame_alias_set ());
22291
22292 reg = gen_rtx_REG (SImode, j);
22293 tmp = gen_rtx_SET (reg, tmp1);
22294 RTX_FRAME_RELATED_P (tmp) = 1;
22295 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22296
22297 if (return_in_pc)
22298 {
22299 /* If return_in_pc, j must be PC_REGNUM. */
22300 gcc_assert (j == PC_REGNUM);
22301 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22302 XVECEXP (par, 0, 0) = ret_rtx;
22303 XVECEXP (par, 0, 1) = tmp;
22304 par = emit_jump_insn (par);
22305 }
22306 else
22307 {
22308 par = emit_insn (tmp);
22309 REG_NOTES (par) = dwarf;
22310 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22311 stack_pointer_rtx, stack_pointer_rtx);
22312 }
22313
22314 }
22315 else if ((num_regs % 2) == 1 && return_in_pc)
22316 {
22317 /* There are 2 registers to be popped. So, generate the pattern
22318 pop_multiple_with_stack_update_and_return to pop in PC. */
22319 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22320 }
22321
22322 return;
22323 }
22324
22325 /* LDRD in ARM mode needs consecutive registers as operands. This function
22326 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22327 offset addressing and then generates one separate stack update. This provides
22328 more scheduling freedom, compared to writeback on every load. However,
22329 if the function returns using load into PC directly
22330 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22331 before the last load. TODO: Add a peephole optimization to recognize
22332 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22333 peephole optimization to merge the load at stack-offset zero
22334 with the stack update instruction using load with writeback
22335 in post-index addressing mode. */
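/* For example (a sketch; the register choice is arbitrary): with
   SAVED_REGS_MASK covering {r4, r5, r6} and no PC, every load uses offset
   addressing and a single stack update follows, roughly:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12  */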
22336 static void
22337 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22338 {
22339 int j = 0;
22340 int offset = 0;
22341 rtx par = NULL_RTX;
22342 rtx dwarf = NULL_RTX;
22343 rtx tmp, mem;
22344
22345 /* Restore saved registers. */
22346 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22347 j = 0;
22348 while (j <= LAST_ARM_REGNUM)
22349 if (saved_regs_mask & (1 << j))
22350 {
22351 if ((j % 2) == 0
22352 && (saved_regs_mask & (1 << (j + 1)))
22353 && (j + 1) != PC_REGNUM)
22354 {
22355 /* Current register and next register form register pair for which
22356 LDRD can be generated. PC is always the last register popped, and
22357 we handle it separately. */
22358 if (offset > 0)
22359 mem = gen_frame_mem (DImode,
22360 plus_constant (Pmode,
22361 stack_pointer_rtx,
22362 offset));
22363 else
22364 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22365
22366 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22367 tmp = emit_insn (tmp);
22368 RTX_FRAME_RELATED_P (tmp) = 1;
22369
22370 /* Generate dwarf info. */
22371
22372 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22373 gen_rtx_REG (SImode, j),
22374 NULL_RTX);
22375 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22376 gen_rtx_REG (SImode, j + 1),
22377 dwarf);
22378
22379 REG_NOTES (tmp) = dwarf;
22380
22381 offset += 8;
22382 j += 2;
22383 }
22384 else if (j != PC_REGNUM)
22385 {
22386 /* Emit a single word load. */
22387 if (offset > 0)
22388 mem = gen_frame_mem (SImode,
22389 plus_constant (Pmode,
22390 stack_pointer_rtx,
22391 offset));
22392 else
22393 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22394
22395 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22396 tmp = emit_insn (tmp);
22397 RTX_FRAME_RELATED_P (tmp) = 1;
22398
22399 /* Generate dwarf info. */
22400 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22401 gen_rtx_REG (SImode, j),
22402 NULL_RTX);
22403
22404 offset += 4;
22405 j += 1;
22406 }
22407 else /* j == PC_REGNUM */
22408 j++;
22409 }
22410 else
22411 j++;
22412
22413 /* Update the stack. */
22414 if (offset > 0)
22415 {
22416 tmp = gen_rtx_SET (stack_pointer_rtx,
22417 plus_constant (Pmode,
22418 stack_pointer_rtx,
22419 offset));
22420 tmp = emit_insn (tmp);
22421 arm_add_cfa_adjust_cfa_note (tmp, offset,
22422 stack_pointer_rtx, stack_pointer_rtx);
22423 offset = 0;
22424 }
22425
22426 if (saved_regs_mask & (1 << PC_REGNUM))
22427 {
22428 /* Only PC is to be popped. */
22429 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22430 XVECEXP (par, 0, 0) = ret_rtx;
22431 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22432 gen_frame_mem (SImode,
22433 gen_rtx_POST_INC (SImode,
22434 stack_pointer_rtx)));
22435 RTX_FRAME_RELATED_P (tmp) = 1;
22436 XVECEXP (par, 0, 1) = tmp;
22437 par = emit_jump_insn (par);
22438
22439 /* Generate dwarf info. */
22440 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22441 gen_rtx_REG (SImode, PC_REGNUM),
22442 NULL_RTX);
22443 REG_NOTES (par) = dwarf;
22444 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22445 stack_pointer_rtx, stack_pointer_rtx);
22446 }
22447 }
22448
22449 /* Calculate the size of the return value that is passed in registers. */
22450 static unsigned
22451 arm_size_return_regs (void)
22452 {
22453 machine_mode mode;
22454
22455 if (crtl->return_rtx != 0)
22456 mode = GET_MODE (crtl->return_rtx);
22457 else
22458 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22459
22460 return GET_MODE_SIZE (mode);
22461 }
22462
22463 /* Return true if the current function needs to save/restore LR. */
22464 static bool
22465 thumb_force_lr_save (void)
22466 {
22467 return !cfun->machine->lr_save_eliminated
22468 && (!crtl->is_leaf
22469 || thumb_far_jump_used_p ()
22470 || df_regs_ever_live_p (LR_REGNUM));
22471 }
22472
22473 /* We do not know if r3 will be available, because
22474 we have an indirect tail call happening in this
22475 particular case. */
22476 static bool
22477 is_indirect_tailcall_p (rtx call)
22478 {
22479 rtx pat = PATTERN (call);
22480
22481 /* Indirect tail call. */
22482 pat = XVECEXP (pat, 0, 0);
22483 if (GET_CODE (pat) == SET)
22484 pat = SET_SRC (pat);
22485
22486 pat = XEXP (XEXP (pat, 0), 0);
22487 return REG_P (pat);
22488 }
22489
22490 /* Return true if r3 is used by any of the tail call insns in the
22491 current function. */
22492 static bool
22493 any_sibcall_could_use_r3 (void)
22494 {
22495 edge_iterator ei;
22496 edge e;
22497
22498 if (!crtl->tail_call_emit)
22499 return false;
22500 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22501 if (e->flags & EDGE_SIBCALL)
22502 {
22503 rtx_insn *call = BB_END (e->src);
22504 if (!CALL_P (call))
22505 call = prev_nonnote_nondebug_insn (call);
22506 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22507 if (find_regno_fusage (call, USE, 3)
22508 || is_indirect_tailcall_p (call))
22509 return true;
22510 }
22511 return false;
22512 }
22513
22514
22515 /* Compute the distance from register FROM to register TO.
22516 These can be the arg pointer (26), the soft frame pointer (25),
22517 the stack pointer (13) or the hard frame pointer (11).
22518 In thumb mode r7 is used as the soft frame pointer, if needed.
22519 Typical stack layout looks like this:
22520
22521 old stack pointer -> | |
22522 ----
22523 | | \
22524 | | saved arguments for
22525 | | vararg functions
22526 | | /
22527 --
22528 hard FP & arg pointer -> | | \
22529 | | stack
22530 | | frame
22531 | | /
22532 --
22533 | | \
22534 | | call saved
22535 | | registers
22536 soft frame pointer -> | | /
22537 --
22538 | | \
22539 | | local
22540 | | variables
22541 locals base pointer -> | | /
22542 --
22543 | | \
22544 | | outgoing
22545 | | arguments
22546 current stack pointer -> | | /
22547 --
22548
22549 For a given function some or all of these stack components
22550 may not be needed, giving rise to the possibility of
22551 eliminating some of the registers.
22552
22553 The values returned by this function must reflect the behavior
22554 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22555
22556 The sign of the number returned reflects the direction of stack
22557 growth, so the values are positive for all eliminations except
22558 from the soft frame pointer to the hard frame pointer.
22559
22560 SFP may point just inside the local variables block to ensure correct
22561 alignment. */
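/* For instance, eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM
   below yields offsets->outgoing_args - (offsets->saved_args + 4), which is
   -4 for a function that pushes nothing onto the stack at all. */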
22562
22563
22564 /* Return cached stack offsets. */
22565
22566 static arm_stack_offsets *
22567 arm_get_frame_offsets (void)
22568 {
22569 struct arm_stack_offsets *offsets;
22570
22571 offsets = &cfun->machine->stack_offsets;
22572
22573 return offsets;
22574 }
22575
22576
22577 /* Calculate stack offsets. These are used to calculate register elimination
22578 offsets and in prologue/epilogue code. Also calculates which registers
22579 should be saved. */
22580
22581 static void
22582 arm_compute_frame_layout (void)
22583 {
22584 struct arm_stack_offsets *offsets;
22585 unsigned long func_type;
22586 int saved;
22587 int core_saved;
22588 HOST_WIDE_INT frame_size;
22589 int i;
22590
22591 offsets = &cfun->machine->stack_offsets;
22592
22593 /* Initially this is the size of the local variables. It will be translated
22594 into an offset once we have determined the size of the preceding data. */
22595 frame_size = ROUND_UP_WORD (get_frame_size ());
22596
22597 /* Space for variadic functions. */
22598 offsets->saved_args = crtl->args.pretend_args_size;
22599
22600 /* In Thumb mode this is incorrect, but never used. */
22601 offsets->frame
22602 = (offsets->saved_args
22603 + arm_compute_static_chain_stack_bytes ()
22604 + (frame_pointer_needed ? 4 : 0));
22605
22606 if (TARGET_32BIT)
22607 {
22608 unsigned int regno;
22609
22610 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22611 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22612 saved = core_saved;
22613
22614 /* We know that SP will be doubleword aligned on entry, and we must
22615 preserve that condition at any subroutine call. We also require the
22616 soft frame pointer to be doubleword aligned. */
22617
22618 if (TARGET_REALLY_IWMMXT)
22619 {
22620 /* Check for the call-saved iWMMXt registers. */
22621 for (regno = FIRST_IWMMXT_REGNUM;
22622 regno <= LAST_IWMMXT_REGNUM;
22623 regno++)
22624 if (reg_needs_saving_p (regno))
22625 saved += 8;
22626 }
22627
22628 func_type = arm_current_func_type ();
22629 /* Space for saved VFP registers. */
22630 if (! IS_VOLATILE (func_type)
22631 && TARGET_VFP_BASE)
22632 saved += arm_get_vfp_saved_size ();
22633
22634 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22635 nonsecure entry functions with VSTR/VLDR. */
22636 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22637 saved += 4;
22638 }
22639 else /* TARGET_THUMB1 */
22640 {
22641 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22642 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22643 saved = core_saved;
22644 if (TARGET_BACKTRACE)
22645 saved += 16;
22646 }
22647
22648 /* Saved registers include the stack frame. */
22649 offsets->saved_regs
22650 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22651 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22652
22653 /* A leaf function does not need any stack alignment if it has nothing
22654 on the stack. */
22655 if (crtl->is_leaf && frame_size == 0
22656 /* However if it calls alloca(), we have a dynamically allocated
22657 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22658 && ! cfun->calls_alloca)
22659 {
22660 offsets->outgoing_args = offsets->soft_frame;
22661 offsets->locals_base = offsets->soft_frame;
22662 return;
22663 }
22664
22665 /* Ensure SFP has the correct alignment. */
22666 if (ARM_DOUBLEWORD_ALIGN
22667 && (offsets->soft_frame & 7))
22668 {
22669 offsets->soft_frame += 4;
22670 /* Try to align stack by pushing an extra reg. Don't bother doing this
22671 when there is a stack frame as the alignment will be rolled into
22672 the normal stack adjustment. */
22673 if (frame_size + crtl->outgoing_args_size == 0)
22674 {
22675 int reg = -1;
22676
22677 /* Register r3 is caller-saved. Normally it does not need to be
22678 saved on entry by the prologue. However if we choose to save
22679 it for padding then we may confuse the compiler into thinking
22680 a prologue sequence is required when in fact it is not. This
22681 will occur when shrink-wrapping if r3 is used as a scratch
22682 register and there are no other callee-saved writes.
22683
22684 This situation can be avoided when other callee-saved registers
22685 are available and r3 is not mandatory if we choose a callee-saved
22686 register for padding. */
22687 bool prefer_callee_reg_p = false;
22688
22689 /* If it is safe to use r3, then do so. This sometimes
22690 generates better code on Thumb-2 by avoiding the need to
22691 use 32-bit push/pop instructions. */
22692 if (! any_sibcall_could_use_r3 ()
22693 && arm_size_return_regs () <= 12
22694 && (offsets->saved_regs_mask & (1 << 3)) == 0
22695 && (TARGET_THUMB2
22696 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22697 {
22698 reg = 3;
22699 if (!TARGET_THUMB2)
22700 prefer_callee_reg_p = true;
22701 }
22702 if (reg == -1
22703 || prefer_callee_reg_p)
22704 {
22705 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22706 {
22707 /* Avoid fixed registers; they may be changed at
22708 arbitrary times so it's unsafe to restore them
22709 during the epilogue. */
22710 if (!fixed_regs[i]
22711 && (offsets->saved_regs_mask & (1 << i)) == 0)
22712 {
22713 reg = i;
22714 break;
22715 }
22716 }
22717 }
22718
22719 if (reg != -1)
22720 {
22721 offsets->saved_regs += 4;
22722 offsets->saved_regs_mask |= (1 << reg);
22723 }
22724 }
22725 }
22726
22727 offsets->locals_base = offsets->soft_frame + frame_size;
22728 offsets->outgoing_args = (offsets->locals_base
22729 + crtl->outgoing_args_size);
22730
22731 if (ARM_DOUBLEWORD_ALIGN)
22732 {
22733 /* Ensure SP remains doubleword aligned. */
22734 if (offsets->outgoing_args & 7)
22735 offsets->outgoing_args += 4;
22736 gcc_assert (!(offsets->outgoing_args & 7));
22737 }
22738 }
22739
22740
22741 /* Calculate the relative offsets for the different stack pointers. Positive
22742 offsets are in the direction of stack growth. */
22743
22744 HOST_WIDE_INT
22745 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22746 {
22747 arm_stack_offsets *offsets;
22748
22749 offsets = arm_get_frame_offsets ();
22750
22751 /* OK, now we have enough information to compute the distances.
22752 There must be an entry in these switch tables for each pair
22753 of registers in ELIMINABLE_REGS, even if some of the entries
22754 seem to be redundant or useless. */
22755 switch (from)
22756 {
22757 case ARG_POINTER_REGNUM:
22758 switch (to)
22759 {
22760 case THUMB_HARD_FRAME_POINTER_REGNUM:
22761 return 0;
22762
22763 case FRAME_POINTER_REGNUM:
22764 /* This is the reverse of the soft frame pointer
22765 to hard frame pointer elimination below. */
22766 return offsets->soft_frame - offsets->saved_args;
22767
22768 case ARM_HARD_FRAME_POINTER_REGNUM:
22769 /* This is only non-zero in the case where the static chain register
22770 is stored above the frame. */
22771 return offsets->frame - offsets->saved_args - 4;
22772
22773 case STACK_POINTER_REGNUM:
22774 /* If nothing has been pushed on the stack at all
22775 then this will return -4. This *is* correct! */
22776 return offsets->outgoing_args - (offsets->saved_args + 4);
22777
22778 default:
22779 gcc_unreachable ();
22780 }
22781 gcc_unreachable ();
22782
22783 case FRAME_POINTER_REGNUM:
22784 switch (to)
22785 {
22786 case THUMB_HARD_FRAME_POINTER_REGNUM:
22787 return 0;
22788
22789 case ARM_HARD_FRAME_POINTER_REGNUM:
22790 /* The hard frame pointer points to the top entry in the
22791 stack frame. The soft frame pointer points to the bottom
22792 entry in the stack frame. If there is no stack frame at
22793 all, then they are identical. */
22794
22795 return offsets->frame - offsets->soft_frame;
22796
22797 case STACK_POINTER_REGNUM:
22798 return offsets->outgoing_args - offsets->soft_frame;
22799
22800 default:
22801 gcc_unreachable ();
22802 }
22803 gcc_unreachable ();
22804
22805 default:
22806 /* You cannot eliminate from the stack pointer.
22807 In theory you could eliminate from the hard frame
22808 pointer to the stack pointer, but this will never
22809 happen, since if a stack frame is not needed the
22810 hard frame pointer will never be used. */
22811 gcc_unreachable ();
22812 }
22813 }
22814
22815 /* Given FROM and TO register numbers, say whether this elimination is
22816 allowed. Frame pointer elimination is automatically handled.
22817
22818 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
22819 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
22820 pointer, we must eliminate FRAME_POINTER_REGNUM into
22821 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22822 ARG_POINTER_REGNUM. */
22823
22824 bool
22825 arm_can_eliminate (const int from, const int to)
22826 {
22827 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22828 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22829 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
22830 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
22831 true);
22832 }
22833
22834 /* Emit RTL to save coprocessor registers on function entry. Returns the
22835 number of bytes pushed. */
22836
22837 static int
22838 arm_save_coproc_regs(void)
22839 {
22840 int saved_size = 0;
22841 unsigned reg;
22842 unsigned start_reg;
22843 rtx insn;
22844
22845 if (TARGET_REALLY_IWMMXT)
22846 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22847 if (reg_needs_saving_p (reg))
22848 {
22849 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22850 insn = gen_rtx_MEM (V2SImode, insn);
22851 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22852 RTX_FRAME_RELATED_P (insn) = 1;
22853 saved_size += 8;
22854 }
22855
22856 if (TARGET_VFP_BASE)
22857 {
22858 start_reg = FIRST_VFP_REGNUM;
22859
22860 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22861 {
22862 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
22863 {
22864 if (start_reg != reg)
22865 saved_size += vfp_emit_fstmd (start_reg,
22866 (reg - start_reg) / 2);
22867 start_reg = reg + 2;
22868 }
22869 }
22870 if (start_reg != reg)
22871 saved_size += vfp_emit_fstmd (start_reg,
22872 (reg - start_reg) / 2);
22873 }
22874 return saved_size;
22875 }
22876
22877
22878 /* Set the Thumb frame pointer from the stack pointer. */
22879
22880 static void
22881 thumb_set_frame_pointer (arm_stack_offsets *offsets)
22882 {
22883 HOST_WIDE_INT amount;
22884 rtx insn, dwarf;
22885
22886 amount = offsets->outgoing_args - offsets->locals_base;
22887 if (amount < 1024)
22888 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22889 stack_pointer_rtx, GEN_INT (amount)));
22890 else
22891 {
22892 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22893 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
22894 expects the first two operands to be the same. */
22895 if (TARGET_THUMB2)
22896 {
22897 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22898 stack_pointer_rtx,
22899 hard_frame_pointer_rtx));
22900 }
22901 else
22902 {
22903 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22904 hard_frame_pointer_rtx,
22905 stack_pointer_rtx));
22906 }
22907 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22908 plus_constant (Pmode, stack_pointer_rtx, amount));
22909 RTX_FRAME_RELATED_P (dwarf) = 1;
22910 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22911 }
22912
22913 RTX_FRAME_RELATED_P (insn) = 1;
22914 }
22915
22916 struct scratch_reg {
22917 rtx reg;
22918 bool saved;
22919 };
22920
22921 /* Return a short-lived scratch register for use as a 2nd scratch register on
22922 function entry after the registers are saved in the prologue. This register
22923 must be released by means of release_scratch_register_on_entry. IP is not
22924 considered since it is always used as the 1st scratch register if available.
22925
22926 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22927 mask of live registers. */
22928
22929 static void
22930 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22931 unsigned long live_regs)
22932 {
22933 int regno = -1;
22934
22935 sr->saved = false;
22936
22937 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22938 regno = LR_REGNUM;
22939 else
22940 {
22941 unsigned int i;
22942
22943 for (i = 4; i < 11; i++)
22944 if (regno1 != i && (live_regs & (1 << i)) != 0)
22945 {
22946 regno = i;
22947 break;
22948 }
22949
22950 if (regno < 0)
22951 {
22952 /* If IP is used as the 1st scratch register for a nested function,
22953 then either r3 wasn't available or is used to preserve IP. */
22954 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
22955 regno1 = 3;
22956 regno = (regno1 == 3 ? 2 : 3);
22957 sr->saved
22958 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
22959 regno);
22960 }
22961 }
22962
22963 sr->reg = gen_rtx_REG (SImode, regno);
22964 if (sr->saved)
22965 {
22966 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22967 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
22968 rtx x = gen_rtx_SET (stack_pointer_rtx,
22969 plus_constant (Pmode, stack_pointer_rtx, -4));
22970 RTX_FRAME_RELATED_P (insn) = 1;
22971 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22972 }
22973 }
22974
22975 /* Release a scratch register obtained from the preceding function. */
22976
22977 static void
22978 release_scratch_register_on_entry (struct scratch_reg *sr)
22979 {
22980 if (sr->saved)
22981 {
22982 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
22983 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
22984 rtx x = gen_rtx_SET (stack_pointer_rtx,
22985 plus_constant (Pmode, stack_pointer_rtx, 4));
22986 RTX_FRAME_RELATED_P (insn) = 1;
22987 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22988 }
22989 }
22990
22991 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22992
22993 #if PROBE_INTERVAL > 4096
22994 #error Cannot use indexed addressing mode for stack probing
22995 #endif
22996
22997 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22998 inclusive. These are offsets from the current stack pointer. REGNO1
22999 is the index number of the 1st scratch register and LIVE_REGS is the
23000 mask of live registers. */
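/* A worked example of the unrolled case (assuming the default 4 KB
   PROBE_INTERVAL and FIRST == 0): for SIZE == 10000, the code below probes
   at sp - 4096 and sp - 8192, and then covers the remaining 1808 bytes
   with a final probe at sp - 10000. */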
23001
23002 static void
23003 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23004 unsigned int regno1, unsigned long live_regs)
23005 {
23006 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23007
23008 /* See if we have a constant small number of probes to generate. If so,
23009 that's the easy case. */
23010 if (size <= PROBE_INTERVAL)
23011 {
23012 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23013 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23014 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23015 }
23016
23017 /* The run-time loop is made up of 10 insns in the generic case while the
23018 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23019 else if (size <= 5 * PROBE_INTERVAL)
23020 {
23021 HOST_WIDE_INT i, rem;
23022
23023 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23024 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23025 emit_stack_probe (reg1);
23026
23027 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23028 it exceeds SIZE. If only two probes are needed, this will not
23029 generate any code. Then probe at FIRST + SIZE. */
23030 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23031 {
23032 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23033 emit_stack_probe (reg1);
23034 }
23035
23036 rem = size - (i - PROBE_INTERVAL);
23037 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23038 {
23039 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23040 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23041 }
23042 else
23043 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23044 }
23045
23046 /* Otherwise, do the same as above, but in a loop. Note that we must be
23047 extra careful with variables wrapping around because we might be at
23048 the very top (or the very bottom) of the address space and we have
23049 to be able to handle this case properly; in particular, we use an
23050 equality test for the loop condition. */
23051 else
23052 {
23053 HOST_WIDE_INT rounded_size;
23054 struct scratch_reg sr;
23055
23056 get_scratch_register_on_entry (&sr, regno1, live_regs);
23057
23058 emit_move_insn (reg1, GEN_INT (first));
23059
23060
23061 /* Step 1: round SIZE to the previous multiple of the interval. */
23062
23063 rounded_size = size & -PROBE_INTERVAL;
23064 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23065
23066
23067 /* Step 2: compute initial and final value of the loop counter. */
23068
23069 /* TEST_ADDR = SP + FIRST. */
23070 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23071
23072 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23073 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23074
23075
23076 /* Step 3: the loop
23077
23078 do
23079 {
23080 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23081 probe at TEST_ADDR
23082 }
23083 while (TEST_ADDR != LAST_ADDR)
23084
23085 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23086 until it is equal to ROUNDED_SIZE. */
23087
23088 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23089
23090
23091 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23092 that SIZE is equal to ROUNDED_SIZE. */
23093
23094 if (size != rounded_size)
23095 {
23096 HOST_WIDE_INT rem = size - rounded_size;
23097
23098 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23099 {
23100 emit_set_insn (sr.reg,
23101 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23102 emit_stack_probe (plus_constant (Pmode, sr.reg,
23103 PROBE_INTERVAL - rem));
23104 }
23105 else
23106 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23107 }
23108
23109 release_scratch_register_on_entry (&sr);
23110 }
23111
23112 /* Make sure nothing is scheduled before we are done. */
23113 emit_insn (gen_blockage ());
23114 }
23115
23116 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23117 absolute addresses. */
23118
23119 const char *
23120 output_probe_stack_range (rtx reg1, rtx reg2)
23121 {
23122 static int labelno = 0;
23123 char loop_lab[32];
23124 rtx xops[2];
23125
23126 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23127
23128 /* Loop. */
23129 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23130
23131 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23132 xops[0] = reg1;
23133 xops[1] = GEN_INT (PROBE_INTERVAL);
23134 output_asm_insn ("sub\t%0, %0, %1", xops);
23135
23136 /* Probe at TEST_ADDR. */
23137 output_asm_insn ("str\tr0, [%0, #0]", xops);
23138
23139 /* Test if TEST_ADDR == LAST_ADDR. */
23140 xops[1] = reg2;
23141 output_asm_insn ("cmp\t%0, %1", xops);
23142
23143 /* Branch. */
23144 fputs ("\tbne\t", asm_out_file);
23145 assemble_name_raw (asm_out_file, loop_lab);
23146 fputc ('\n', asm_out_file);
23147
23148 return "";
23149 }
23150
23151 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23152 function. */
23153 void
23154 arm_expand_prologue (void)
23155 {
23156 rtx amount;
23157 rtx insn;
23158 rtx ip_rtx;
23159 unsigned long live_regs_mask;
23160 unsigned long func_type;
23161 int fp_offset = 0;
23162 int saved_pretend_args = 0;
23163 int saved_regs = 0;
23164 unsigned HOST_WIDE_INT args_to_push;
23165 HOST_WIDE_INT size;
23166 arm_stack_offsets *offsets;
23167 bool clobber_ip;
23168
23169 func_type = arm_current_func_type ();
23170
23171 /* Naked functions don't have prologues. */
23172 if (IS_NAKED (func_type))
23173 {
23174 if (flag_stack_usage_info)
23175 current_function_static_stack_size = 0;
23176 return;
23177 }
23178
23179 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
23180 args_to_push = crtl->args.pretend_args_size;
23181
23182 /* Compute which registers we will have to save onto the stack. */
23183 offsets = arm_get_frame_offsets ();
23184 live_regs_mask = offsets->saved_regs_mask;
23185
23186 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23187
23188 if (IS_STACKALIGN (func_type))
23189 {
23190 rtx r0, r1;
23191
23192 /* Handle a word-aligned stack pointer. We generate the following:
23193
23194 mov r0, sp
23195 bic r1, r0, #7
23196 mov sp, r1
23197 <save and restore r0 in normal prologue/epilogue>
23198 mov sp, r0
23199 bx lr
23200
23201 The unwinder doesn't need to know about the stack realignment.
23202 Just tell it we saved SP in r0. */
23203 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23204
23205 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23206 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23207
23208 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23209 RTX_FRAME_RELATED_P (insn) = 1;
23210 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23211
23212 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23213
23214 /* ??? The CFA changes here, which may cause GDB to conclude that it
23215 has entered a different function. That said, the unwind info is
23216 correct, individually, before and after this instruction because
23217 we've described the save of SP, which will override the default
23218 handling of SP as restoring from the CFA. */
23219 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23220 }
23221
23222 /* Let's compute the static_chain_stack_bytes required and store it. Right
23223 now the value must be -1 as stored by arm_init_machine_status (). */
23224 cfun->machine->static_chain_stack_bytes
23225 = arm_compute_static_chain_stack_bytes ();
23226
23227 /* The static chain register is the same as the IP register. If it is
23228 clobbered when creating the frame, we need to save and restore it. */
23229 clobber_ip = IS_NESTED (func_type)
23230 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23231 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23232 || flag_stack_clash_protection)
23233 && !df_regs_ever_live_p (LR_REGNUM)
23234 && arm_r3_live_at_start_p ()));
23235
23236 /* Find somewhere to store IP whilst the frame is being created.
23237 We try the following places in order:
23238
23239 1. The last argument register r3 if it is available.
23240 2. A slot on the stack above the frame if there are no
23241 arguments to push onto the stack.
23242 3. Register r3 again, after pushing the argument registers
23243 onto the stack, if this is a varargs function.
23244 4. The last slot on the stack created for the arguments to
23245 push, if this isn't a varargs function.
23246
23247 Note - we only need to tell the dwarf2 backend about the SP
23248 adjustment in the second variant; the static chain register
23249 doesn't need to be unwound, as it doesn't contain a value
23250 inherited from the caller. */
23251 if (clobber_ip)
23252 {
23253 if (!arm_r3_live_at_start_p ())
23254 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23255 else if (args_to_push == 0)
23256 {
23257 rtx addr, dwarf;
23258
23259 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
23260 saved_regs += 4;
23261
23262 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23263 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23264 fp_offset = 4;
23265
23266 /* Just tell the dwarf backend that we adjusted SP. */
23267 dwarf = gen_rtx_SET (stack_pointer_rtx,
23268 plus_constant (Pmode, stack_pointer_rtx,
23269 -fp_offset));
23270 RTX_FRAME_RELATED_P (insn) = 1;
23271 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23272 }
23273 else
23274 {
23275 /* Store the args on the stack. */
23276 if (cfun->machine->uses_anonymous_args)
23277 {
23278 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23279 (0xf0 >> (args_to_push / 4)) & 0xf);
23280 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23281 saved_pretend_args = 1;
23282 }
23283 else
23284 {
23285 rtx addr, dwarf;
23286
23287 if (args_to_push == 4)
23288 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23289 else
23290 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23291 plus_constant (Pmode,
23292 stack_pointer_rtx,
23293 -args_to_push));
23294
23295 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23296
23297 /* Just tell the dwarf backend that we adjusted SP. */
23298 dwarf = gen_rtx_SET (stack_pointer_rtx,
23299 plus_constant (Pmode, stack_pointer_rtx,
23300 -args_to_push));
23301 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23302 }
23303
23304 RTX_FRAME_RELATED_P (insn) = 1;
23305 fp_offset = args_to_push;
23306 args_to_push = 0;
23307 }
23308 }
23309
23310 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23311 {
23312 if (IS_INTERRUPT (func_type))
23313 {
23314 /* Interrupt functions must not corrupt any registers.
23315 Creating a frame pointer however, corrupts the IP
23316 register, so we must push it first. */
23317 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23318
23319 /* Do not set RTX_FRAME_RELATED_P on this insn.
23320 The dwarf stack unwinding code only wants to see one
23321 stack decrement per function, and this is not it. If
23322 this instruction is labeled as being part of the frame
23323 creation sequence then dwarf2out_frame_debug_expr will
23324 die when it encounters the assignment of IP to FP
23325 later on, since the use of SP here establishes SP as
23326 the CFA register and not IP.
23327
23328 Anyway this instruction is not really part of the stack
23329 frame creation although it is part of the prologue. */
23330 }
23331
23332 insn = emit_set_insn (ip_rtx,
23333 plus_constant (Pmode, stack_pointer_rtx,
23334 fp_offset));
23335 RTX_FRAME_RELATED_P (insn) = 1;
23336 }
23337
23338 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23339 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23340 {
23341 saved_regs += 4;
23342 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23343 GEN_INT (FPCXTNS_ENUM)));
23344 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23345 plus_constant (Pmode, stack_pointer_rtx, -4));
23346 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23347 RTX_FRAME_RELATED_P (insn) = 1;
23348 }
23349
23350 if (args_to_push)
23351 {
23352 /* Push the argument registers, or reserve space for them. */
23353 if (cfun->machine->uses_anonymous_args)
23354 insn = emit_multi_reg_push
23355 ((0xf0 >> (args_to_push / 4)) & 0xf,
23356 (0xf0 >> (args_to_push / 4)) & 0xf);
23357 else
23358 insn = emit_insn
23359 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23360 GEN_INT (- args_to_push)));
23361 RTX_FRAME_RELATED_P (insn) = 1;
23362 }
23363
23364 /* If this is an interrupt service routine, and the link register
23365 is going to be pushed, and we're not generating the extra push
23366 of IP (which is needed when a frame pointer is required and the
23367 APCS frame layout is used), then subtracting four from LR now
23368 means that the function return can be done with a single instruction. */
23369 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23370 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23371 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23372 && TARGET_ARM)
23373 {
23374 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23375
23376 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23377 }
23378
23379 if (live_regs_mask)
23380 {
23381 unsigned long dwarf_regs_mask = live_regs_mask;
23382
23383 saved_regs += bit_count (live_regs_mask) * 4;
23384 if (optimize_size && !frame_pointer_needed
23385 && saved_regs == offsets->saved_regs - offsets->saved_args)
23386 {
23387 /* If no coprocessor registers are being pushed and we don't have
23388 to worry about a frame pointer then push extra registers to
23389 create the stack frame. This is done in a way that does not
23390 alter the frame layout, so is independent of the epilogue. */
23391 int n;
23392 int frame;
23393 n = 0;
23394 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23395 n++;
23396 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23397 if (frame && n * 4 >= frame)
23398 {
23399 n = frame / 4;
23400 live_regs_mask |= (1 << n) - 1;
23401 saved_regs += frame;
23402 }
23403 }
23404
23405 if (TARGET_LDRD
23406 && current_tune->prefer_ldrd_strd
23407 && !optimize_function_for_size_p (cfun))
23408 {
23409 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23410 if (TARGET_THUMB2)
23411 thumb2_emit_strd_push (live_regs_mask);
23412 else if (TARGET_ARM
23413 && !TARGET_APCS_FRAME
23414 && !IS_INTERRUPT (func_type))
23415 arm_emit_strd_push (live_regs_mask);
23416 else
23417 {
23418 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23419 RTX_FRAME_RELATED_P (insn) = 1;
23420 }
23421 }
23422 else
23423 {
23424 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23425 RTX_FRAME_RELATED_P (insn) = 1;
23426 }
23427 }
23428
23429 if (! IS_VOLATILE (func_type))
23430 saved_regs += arm_save_coproc_regs ();
23431
23432 if (frame_pointer_needed && TARGET_ARM)
23433 {
23434 /* Create the new frame pointer. */
23435 if (TARGET_APCS_FRAME)
23436 {
23437 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23438 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23439 RTX_FRAME_RELATED_P (insn) = 1;
23440 }
23441 else
23442 {
23443 insn = GEN_INT (saved_regs - (4 + fp_offset));
23444 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23445 stack_pointer_rtx, insn));
23446 RTX_FRAME_RELATED_P (insn) = 1;
23447 }
23448 }
23449
23450 size = offsets->outgoing_args - offsets->saved_args;
23451 if (flag_stack_usage_info)
23452 current_function_static_stack_size = size;
23453
23454 /* If this isn't an interrupt service routine and we have a frame, then do
23455 stack checking. We use IP as the first scratch register, except for the
23456 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23457 if (!IS_INTERRUPT (func_type)
23458 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23459 || flag_stack_clash_protection))
23460 {
23461 unsigned int regno;
23462
23463 if (!IS_NESTED (func_type) || clobber_ip)
23464 regno = IP_REGNUM;
23465 else if (df_regs_ever_live_p (LR_REGNUM))
23466 regno = LR_REGNUM;
23467 else
23468 regno = 3;
23469
23470 if (crtl->is_leaf && !cfun->calls_alloca)
23471 {
23472 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23473 arm_emit_probe_stack_range (get_stack_check_protect (),
23474 size - get_stack_check_protect (),
23475 regno, live_regs_mask);
23476 }
23477 else if (size > 0)
23478 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23479 regno, live_regs_mask);
23480 }
23481
23482 /* Recover the static chain register. */
23483 if (clobber_ip)
23484 {
23485 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23486 insn = gen_rtx_REG (SImode, 3);
23487 else
23488 {
23489 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23490 insn = gen_frame_mem (SImode, insn);
23491 }
23492 emit_set_insn (ip_rtx, insn);
23493 emit_insn (gen_force_register_use (ip_rtx));
23494 }
23495
23496 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23497 {
23498 /* This add can produce multiple insns for a large constant, so we
23499 need to get tricky. */
23500 rtx_insn *last = get_last_insn ();
23501
23502 amount = GEN_INT (offsets->saved_args + saved_regs
23503 - offsets->outgoing_args);
23504
23505 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23506 amount));
23507 do
23508 {
23509 last = last ? NEXT_INSN (last) : get_insns ();
23510 RTX_FRAME_RELATED_P (last) = 1;
23511 }
23512 while (last != insn);
23513
23514 /* If the frame pointer is needed, emit a special barrier that
23515 will prevent the scheduler from moving stores to the frame
23516 before the stack adjustment. */
23517 if (frame_pointer_needed)
23518 emit_insn (gen_stack_tie (stack_pointer_rtx,
23519 hard_frame_pointer_rtx));
23520 }
23521
23522
23523 if (frame_pointer_needed && TARGET_THUMB2)
23524 thumb_set_frame_pointer (offsets);
23525
23526 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23527 {
23528 unsigned long mask;
23529
23530 mask = live_regs_mask;
23531 mask &= THUMB2_WORK_REGS;
23532 if (!IS_NESTED (func_type))
23533 mask |= (1 << IP_REGNUM);
23534 arm_load_pic_register (mask, NULL_RTX);
23535 }
23536
23537 /* If we are profiling, make sure no instructions are scheduled before
23538 the call to mcount. Similarly if the user has requested no
23539 scheduling in the prolog. Similarly if we want non-call exceptions
23540 using the EABI unwinder, to prevent faulting instructions from being
23541 swapped with a stack adjustment. */
23542 if (crtl->profile || !TARGET_SCHED_PROLOG
23543 || (arm_except_unwind_info (&global_options) == UI_TARGET
23544 && cfun->can_throw_non_call_exceptions))
23545 emit_insn (gen_blockage ());
23546
23547 /* If the link register is being kept alive, with the return address in it,
23548 then make sure that it does not get reused by the ce2 pass. */
23549 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23550 cfun->machine->lr_save_eliminated = 1;
23551 }
23552 \f
23553 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23554 static void
23555 arm_print_condition (FILE *stream)
23556 {
23557 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23558 {
23559 /* Branch conversion is not implemented for Thumb-2. */
23560 if (TARGET_THUMB)
23561 {
23562 output_operand_lossage ("predicated Thumb instruction");
23563 return;
23564 }
23565 if (current_insn_predicate != NULL)
23566 {
23567 output_operand_lossage
23568 ("predicated instruction in conditional sequence");
23569 return;
23570 }
23571
23572 fputs (arm_condition_codes[arm_current_cc], stream);
23573 }
23574 else if (current_insn_predicate)
23575 {
23576 enum arm_cond_code code;
23577
23578 if (TARGET_THUMB1)
23579 {
23580 output_operand_lossage ("predicated Thumb instruction");
23581 return;
23582 }
23583
23584 code = get_arm_condition_code (current_insn_predicate);
23585 fputs (arm_condition_codes[code], stream);
23586 }
23587 }
23588
23589
23590 /* Globally reserved letters: acln
23591 Punctuation letters currently used: @_|?().!#
23592 Lower case letters currently used: bcdefhimpqtvwxyz
23593 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
23594 Letters previously used, but now deprecated/obsolete: sVWXYZ.
23595
23596 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23597
23598 If CODE is 'd', then X is a condition operand and the instruction
23599 should only be executed if the condition is true.
23600 If CODE is 'D', then X is a condition operand and the instruction
23601 should only be executed if the condition is false: however, if the mode
23602 of the comparison is CCFPEmode, then always execute the instruction -- we
23603 do this because in these circumstances !GE does not necessarily imply LT;
23604 in these cases the instruction pattern will take care to make sure that
23605 an instruction containing %d will follow, thereby undoing the effects of
23606 doing this instruction unconditionally.
23607 If CODE is 'N' then X is a floating point operand that must be negated
23608 before output.
23609 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23610 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
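/* For example, with a DImode value held in r4, "%M0" in an output template
prints "{r4-r5}", and "%B0" applied to (const_int 0) prints -1. */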
23611 static void
23612 arm_print_operand (FILE *stream, rtx x, int code)
23613 {
23614 switch (code)
23615 {
23616 case '@':
23617 fputs (ASM_COMMENT_START, stream);
23618 return;
23619
23620 case '_':
23621 fputs (user_label_prefix, stream);
23622 return;
23623
23624 case '|':
23625 fputs (REGISTER_PREFIX, stream);
23626 return;
23627
23628 case '?':
23629 arm_print_condition (stream);
23630 return;
23631
23632 case '.':
23633 /* The current condition code for a condition code setting instruction.
23634 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23635 fputc ('s', stream);
23636 arm_print_condition (stream);
23637 return;
23638
23639 case '!':
23640 /* If the instruction is conditionally executed then print
23641 the current condition code, otherwise print 's'. */
23642 gcc_assert (TARGET_THUMB2);
23643 if (current_insn_predicate)
23644 arm_print_condition (stream);
23645 else
23646 fputc ('s', stream);
23647 break;
23648
23649 /* %# is a "break" sequence. It doesn't output anything, but is used to
23650 separate e.g. operand numbers from following text, if that text consists
23651 of further digits which we don't want to be part of the operand
23652 number. */
23653 case '#':
23654 return;
23655
23656 case 'N':
23657 {
23658 REAL_VALUE_TYPE r;
23659 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23660 fprintf (stream, "%s", fp_const_from_val (&r));
23661 }
23662 return;
23663
23664 /* An integer or symbol address without a preceding # sign. */
23665 case 'c':
23666 switch (GET_CODE (x))
23667 {
23668 case CONST_INT:
23669 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23670 break;
23671
23672 case SYMBOL_REF:
23673 output_addr_const (stream, x);
23674 break;
23675
23676 case CONST:
23677 if (GET_CODE (XEXP (x, 0)) == PLUS
23678 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23679 {
23680 output_addr_const (stream, x);
23681 break;
23682 }
23683 /* Fall through. */
23684
23685 default:
23686 output_operand_lossage ("Unsupported operand for code '%c'", code);
23687 }
23688 return;
23689
23690 /* An integer that we want to print in HEX. */
23691 case 'x':
23692 switch (GET_CODE (x))
23693 {
23694 case CONST_INT:
23695 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23696 break;
23697
23698 default:
23699 output_operand_lossage ("Unsupported operand for code '%c'", code);
23700 }
23701 return;
23702
23703 case 'B':
23704 if (CONST_INT_P (x))
23705 {
23706 HOST_WIDE_INT val;
23707 val = ARM_SIGN_EXTEND (~INTVAL (x));
23708 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23709 }
23710 else
23711 {
23712 putc ('~', stream);
23713 output_addr_const (stream, x);
23714 }
23715 return;
23716
23717 case 'b':
23718 /* Print the log2 of a CONST_INT. */
23719 {
23720 HOST_WIDE_INT val;
23721
23722 if (!CONST_INT_P (x)
23723 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23724 output_operand_lossage ("Unsupported operand for code '%c'", code);
23725 else
23726 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23727 }
23728 return;
23729
23730 case 'L':
23731 /* The low 16 bits of an immediate constant. */
23732 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
23733 return;
23734
23735 case 'i':
23736 fprintf (stream, "%s", arithmetic_instr (x, 1));
23737 return;
23738
23739 case 'I':
23740 fprintf (stream, "%s", arithmetic_instr (x, 0));
23741 return;
23742
23743 case 'S':
23744 {
23745 HOST_WIDE_INT val;
23746 const char *shift;
23747
23748 shift = shift_op (x, &val);
23749
23750 if (shift)
23751 {
23752 fprintf (stream, ", %s ", shift);
23753 if (val == -1)
23754 arm_print_operand (stream, XEXP (x, 1), 0);
23755 else
23756 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23757 }
23758 }
23759 return;
23760
23761 /* An explanation of the 'Q', 'R' and 'H' register operands:
23762
23763 In a pair of registers containing a DI or DF value the 'Q'
23764 operand returns the register number of the register containing
23765 the least significant part of the value. The 'R' operand returns
23766 the register number of the register containing the most
23767 significant part of the value.
23768
23769 The 'H' operand returns the higher of the two register numbers.
23770 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
23771 same as the 'Q' operand, since the most significant part of the
23772 value is held in the lower number register. The reverse is true
23773 on systems where WORDS_BIG_ENDIAN is false.
23774
23775 The purpose of these operands is to distinguish between cases
23776 where the endian-ness of the values is important (for example
23777 when they are added together), and cases where the endian-ness
23778 is irrelevant, but the order of register operations is important.
23779 For example when loading a value from memory into a register
23780 pair, the endian-ness does not matter. Provided that the value
23781 from the lower memory address is put into the lower numbered
23782 register, and the value from the higher address is put into the
23783 higher numbered register, the load will work regardless of whether
23784 the value being loaded is big-wordian or little-wordian. The
23785 order of the two register loads can matter however, if the address
23786 of the memory location is actually held in one of the registers
23787 being overwritten by the load.
23788
23789 The 'Q' and 'R' constraints are also available for 64-bit
23790 constants. */
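/* For example, for a DImode value held in the register pair r0/r1 on a
little-endian target, '%Q' prints r0, '%R' prints r1 and '%H' also prints
r1; when WORDS_BIG_ENDIAN is set, '%Q' and '%H' both print r1 while '%R'
prints r0. */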
23791 case 'Q':
23792 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23793 {
23794 rtx part = gen_lowpart (SImode, x);
23795 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23796 return;
23797 }
23798
23799 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23800 {
23801 output_operand_lossage ("invalid operand for code '%c'", code);
23802 return;
23803 }
23804
23805 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23806 return;
23807
23808 case 'R':
23809 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23810 {
23811 machine_mode mode = GET_MODE (x);
23812 rtx part;
23813
23814 if (mode == VOIDmode)
23815 mode = DImode;
23816 part = gen_highpart_mode (SImode, mode, x);
23817 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23818 return;
23819 }
23820
23821 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23822 {
23823 output_operand_lossage ("invalid operand for code '%c'", code);
23824 return;
23825 }
23826
23827 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23828 return;
23829
23830 case 'H':
23831 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23832 {
23833 output_operand_lossage ("invalid operand for code '%c'", code);
23834 return;
23835 }
23836
23837 asm_fprintf (stream, "%r", REGNO (x) + 1);
23838 return;
23839
23840 case 'J':
23841 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23842 {
23843 output_operand_lossage ("invalid operand for code '%c'", code);
23844 return;
23845 }
23846
23847 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23848 return;
23849
23850 case 'K':
23851 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23852 {
23853 output_operand_lossage ("invalid operand for code '%c'", code);
23854 return;
23855 }
23856
23857 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23858 return;
23859
23860 case 'm':
23861 asm_fprintf (stream, "%r",
23862 REG_P (XEXP (x, 0))
23863 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23864 return;
23865
23866 case 'M':
23867 asm_fprintf (stream, "{%r-%r}",
23868 REGNO (x),
23869 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
23870 return;
23871
23872 /* Like 'M', but writing doubleword vector registers, for use by Neon
23873 insns. */
23874 case 'h':
23875 {
23876 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23877 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23878 if (numregs == 1)
23879 asm_fprintf (stream, "{d%d}", regno);
23880 else
23881 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23882 }
23883 return;
23884
23885 case 'd':
23886 /* CONST_TRUE_RTX means always -- that's the default. */
23887 if (x == const_true_rtx)
23888 return;
23889
23890 if (!COMPARISON_P (x))
23891 {
23892 output_operand_lossage ("invalid operand for code '%c'", code);
23893 return;
23894 }
23895
23896 fputs (arm_condition_codes[get_arm_condition_code (x)],
23897 stream);
23898 return;
23899
23900 case 'D':
23901 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
23902 want to do that. */
23903 if (x == const_true_rtx)
23904 {
23905 output_operand_lossage ("instruction never executed");
23906 return;
23907 }
23908 if (!COMPARISON_P (x))
23909 {
23910 output_operand_lossage ("invalid operand for code '%c'", code);
23911 return;
23912 }
23913
23914 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23915 (get_arm_condition_code (x))],
23916 stream);
23917 return;
23918
23919 case 's':
23920 case 'V':
23921 case 'W':
23922 case 'X':
23923 case 'Y':
23924 case 'Z':
23925 /* Former Maverick support, removed after GCC-4.7. */
23926 output_operand_lossage ("obsolete Maverick format code '%c'", code);
23927 return;
23928
23929 case 'U':
23930 if (!REG_P (x)
23931 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23932 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23933 /* Bad value for wCG register number. */
23934 {
23935 output_operand_lossage ("invalid operand for code '%c'", code);
23936 return;
23937 }
23938
23939 else
23940 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23941 return;
23942
23943 /* Print an iWMMXt control register name. */
23944 case 'w':
23945 if (!CONST_INT_P (x)
23946 || INTVAL (x) < 0
23947 || INTVAL (x) >= 16)
23948 /* Bad value for wC register number. */
23949 {
23950 output_operand_lossage ("invalid operand for code '%c'", code);
23951 return;
23952 }
23953
23954 else
23955 {
23956 static const char * wc_reg_names [16] =
23957 {
23958 "wCID", "wCon", "wCSSF", "wCASF",
23959 "wC4", "wC5", "wC6", "wC7",
23960 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
23961 "wC12", "wC13", "wC14", "wC15"
23962 };
23963
23964 fputs (wc_reg_names [INTVAL (x)], stream);
23965 }
23966 return;
23967
23968 /* Print the high single-precision register of a VFP double-precision
23969 register. */
23970 case 'p':
23971 {
23972 machine_mode mode = GET_MODE (x);
23973 int regno;
23974
23975 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
23976 {
23977 output_operand_lossage ("invalid operand for code '%c'", code);
23978 return;
23979 }
23980
23981 regno = REGNO (x);
23982 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
23983 {
23984 output_operand_lossage ("invalid operand for code '%c'", code);
23985 return;
23986 }
23987
23988 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
23989 }
23990 return;
23991
23992 /* Print a VFP/Neon double precision or quad precision register name. */
23993 case 'P':
23994 case 'q':
23995 {
23996 machine_mode mode = GET_MODE (x);
23997 int is_quad = (code == 'q');
23998 int regno;
23999
24000 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24001 {
24002 output_operand_lossage ("invalid operand for code '%c'", code);
24003 return;
24004 }
24005
24006 if (!REG_P (x)
24007 || !IS_VFP_REGNUM (REGNO (x)))
24008 {
24009 output_operand_lossage ("invalid operand for code '%c'", code);
24010 return;
24011 }
24012
24013 regno = REGNO (x);
24014 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24015 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24016 {
24017 output_operand_lossage ("invalid operand for code '%c'", code);
24018 return;
24019 }
24020
24021 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24022 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24023 }
24024 return;
24025
24026 /* These two codes print the low/high doubleword register of a Neon quad
24027 register, respectively. For pair-structure types, can also print
24028 low/high quadword registers. */
24029 case 'e':
24030 case 'f':
24031 {
24032 machine_mode mode = GET_MODE (x);
24033 int regno;
24034
24035 if ((GET_MODE_SIZE (mode) != 16
24036 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24037 {
24038 output_operand_lossage ("invalid operand for code '%c'", code);
24039 return;
24040 }
24041
24042 regno = REGNO (x);
24043 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24044 {
24045 output_operand_lossage ("invalid operand for code '%c'", code);
24046 return;
24047 }
24048
24049 if (GET_MODE_SIZE (mode) == 16)
24050 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24051 + (code == 'f' ? 1 : 0));
24052 else
24053 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24054 + (code == 'f' ? 1 : 0));
24055 }
24056 return;
24057
24058 /* Print a VFPv3 floating-point constant, represented as an integer
24059 index. */
24060 case 'G':
24061 {
24062 int index = vfp3_const_double_index (x);
24063 gcc_assert (index != -1);
24064 fprintf (stream, "%d", index);
24065 }
24066 return;
24067
24068 /* Print bits representing opcode features for Neon.
24069
24070 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24071 and polynomials as unsigned.
24072
24073 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24074
24075 Bit 2 is 1 for rounding functions, 0 otherwise. */
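/* Under this encoding (bits & 3) selects the element type: 0 is an unsigned
integer, 1 a signed integer, 2 a polynomial and 3 a float, which is exactly
the indexing used by the "uspf", "iipf" and "usuf" lookup strings below. */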
24076
24077 /* Identify the type as 's', 'u', 'p' or 'f'. */
24078 case 'T':
24079 {
24080 HOST_WIDE_INT bits = INTVAL (x);
24081 fputc ("uspf"[bits & 3], stream);
24082 }
24083 return;
24084
24085 /* Likewise, but signed and unsigned integers are both 'i'. */
24086 case 'F':
24087 {
24088 HOST_WIDE_INT bits = INTVAL (x);
24089 fputc ("iipf"[bits & 3], stream);
24090 }
24091 return;
24092
24093 /* As for 'T', but emit 'u' instead of 'p'. */
24094 case 't':
24095 {
24096 HOST_WIDE_INT bits = INTVAL (x);
24097 fputc ("usuf"[bits & 3], stream);
24098 }
24099 return;
24100
24101 /* Bit 2: rounding (vs none). */
24102 case 'O':
24103 {
24104 HOST_WIDE_INT bits = INTVAL (x);
24105 fputs ((bits & 4) != 0 ? "r" : "", stream);
24106 }
24107 return;
24108
24109 /* Memory operand for vld1/vst1 instruction. */
24110 case 'A':
24111 {
24112 rtx addr;
24113 bool postinc = false;
24114 rtx postinc_reg = NULL;
24115 unsigned align, memsize, align_bits;
24116
24117 gcc_assert (MEM_P (x));
24118 addr = XEXP (x, 0);
24119 if (GET_CODE (addr) == POST_INC)
24120 {
24121 postinc = true;
24122 addr = XEXP (addr, 0);
24123 }
24124 if (GET_CODE (addr) == POST_MODIFY)
24125 {
24126 postinc_reg = XEXP (XEXP (addr, 1), 1);
24127 addr = XEXP (addr, 0);
24128 }
24129 asm_fprintf (stream, "[%r", REGNO (addr));
24130
24131 /* We know the alignment of this access, so we can emit a hint in the
24132 instruction (for some alignments) as an aid to the memory subsystem
24133 of the target. */
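/* For instance, a 16-byte access known to be at least 16-byte aligned gets
a ":128" hint, e.g. "[r0:128]"; if no suitable alignment is known the hint
is simply omitted. */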
24134 align = MEM_ALIGN (x) >> 3;
24135 memsize = MEM_SIZE (x);
24136
24137 /* Only certain alignment specifiers are supported by the hardware. */
24138 if (memsize == 32 && (align % 32) == 0)
24139 align_bits = 256;
24140 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24141 align_bits = 128;
24142 else if (memsize >= 8 && (align % 8) == 0)
24143 align_bits = 64;
24144 else
24145 align_bits = 0;
24146
24147 if (align_bits != 0)
24148 asm_fprintf (stream, ":%d", align_bits);
24149
24150 asm_fprintf (stream, "]");
24151
24152 if (postinc)
24153 fputs ("!", stream);
24154 if (postinc_reg)
24155 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24156 }
24157 return;
24158
24159 /* To print the memory operand with "Ux" or "Uj" constraint. Based on the
24160 rtx_code, the memory operand is output in one of the following forms.
24161 1. [Rn], #+/-<imm>
24162 2. [Rn, #+/-<imm>]!
24163 3. [Rn, #+/-<imm>]
24164 4. [Rn]. */
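/* Form 1 is used for POST_INC/POST_DEC and for POST_MODIFY, form 2 for
PRE_INC/PRE_DEC and PRE_MODIFY (the MODIFY variants are only handled for a
constant offset), form 3 for a PLUS of a register and a constant, and form 4
for a plain register address. */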
24165 case 'E':
24166 {
24167 rtx addr;
24168 rtx postinc_reg = NULL;
24169 unsigned inc_val = 0;
24170 enum rtx_code code;
24171
24172 gcc_assert (MEM_P (x));
24173 addr = XEXP (x, 0);
24174 code = GET_CODE (addr);
24175 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24176 || code == PRE_DEC)
24177 {
24178 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24179 inc_val = GET_MODE_SIZE (GET_MODE (x));
24180 if (code == POST_INC || code == POST_DEC)
24181 asm_fprintf (stream, "], #%s%d", (code == POST_INC)
24182 ? "" : "-", inc_val);
24183 else
24184 asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
24185 ? "" : "-", inc_val);
24186 }
24187 else if (code == POST_MODIFY || code == PRE_MODIFY)
24188 {
24189 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24190 postinc_reg = XEXP (XEXP (addr, 1), 1);
24191 if (postinc_reg && CONST_INT_P (postinc_reg))
24192 {
24193 if (code == POST_MODIFY)
24194 asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
24195 else
24196 asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
24197 }
24198 }
24199 else if (code == PLUS)
24200 {
24201 rtx base = XEXP (addr, 0);
24202 rtx index = XEXP (addr, 1);
24203
24204 gcc_assert (REG_P (base) && CONST_INT_P (index));
24205
24206 HOST_WIDE_INT offset = INTVAL (index);
24207 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24208 }
24209 else
24210 {
24211 gcc_assert (REG_P (addr));
24212 asm_fprintf (stream, "[%r]", REGNO (addr));
24213 }
24214 }
24215 return;
24216
24217 case 'C':
24218 {
24219 rtx addr;
24220
24221 gcc_assert (MEM_P (x));
24222 addr = XEXP (x, 0);
24223 gcc_assert (REG_P (addr));
24224 asm_fprintf (stream, "[%r]", REGNO (addr));
24225 }
24226 return;
24227
24228 /* Translate an S register number into a D register number and element index. */
24229 case 'y':
24230 {
24231 machine_mode mode = GET_MODE (x);
24232 int regno;
24233
24234 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24235 {
24236 output_operand_lossage ("invalid operand for code '%c'", code);
24237 return;
24238 }
24239
24240 regno = REGNO (x);
24241 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24242 {
24243 output_operand_lossage ("invalid operand for code '%c'", code);
24244 return;
24245 }
24246
24247 regno = regno - FIRST_VFP_REGNUM;
24248 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24249 }
24250 return;
24251
24252 case 'v':
24253 gcc_assert (CONST_DOUBLE_P (x));
24254 int result;
24255 result = vfp3_const_double_for_fract_bits (x);
24256 if (result == 0)
24257 result = vfp3_const_double_for_bits (x);
24258 fprintf (stream, "#%d", result);
24259 return;
24260
24261 /* Register specifier for vld1.16/vst1.16. Translate the S register
24262 number into a D register number and element index. */
24263 case 'z':
24264 {
24265 machine_mode mode = GET_MODE (x);
24266 int regno;
24267
24268 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24269 {
24270 output_operand_lossage ("invalid operand for code '%c'", code);
24271 return;
24272 }
24273
24274 regno = REGNO (x);
24275 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24276 {
24277 output_operand_lossage ("invalid operand for code '%c'", code);
24278 return;
24279 }
24280
24281 regno = regno - FIRST_VFP_REGNUM;
24282 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24283 }
24284 return;
24285
24286 default:
24287 if (x == 0)
24288 {
24289 output_operand_lossage ("missing operand");
24290 return;
24291 }
24292
24293 switch (GET_CODE (x))
24294 {
24295 case REG:
24296 asm_fprintf (stream, "%r", REGNO (x));
24297 break;
24298
24299 case MEM:
24300 output_address (GET_MODE (x), XEXP (x, 0));
24301 break;
24302
24303 case CONST_DOUBLE:
24304 {
24305 char fpstr[20];
24306 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24307 sizeof (fpstr), 0, 1);
24308 fprintf (stream, "#%s", fpstr);
24309 }
24310 break;
24311
24312 default:
24313 gcc_assert (GET_CODE (x) != NEG);
24314 fputc ('#', stream);
24315 if (GET_CODE (x) == HIGH)
24316 {
24317 fputs (":lower16:", stream);
24318 x = XEXP (x, 0);
24319 }
24320
24321 output_addr_const (stream, x);
24322 break;
24323 }
24324 }
24325 }
24326 \f
24327 /* Target hook for printing a memory address. */
24328 static void
24329 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24330 {
24331 if (TARGET_32BIT)
24332 {
24333 int is_minus = GET_CODE (x) == MINUS;
24334
24335 if (REG_P (x))
24336 asm_fprintf (stream, "[%r]", REGNO (x));
24337 else if (GET_CODE (x) == PLUS || is_minus)
24338 {
24339 rtx base = XEXP (x, 0);
24340 rtx index = XEXP (x, 1);
24341 HOST_WIDE_INT offset = 0;
24342 if (!REG_P (base)
24343 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24344 {
24345 /* Ensure that BASE is a register. */
24346 /* (one of them must be). */
24347 /* Also ensure the SP is not used as an index register. */
24348 std::swap (base, index);
24349 }
24350 switch (GET_CODE (index))
24351 {
24352 case CONST_INT:
24353 offset = INTVAL (index);
24354 if (is_minus)
24355 offset = -offset;
24356 asm_fprintf (stream, "[%r, #%wd]",
24357 REGNO (base), offset);
24358 break;
24359
24360 case REG:
24361 asm_fprintf (stream, "[%r, %s%r]",
24362 REGNO (base), is_minus ? "-" : "",
24363 REGNO (index));
24364 break;
24365
24366 case MULT:
24367 case ASHIFTRT:
24368 case LSHIFTRT:
24369 case ASHIFT:
24370 case ROTATERT:
24371 {
24372 asm_fprintf (stream, "[%r, %s%r",
24373 REGNO (base), is_minus ? "-" : "",
24374 REGNO (XEXP (index, 0)));
24375 arm_print_operand (stream, index, 'S');
24376 fputs ("]", stream);
24377 break;
24378 }
24379
24380 default:
24381 gcc_unreachable ();
24382 }
24383 }
24384 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24385 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24386 {
24387 gcc_assert (REG_P (XEXP (x, 0)));
24388
24389 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24390 asm_fprintf (stream, "[%r, #%s%d]!",
24391 REGNO (XEXP (x, 0)),
24392 GET_CODE (x) == PRE_DEC ? "-" : "",
24393 GET_MODE_SIZE (mode));
24394 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24395 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24396 else
24397 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24398 GET_CODE (x) == POST_DEC ? "-" : "",
24399 GET_MODE_SIZE (mode));
24400 }
24401 else if (GET_CODE (x) == PRE_MODIFY)
24402 {
24403 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24404 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24405 asm_fprintf (stream, "#%wd]!",
24406 INTVAL (XEXP (XEXP (x, 1), 1)));
24407 else
24408 asm_fprintf (stream, "%r]!",
24409 REGNO (XEXP (XEXP (x, 1), 1)));
24410 }
24411 else if (GET_CODE (x) == POST_MODIFY)
24412 {
24413 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24414 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24415 asm_fprintf (stream, "#%wd",
24416 INTVAL (XEXP (XEXP (x, 1), 1)));
24417 else
24418 asm_fprintf (stream, "%r",
24419 REGNO (XEXP (XEXP (x, 1), 1)));
24420 }
24421 else output_addr_const (stream, x);
24422 }
24423 else
24424 {
24425 if (REG_P (x))
24426 asm_fprintf (stream, "[%r]", REGNO (x));
24427 else if (GET_CODE (x) == POST_INC)
24428 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24429 else if (GET_CODE (x) == PLUS)
24430 {
24431 gcc_assert (REG_P (XEXP (x, 0)));
24432 if (CONST_INT_P (XEXP (x, 1)))
24433 asm_fprintf (stream, "[%r, #%wd]",
24434 REGNO (XEXP (x, 0)),
24435 INTVAL (XEXP (x, 1)));
24436 else
24437 asm_fprintf (stream, "[%r, %r]",
24438 REGNO (XEXP (x, 0)),
24439 REGNO (XEXP (x, 1)));
24440 }
24441 else
24442 output_addr_const (stream, x);
24443 }
24444 }
24445 \f
24446 /* Target hook for indicating whether a punctuation character for
24447 TARGET_PRINT_OPERAND is valid. */
24448 static bool
24449 arm_print_operand_punct_valid_p (unsigned char code)
24450 {
24451 return (code == '@' || code == '|' || code == '.'
24452 || code == '(' || code == ')' || code == '#'
24453 || (TARGET_32BIT && (code == '?'))
24454 || (TARGET_THUMB2 && (code == '!'))
24455 || (TARGET_THUMB && (code == '_')));
24456 }
24457 \f
24458 /* Target hook for assembling integer objects. The ARM version needs to
24459 handle word-sized values specially. */
24460 static bool
24461 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24462 {
24463 machine_mode mode;
24464
24465 if (size == UNITS_PER_WORD && aligned_p)
24466 {
24467 fputs ("\t.word\t", asm_out_file);
24468 output_addr_const (asm_out_file, x);
24469
24470 /* Mark symbols as position independent. We only do this in the
24471 .text segment, not in the .data segment. */
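/* Depending on whether the symbol can be resolved locally, this emits a
directive such as ".word sym(GOT)" or ".word sym(GOTOFF)"; on FDPIC targets,
function symbols use the FUNCDESC variants of these relocations. */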
24472 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24473 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
24474 {
24475 /* See legitimize_pic_address for an explanation of the
24476 TARGET_VXWORKS_RTP check. */
24477 /* References to weak symbols cannot be resolved locally:
24478 they may be overridden by a non-weak definition at link
24479 time. */
24480 if (!arm_pic_data_is_text_relative
24481 || (GET_CODE (x) == SYMBOL_REF
24482 && (!SYMBOL_REF_LOCAL_P (x)
24483 || (SYMBOL_REF_DECL (x)
24484 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24485 || (SYMBOL_REF_FUNCTION_P (x)
24486 && !arm_fdpic_local_funcdesc_p (x)))))
24487 {
24488 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24489 fputs ("(GOTFUNCDESC)", asm_out_file);
24490 else
24491 fputs ("(GOT)", asm_out_file);
24492 }
24493 else
24494 {
24495 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24496 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24497 else
24498 {
24499 bool is_readonly;
24500
24501 if (!TARGET_FDPIC
24502 || arm_is_segment_info_known (x, &is_readonly))
24503 fputs ("(GOTOFF)", asm_out_file);
24504 else
24505 fputs ("(GOT)", asm_out_file);
24506 }
24507 }
24508 }
24509
24510 /* For FDPIC we also have to mark the symbol for the .data section. */
24511 if (TARGET_FDPIC
24512 && !making_const_table
24513 && SYMBOL_REF_P (x)
24514 && SYMBOL_REF_FUNCTION_P (x))
24515 fputs ("(FUNCDESC)", asm_out_file);
24516
24517 fputc ('\n', asm_out_file);
24518 return true;
24519 }
24520
24521 mode = GET_MODE (x);
24522
24523 if (arm_vector_mode_supported_p (mode))
24524 {
24525 int i, units;
24526
24527 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24528
24529 units = CONST_VECTOR_NUNITS (x);
24530 size = GET_MODE_UNIT_SIZE (mode);
24531
24532 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24533 for (i = 0; i < units; i++)
24534 {
24535 rtx elt = CONST_VECTOR_ELT (x, i);
24536 assemble_integer
24537 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24538 }
24539 else
24540 for (i = 0; i < units; i++)
24541 {
24542 rtx elt = CONST_VECTOR_ELT (x, i);
24543 assemble_real
24544 (*CONST_DOUBLE_REAL_VALUE (elt),
24545 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24546 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24547 }
24548
24549 return true;
24550 }
24551
24552 return default_assemble_integer (x, size, aligned_p);
24553 }
24554
24555 static void
24556 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24557 {
24558 section *s;
24559
24560 if (!TARGET_AAPCS_BASED)
24561 {
24562 (is_ctor ?
24563 default_named_section_asm_out_constructor
24564 : default_named_section_asm_out_destructor) (symbol, priority);
24565 return;
24566 }
24567
24568 /* Put these in the .init_array section, using a special relocation. */
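/* For example, a constructor with priority 101 is placed in a section named
".init_array.00101". */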
24569 if (priority != DEFAULT_INIT_PRIORITY)
24570 {
24571 char buf[18];
24572 sprintf (buf, "%s.%.5u",
24573 is_ctor ? ".init_array" : ".fini_array",
24574 priority);
24575 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24576 }
24577 else if (is_ctor)
24578 s = ctors_section;
24579 else
24580 s = dtors_section;
24581
24582 switch_to_section (s);
24583 assemble_align (POINTER_SIZE);
24584 fputs ("\t.word\t", asm_out_file);
24585 output_addr_const (asm_out_file, symbol);
24586 fputs ("(target1)\n", asm_out_file);
24587 }
24588
24589 /* Add a function to the list of static constructors. */
24590
24591 static void
24592 arm_elf_asm_constructor (rtx symbol, int priority)
24593 {
24594 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24595 }
24596
24597 /* Add a function to the list of static destructors. */
24598
24599 static void
24600 arm_elf_asm_destructor (rtx symbol, int priority)
24601 {
24602 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24603 }
24604 \f
24605 /* A finite state machine takes care of noticing whether or not instructions
24606 can be conditionally executed, thus decreasing execution time and code
24607 size by deleting branch instructions. The fsm is controlled by
24608 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24609
24610 /* The states of the fsm controlling condition codes are:
24611 0: normal, do nothing special
24612 1: make ASM_OUTPUT_OPCODE not output this instruction
24613 2: make ASM_OUTPUT_OPCODE not output this instruction
24614 3: make instructions conditional
24615 4: make instructions conditional
24616
24617 State transitions (state->state by whom under condition):
24618 0 -> 1 final_prescan_insn if the `target' is a label
24619 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24620 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24621 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24622 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24623 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24624 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24625 (the target insn is arm_target_insn).
24626
24627 If the jump clobbers the conditions then we use states 2 and 4.
24628
24629 A similar thing can be done with conditional return insns.
24630
24631 XXX In case the `target' is an unconditional branch, this conditionalising
24632 of the instructions always reduces code size, but not always execution
24633 time. But then, I want to reduce the code size to somewhere near what
24634 /bin/cc produces. */
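/* As an illustration, for a conditional branch skipping a single
instruction, such as

cmp r0, #0
beq .L1
add r1, r1, #1
.L1:

the FSM allows the branch to be dropped and the skipped instruction to be
emitted with the inverse condition instead:

cmp r0, #0
addne r1, r1, #1 */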
24635
24636 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24637 instructions. When a COND_EXEC instruction is seen the subsequent
24638 instructions are scanned so that multiple conditional instructions can be
24639 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24640 specify the length and true/false mask for the IT block. These will be
24641 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
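/* For example, two instructions predicated on EQ followed by one predicated
on NE can be emitted as a single "ITTE eq" block; the bits of
arm_condexec_mask record which slots use the block's base condition and
which use its inverse. */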
24642
24643 /* Returns the index of the ARM condition code string in
24644 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24645 COMPARISON should be an rtx like `(eq (...) (...))'. */
24646
24647 enum arm_cond_code
24648 maybe_get_arm_condition_code (rtx comparison)
24649 {
24650 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24651 enum arm_cond_code code;
24652 enum rtx_code comp_code = GET_CODE (comparison);
24653
24654 if (GET_MODE_CLASS (mode) != MODE_CC)
24655 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24656 XEXP (comparison, 1));
24657
24658 switch (mode)
24659 {
24660 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24661 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24662 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24663 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24664 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24665 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24666 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24667 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24668 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24669 case E_CC_DLTUmode: code = ARM_CC;
24670
24671 dominance:
24672 if (comp_code == EQ)
24673 return ARM_INVERSE_CONDITION_CODE (code);
24674 if (comp_code == NE)
24675 return code;
24676 return ARM_NV;
24677
24678 case E_CC_NZmode:
24679 switch (comp_code)
24680 {
24681 case NE: return ARM_NE;
24682 case EQ: return ARM_EQ;
24683 case GE: return ARM_PL;
24684 case LT: return ARM_MI;
24685 default: return ARM_NV;
24686 }
24687
24688 case E_CC_Zmode:
24689 switch (comp_code)
24690 {
24691 case NE: return ARM_NE;
24692 case EQ: return ARM_EQ;
24693 default: return ARM_NV;
24694 }
24695
24696 case E_CC_Nmode:
24697 switch (comp_code)
24698 {
24699 case NE: return ARM_MI;
24700 case EQ: return ARM_PL;
24701 default: return ARM_NV;
24702 }
24703
24704 case E_CCFPEmode:
24705 case E_CCFPmode:
24706 /* We can handle all cases except UNEQ and LTGT. */
24707 switch (comp_code)
24708 {
24709 case GE: return ARM_GE;
24710 case GT: return ARM_GT;
24711 case LE: return ARM_LS;
24712 case LT: return ARM_MI;
24713 case NE: return ARM_NE;
24714 case EQ: return ARM_EQ;
24715 case ORDERED: return ARM_VC;
24716 case UNORDERED: return ARM_VS;
24717 case UNLT: return ARM_LT;
24718 case UNLE: return ARM_LE;
24719 case UNGT: return ARM_HI;
24720 case UNGE: return ARM_PL;
24721 /* UNEQ and LTGT do not have a representation. */
24722 case UNEQ: /* Fall through. */
24723 case LTGT: /* Fall through. */
24724 default: return ARM_NV;
24725 }
24726
24727 case E_CC_SWPmode:
24728 switch (comp_code)
24729 {
24730 case NE: return ARM_NE;
24731 case EQ: return ARM_EQ;
24732 case GE: return ARM_LE;
24733 case GT: return ARM_LT;
24734 case LE: return ARM_GE;
24735 case LT: return ARM_GT;
24736 case GEU: return ARM_LS;
24737 case GTU: return ARM_CC;
24738 case LEU: return ARM_CS;
24739 case LTU: return ARM_HI;
24740 default: return ARM_NV;
24741 }
24742
24743 case E_CC_Cmode:
24744 switch (comp_code)
24745 {
24746 case LTU: return ARM_CS;
24747 case GEU: return ARM_CC;
24748 default: return ARM_NV;
24749 }
24750
24751 case E_CC_NVmode:
24752 switch (comp_code)
24753 {
24754 case GE: return ARM_GE;
24755 case LT: return ARM_LT;
24756 default: return ARM_NV;
24757 }
24758
24759 case E_CC_Bmode:
24760 switch (comp_code)
24761 {
24762 case GEU: return ARM_CS;
24763 case LTU: return ARM_CC;
24764 default: return ARM_NV;
24765 }
24766
24767 case E_CC_Vmode:
24768 switch (comp_code)
24769 {
24770 case NE: return ARM_VS;
24771 case EQ: return ARM_VC;
24772 default: return ARM_NV;
24773 }
24774
24775 case E_CC_ADCmode:
24776 switch (comp_code)
24777 {
24778 case GEU: return ARM_CS;
24779 case LTU: return ARM_CC;
24780 default: return ARM_NV;
24781 }
24782
24783 case E_CCmode:
24784 case E_CC_RSBmode:
24785 switch (comp_code)
24786 {
24787 case NE: return ARM_NE;
24788 case EQ: return ARM_EQ;
24789 case GE: return ARM_GE;
24790 case GT: return ARM_GT;
24791 case LE: return ARM_LE;
24792 case LT: return ARM_LT;
24793 case GEU: return ARM_CS;
24794 case GTU: return ARM_HI;
24795 case LEU: return ARM_LS;
24796 case LTU: return ARM_CC;
24797 default: return ARM_NV;
24798 }
24799
24800 default: gcc_unreachable ();
24801 }
24802 }
24803
24804 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
24805 static enum arm_cond_code
24806 get_arm_condition_code (rtx comparison)
24807 {
24808 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24809 gcc_assert (code != ARM_NV);
24810 return code;
24811 }
24812
24813 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
24814 code registers when not targeting Thumb1. The VFP condition register
24815 only exists when generating hard-float code. */
24816 static bool
24817 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24818 {
24819 if (!TARGET_32BIT)
24820 return false;
24821
24822 *p1 = CC_REGNUM;
24823 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
24824 return true;
24825 }
24826
24827 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24828 instructions. */
24829 void
24830 thumb2_final_prescan_insn (rtx_insn *insn)
24831 {
24832 rtx_insn *first_insn = insn;
24833 rtx body = PATTERN (insn);
24834 rtx predicate;
24835 enum arm_cond_code code;
24836 int n;
24837 int mask;
24838 int max;
24839
24840 /* max_insns_skipped in the tune was already taken into account in the
24841 cost model of the ifcvt pass when generating COND_EXEC insns. At this
24842 stage just emit IT blocks as large as the architecture allows; it does
24843 not make sense to split the IT blocks. */
24844 max = MAX_INSN_PER_IT_BLOCK;
24845
24846 /* Remove the previous insn from the count of insns to be output. */
24847 if (arm_condexec_count)
24848 arm_condexec_count--;
24849
24850 /* Nothing to do if we are already inside a conditional block. */
24851 if (arm_condexec_count)
24852 return;
24853
24854 if (GET_CODE (body) != COND_EXEC)
24855 return;
24856
24857 /* Conditional jumps are implemented directly. */
24858 if (JUMP_P (insn))
24859 return;
24860
24861 predicate = COND_EXEC_TEST (body);
24862 arm_current_cc = get_arm_condition_code (predicate);
24863
24864 n = get_attr_ce_count (insn);
24865 arm_condexec_count = 1;
24866 arm_condexec_mask = (1 << n) - 1;
24867 arm_condexec_masklen = n;
24868 /* See if subsequent instructions can be combined into the same block. */
24869 for (;;)
24870 {
24871 insn = next_nonnote_insn (insn);
24872
24873 /* Jumping into the middle of an IT block is illegal, so a label or
24874 barrier terminates the block. */
24875 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24876 break;
24877
24878 body = PATTERN (insn);
24879 /* USE and CLOBBER aren't really insns, so just skip them. */
24880 if (GET_CODE (body) == USE
24881 || GET_CODE (body) == CLOBBER)
24882 continue;
24883
24884 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
24885 if (GET_CODE (body) != COND_EXEC)
24886 break;
24887 /* Maximum number of conditionally executed instructions in a block. */
24888 n = get_attr_ce_count (insn);
24889 if (arm_condexec_masklen + n > max)
24890 break;
24891
24892 predicate = COND_EXEC_TEST (body);
24893 code = get_arm_condition_code (predicate);
24894 mask = (1 << n) - 1;
24895 if (arm_current_cc == code)
24896 arm_condexec_mask |= (mask << arm_condexec_masklen);
24897 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24898 break;
24899
24900 arm_condexec_count++;
24901 arm_condexec_masklen += n;
24902
24903 /* A jump must be the last instruction in a conditional block. */
24904 if (JUMP_P (insn))
24905 break;
24906 }
24907 /* Restore recog_data (getting the attributes of other insns can
24908 destroy this array, but final.c assumes that it remains intact
24909 across this call). */
24910 extract_constrain_insn_cached (first_insn);
24911 }
24912
24913 void
24914 arm_final_prescan_insn (rtx_insn *insn)
24915 {
24916 /* BODY will hold the body of INSN. */
24917 rtx body = PATTERN (insn);
24918
24919 /* This will be 1 if trying to repeat the trick, and things need to be
24920 reversed if it appears to fail. */
24921 int reverse = 0;
24922
24923 /* If we start with a return insn, we only succeed if we find another one. */
24924 int seeking_return = 0;
24925 enum rtx_code return_code = UNKNOWN;
24926
24927 /* START_INSN will hold the insn from where we start looking. This is the
24928 first insn after the following code_label if REVERSE is true. */
24929 rtx_insn *start_insn = insn;
24930
24931 /* If in state 4, check if the target branch is reached, in order to
24932 change back to state 0. */
24933 if (arm_ccfsm_state == 4)
24934 {
24935 if (insn == arm_target_insn)
24936 {
24937 arm_target_insn = NULL;
24938 arm_ccfsm_state = 0;
24939 }
24940 return;
24941 }
24942
24943 /* If in state 3, it is possible to repeat the trick, if this insn is an
24944 unconditional branch to a label, and immediately following this branch
24945 is the previous target label which is only used once, and the label this
24946 branch jumps to is not too far off. */
24947 if (arm_ccfsm_state == 3)
24948 {
24949 if (simplejump_p (insn))
24950 {
24951 start_insn = next_nonnote_insn (start_insn);
24952 if (BARRIER_P (start_insn))
24953 {
24954 /* XXX Isn't this always a barrier? */
24955 start_insn = next_nonnote_insn (start_insn);
24956 }
24957 if (LABEL_P (start_insn)
24958 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24959 && LABEL_NUSES (start_insn) == 1)
24960 reverse = TRUE;
24961 else
24962 return;
24963 }
24964 else if (ANY_RETURN_P (body))
24965 {
24966 start_insn = next_nonnote_insn (start_insn);
24967 if (BARRIER_P (start_insn))
24968 start_insn = next_nonnote_insn (start_insn);
24969 if (LABEL_P (start_insn)
24970 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24971 && LABEL_NUSES (start_insn) == 1)
24972 {
24973 reverse = TRUE;
24974 seeking_return = 1;
24975 return_code = GET_CODE (body);
24976 }
24977 else
24978 return;
24979 }
24980 else
24981 return;
24982 }
24983
24984 gcc_assert (!arm_ccfsm_state || reverse);
24985 if (!JUMP_P (insn))
24986 return;
24987
24988 /* This jump might be paralleled with a clobber of the condition codes;
24989 the jump should always come first. */
24990 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
24991 body = XVECEXP (body, 0, 0);
24992
24993 if (reverse
24994 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
24995 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
24996 {
24997 int insns_skipped;
24998 int fail = FALSE, succeed = FALSE;
24999 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25000 int then_not_else = TRUE;
25001 rtx_insn *this_insn = start_insn;
25002 rtx label = 0;
25003
25004 /* Register the insn jumped to. */
25005 if (reverse)
25006 {
25007 if (!seeking_return)
25008 label = XEXP (SET_SRC (body), 0);
25009 }
25010 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25011 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25012 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25013 {
25014 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25015 then_not_else = FALSE;
25016 }
25017 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25018 {
25019 seeking_return = 1;
25020 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25021 }
25022 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25023 {
25024 seeking_return = 1;
25025 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25026 then_not_else = FALSE;
25027 }
25028 else
25029 gcc_unreachable ();
25030
25031 /* See how many insns this branch skips, and what kind of insns. If all
25032 insns are okay, and the label or unconditional branch to the same
25033 label is not too far away, succeed. */
25034 for (insns_skipped = 0;
25035 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25036 {
25037 rtx scanbody;
25038
25039 this_insn = next_nonnote_insn (this_insn);
25040 if (!this_insn)
25041 break;
25042
25043 switch (GET_CODE (this_insn))
25044 {
25045 case CODE_LABEL:
25046 /* Succeed if it is the target label, otherwise fail since
25047 control falls in from somewhere else. */
25048 if (this_insn == label)
25049 {
25050 arm_ccfsm_state = 1;
25051 succeed = TRUE;
25052 }
25053 else
25054 fail = TRUE;
25055 break;
25056
25057 case BARRIER:
25058 /* Succeed if the following insn is the target label.
25059 Otherwise fail.
25060 If return insns are used then the last insn in a function
25061 will be a barrier. */
25062 this_insn = next_nonnote_insn (this_insn);
25063 if (this_insn && this_insn == label)
25064 {
25065 arm_ccfsm_state = 1;
25066 succeed = TRUE;
25067 }
25068 else
25069 fail = TRUE;
25070 break;
25071
25072 case CALL_INSN:
25073 /* The AAPCS says that conditional calls should not be
25074 used since they make interworking inefficient (the
25075 linker can't transform BL<cond> into BLX). That's
25076 only a problem if the machine has BLX. */
25077 if (arm_arch5t)
25078 {
25079 fail = TRUE;
25080 break;
25081 }
25082
25083 /* Succeed if the following insn is the target label, or
25084 if the following two insns are a barrier and the
25085 target label. */
25086 this_insn = next_nonnote_insn (this_insn);
25087 if (this_insn && BARRIER_P (this_insn))
25088 this_insn = next_nonnote_insn (this_insn);
25089
25090 if (this_insn && this_insn == label
25091 && insns_skipped < max_insns_skipped)
25092 {
25093 arm_ccfsm_state = 1;
25094 succeed = TRUE;
25095 }
25096 else
25097 fail = TRUE;
25098 break;
25099
25100 case JUMP_INSN:
25101 /* If this is an unconditional branch to the same label, succeed.
25102 If it is to another label, do nothing. If it is conditional,
25103 fail. */
25104 /* XXX Probably, the tests for SET and the PC are
25105 unnecessary. */
25106
25107 scanbody = PATTERN (this_insn);
25108 if (GET_CODE (scanbody) == SET
25109 && GET_CODE (SET_DEST (scanbody)) == PC)
25110 {
25111 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25112 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25113 {
25114 arm_ccfsm_state = 2;
25115 succeed = TRUE;
25116 }
25117 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25118 fail = TRUE;
25119 }
25120 /* Fail if a conditional return is undesirable (e.g. on a
25121 StrongARM), but still allow this if optimizing for size. */
25122 else if (GET_CODE (scanbody) == return_code
25123 && !use_return_insn (TRUE, NULL)
25124 && !optimize_size)
25125 fail = TRUE;
25126 else if (GET_CODE (scanbody) == return_code)
25127 {
25128 arm_ccfsm_state = 2;
25129 succeed = TRUE;
25130 }
25131 else if (GET_CODE (scanbody) == PARALLEL)
25132 {
25133 switch (get_attr_conds (this_insn))
25134 {
25135 case CONDS_NOCOND:
25136 break;
25137 default:
25138 fail = TRUE;
25139 break;
25140 }
25141 }
25142 else
25143 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25144
25145 break;
25146
25147 case INSN:
25148 /* Instructions using or affecting the condition codes make it
25149 fail. */
25150 scanbody = PATTERN (this_insn);
25151 if (!(GET_CODE (scanbody) == SET
25152 || GET_CODE (scanbody) == PARALLEL)
25153 || get_attr_conds (this_insn) != CONDS_NOCOND)
25154 fail = TRUE;
25155 break;
25156
25157 default:
25158 break;
25159 }
25160 }
25161 if (succeed)
25162 {
25163 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25164 arm_target_label = CODE_LABEL_NUMBER (label);
25165 else
25166 {
25167 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25168
25169 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25170 {
25171 this_insn = next_nonnote_insn (this_insn);
25172 gcc_assert (!this_insn
25173 || (!BARRIER_P (this_insn)
25174 && !LABEL_P (this_insn)));
25175 }
25176 if (!this_insn)
25177 {
25178 /* Oh dear!  We ran off the end; give up. */
25179 extract_constrain_insn_cached (insn);
25180 arm_ccfsm_state = 0;
25181 arm_target_insn = NULL;
25182 return;
25183 }
25184 arm_target_insn = this_insn;
25185 }
25186
25187 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25188 what it was. */
25189 if (!reverse)
25190 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25191
25192 if (reverse || then_not_else)
25193 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25194 }
25195
25196 /* Restore recog_data (getting the attributes of other insns can
25197 destroy this array, but final.c assumes that it remains intact
25198 across this call).  */
25199 extract_constrain_insn_cached (insn);
25200 }
25201 }
25202
25203 /* Output IT instructions. */
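/* For example (a sketch): a three-instruction block whose first two insns
   use the block condition EQ and whose last uses the inverse condition
   gives arm_condexec_mask == 3 (bits 0 and 1 set), so the string built
   below is "tte" and the prefix emitted ahead of the first conditional
   opcode is "itte eq".  */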
25204 void
25205 thumb2_asm_output_opcode (FILE * stream)
25206 {
25207 char buff[5];
25208 int n;
25209
25210 if (arm_condexec_mask)
25211 {
25212 for (n = 0; n < arm_condexec_masklen; n++)
25213 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25214 buff[n] = 0;
25215 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25216 arm_condition_codes[arm_current_cc]);
25217 arm_condexec_mask = 0;
25218 }
25219 }
25220
25221 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25222 UNITS_PER_WORD bytes wide. */
25223 static unsigned int
25224 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25225 {
25226 if (TARGET_32BIT
25227 && regno > PC_REGNUM
25228 && regno != FRAME_POINTER_REGNUM
25229 && regno != ARG_POINTER_REGNUM
25230 && !IS_VFP_REGNUM (regno))
25231 return 1;
25232
25233 return ARM_NUM_REGS (mode);
25234 }
25235
25236 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25237 static bool
25238 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25239 {
25240 if (GET_MODE_CLASS (mode) == MODE_CC)
25241 return (regno == CC_REGNUM
25242 || (TARGET_VFP_BASE
25243 && regno == VFPCC_REGNUM));
25244
25245 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25246 return false;
25247
25248 if (IS_VPR_REGNUM (regno))
25249 return true;
25250
25251 if (TARGET_THUMB1)
25252 /* For the Thumb we only allow values bigger than SImode in
25253 registers 0 - 6, so that there is always a second low
25254 register available to hold the upper part of the value.
25255 We probably ought to ensure that the register is the
25256 start of an even numbered register pair. */
25257 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25258
25259 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25260 {
25261 if (mode == DFmode || mode == DImode)
25262 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25263
25264 if (mode == HFmode || mode == BFmode || mode == HImode
25265 || mode == SFmode || mode == SImode)
25266 return VFP_REGNO_OK_FOR_SINGLE (regno);
25267
25268 if (TARGET_NEON)
25269 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25270 || (VALID_NEON_QREG_MODE (mode)
25271 && NEON_REGNO_OK_FOR_QUAD (regno))
25272 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25273 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25274 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25275 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25276 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25277 if (TARGET_HAVE_MVE)
25278 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25279 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25280 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25281
25282 return false;
25283 }
25284
25285 if (TARGET_REALLY_IWMMXT)
25286 {
25287 if (IS_IWMMXT_GR_REGNUM (regno))
25288 return mode == SImode;
25289
25290 if (IS_IWMMXT_REGNUM (regno))
25291 return VALID_IWMMXT_REG_MODE (mode);
25292 }
25293
25294 /* We allow almost any value to be stored in the general registers.
25295 Restrict doubleword quantities to even register pairs in ARM state
25296 so that we can use ldrd. The same restriction applies for MVE
25297 in order to support Armv8.1-M Mainline instructions.
25298 Do not allow very large Neon structure opaque modes in general
25299 registers; they would use too many. */
25300 if (regno <= LAST_ARM_REGNUM)
25301 {
25302 if (ARM_NUM_REGS (mode) > 4)
25303 return false;
25304
25305 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25306 return true;
25307
25308 return !((TARGET_LDRD || TARGET_CDE)
25309 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25310 }
25311
25312 if (regno == FRAME_POINTER_REGNUM
25313 || regno == ARG_POINTER_REGNUM)
25314 /* We only allow integers in the fake hard registers. */
25315 return GET_MODE_CLASS (mode) == MODE_INT;
25316
25317 return false;
25318 }
25319
25320 /* Implement TARGET_MODES_TIEABLE_P. */
25321
25322 static bool
25323 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25324 {
25325 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25326 return true;
25327
25328 /* We specifically want to allow elements of "structure" modes to
25329 be tieable to the structure. This more general condition allows
25330 other rarer situations too. */
25331 if ((TARGET_NEON
25332 && (VALID_NEON_DREG_MODE (mode1)
25333 || VALID_NEON_QREG_MODE (mode1)
25334 || VALID_NEON_STRUCT_MODE (mode1))
25335 && (VALID_NEON_DREG_MODE (mode2)
25336 || VALID_NEON_QREG_MODE (mode2)
25337 || VALID_NEON_STRUCT_MODE (mode2)))
25338 || (TARGET_HAVE_MVE
25339 && (VALID_MVE_MODE (mode1)
25340 || VALID_MVE_STRUCT_MODE (mode1))
25341 && (VALID_MVE_MODE (mode2)
25342 || VALID_MVE_STRUCT_MODE (mode2))))
25343 return true;
25344
25345 return false;
25346 }
25347
25348 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25349 not used in arm mode. */
25350
25351 enum reg_class
25352 arm_regno_class (int regno)
25353 {
25354 if (regno == PC_REGNUM)
25355 return NO_REGS;
25356
25357 if (IS_VPR_REGNUM (regno))
25358 return VPR_REG;
25359
25360 if (TARGET_THUMB1)
25361 {
25362 if (regno == STACK_POINTER_REGNUM)
25363 return STACK_REG;
25364 if (regno == CC_REGNUM)
25365 return CC_REG;
25366 if (regno < 8)
25367 return LO_REGS;
25368 return HI_REGS;
25369 }
25370
25371 if (TARGET_THUMB2 && regno < 8)
25372 return LO_REGS;
25373
25374 if ( regno <= LAST_ARM_REGNUM
25375 || regno == FRAME_POINTER_REGNUM
25376 || regno == ARG_POINTER_REGNUM)
25377 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25378
25379 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25380 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25381
25382 if (IS_VFP_REGNUM (regno))
25383 {
25384 if (regno <= D7_VFP_REGNUM)
25385 return VFP_D0_D7_REGS;
25386 else if (regno <= LAST_LO_VFP_REGNUM)
25387 return VFP_LO_REGS;
25388 else
25389 return VFP_HI_REGS;
25390 }
25391
25392 if (IS_IWMMXT_REGNUM (regno))
25393 return IWMMXT_REGS;
25394
25395 if (IS_IWMMXT_GR_REGNUM (regno))
25396 return IWMMXT_GR_REGS;
25397
25398 return NO_REGS;
25399 }
25400
25401 /* Handle a special case when computing the offset
25402 of an argument from the frame pointer. */
25403 int
25404 arm_debugger_arg_offset (int value, rtx addr)
25405 {
25406 rtx_insn *insn;
25407
25408 /* We are only interested if dbxout_parms() failed to compute the offset. */
25409 if (value != 0)
25410 return 0;
25411
25412 /* We can only cope with the case where the address is held in a register. */
25413 if (!REG_P (addr))
25414 return 0;
25415
25416 /* If we are using the frame pointer to point at the argument, then
25417 an offset of 0 is correct. */
25418 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25419 return 0;
25420
25421 /* If we are using the stack pointer to point at the
25422 argument, then an offset of 0 is correct. */
25423 /* ??? Check this is consistent with thumb2 frame layout. */
25424 if ((TARGET_THUMB || !frame_pointer_needed)
25425 && REGNO (addr) == SP_REGNUM)
25426 return 0;
25427
25428 /* Oh dear. The argument is pointed to by a register rather
25429 than being held in a register, or being stored at a known
25430 offset from the frame pointer. Since GDB only understands
25431 those two kinds of argument we must translate the address
25432 held in the register into an offset from the frame pointer.
25433 We do this by searching through the insns for the function
25434 looking to see where this register gets its value. If the
25435 register is initialized from the frame pointer plus an offset
25436 then we are in luck and we can continue, otherwise we give up.
25437
25438 This code is exercised by producing debugging information
25439 for a function with arguments like this:
25440
25441 double func (double a, double b, int c, double d) {return d;}
25442
25443 Without this code the stab for parameter 'd' will be set to
25444 an offset of 0 from the frame pointer, rather than 8. */
25445
25446 /* The if() statement says:
25447
25448 If the insn is a normal instruction
25449 and if the insn is setting the value in a register
25450 and if the register being set is the register holding the address of the argument
25451 and if the address is computed by an addition
25452 that involves adding to a register
25453 which is the frame pointer
25454 a constant integer
25455
25456 then... */
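/* In RTL terms the loop below is looking for something of the form
   (a sketch):

     (set (reg Rn) (plus (reg hard-frame-pointer) (const_int 8)))

   where Rn is the register held in ADDR; the constant then becomes the
   offset reported to the debugger.  */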
25457
25458 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25459 {
25460 if ( NONJUMP_INSN_P (insn)
25461 && GET_CODE (PATTERN (insn)) == SET
25462 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25463 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25464 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25465 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25466 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25467 )
25468 {
25469 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25470
25471 break;
25472 }
25473 }
25474
25475 if (value == 0)
25476 {
25477 debug_rtx (addr);
25478 warning (0, "unable to compute real location of stacked parameter");
25479 value = 8; /* XXX magic hack */
25480 }
25481
25482 return value;
25483 }
25484 \f
25485 /* Implement TARGET_PROMOTED_TYPE. */
25486
25487 static tree
25488 arm_promoted_type (const_tree t)
25489 {
25490 if (SCALAR_FLOAT_TYPE_P (t)
25491 && TYPE_PRECISION (t) == 16
25492 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25493 return float_type_node;
25494 return NULL_TREE;
25495 }
25496
25497 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25498 This simply adds HFmode as a supported mode; even though we don't
25499 implement arithmetic on this type directly, it's supported by
25500 optabs conversions, much the way the double-word arithmetic is
25501 special-cased in the default hook. */
25502
25503 static bool
25504 arm_scalar_mode_supported_p (scalar_mode mode)
25505 {
25506 if (mode == HFmode)
25507 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25508 else if (ALL_FIXED_POINT_MODE_P (mode))
25509 return true;
25510 else
25511 return default_scalar_mode_supported_p (mode);
25512 }
25513
25514 /* Set the value of FLT_EVAL_METHOD.
25515 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25516
25517 0: evaluate all operations and constants, whose semantic type has at
25518 most the range and precision of type float, to the range and
25519 precision of float; evaluate all other operations and constants to
25520 the range and precision of the semantic type;
25521
25522 N, where _FloatN is a supported interchange floating type:
25523 evaluate all operations and constants, whose semantic type has at
25524 most the range and precision of _FloatN type, to the range and
25525 precision of the _FloatN type; evaluate all other operations and
25526 constants to the range and precision of the semantic type;
25527
25528 If we have the ARMv8.2-A extensions then we support _Float16 in native
25529 precision, so we should set this to 16. Otherwise, we support the type,
25530 but want to evaluate expressions in float precision, so set this to
25531 0. */
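/* For instance, with the Armv8.2-A FP16 extension enabled the expression
   "a + b" on _Float16 operands is evaluated directly in half precision
   (FLT_EVAL_METHOD == 16); without it the operands are promoted and the
   addition is performed in float (FLT_EVAL_METHOD == 0).  */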
25532
25533 static enum flt_eval_method
25534 arm_excess_precision (enum excess_precision_type type)
25535 {
25536 switch (type)
25537 {
25538 case EXCESS_PRECISION_TYPE_FAST:
25539 case EXCESS_PRECISION_TYPE_STANDARD:
25540 /* We can calculate either in 16-bit range and precision or
25541 32-bit range and precision. Make that decision based on whether
25542 we have native support for the ARMv8.2-A 16-bit floating-point
25543 instructions or not. */
25544 return (TARGET_VFP_FP16INST
25545 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25546 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25547 case EXCESS_PRECISION_TYPE_IMPLICIT:
25548 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25549 default:
25550 gcc_unreachable ();
25551 }
25552 return FLT_EVAL_METHOD_UNPREDICTABLE;
25553 }
25554
25555
25556 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25557 _Float16 if we are using anything other than ieee format for 16-bit
25558 floating point. Otherwise, punt to the default implementation. */
25559 static opt_scalar_float_mode
25560 arm_floatn_mode (int n, bool extended)
25561 {
25562 if (!extended && n == 16)
25563 {
25564 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25565 return HFmode;
25566 return opt_scalar_float_mode ();
25567 }
25568
25569 return default_floatn_mode (n, extended);
25570 }
25571
25572
25573 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25574 not to early-clobber SRC registers in the process.
25575
25576 We assume that the operands described by SRC and DEST represent a
25577 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25578 number of components into which the copy has been decomposed. */
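/* For example (a sketch): with DEST = { d1, d2 } and SRC = { d0, d1 } the
   two ranges overlap and REGNO (operands[0]) > REGNO (operands[1]), so the
   pairs are laid out in reverse order (d1 -> d2 first, then d0 -> d1) to
   avoid overwriting d1 before it has been read.  */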
25579 void
25580 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25581 {
25582 unsigned int i;
25583
25584 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25585 || REGNO (operands[0]) < REGNO (operands[1]))
25586 {
25587 for (i = 0; i < count; i++)
25588 {
25589 operands[2 * i] = dest[i];
25590 operands[2 * i + 1] = src[i];
25591 }
25592 }
25593 else
25594 {
25595 for (i = 0; i < count; i++)
25596 {
25597 operands[2 * i] = dest[count - i - 1];
25598 operands[2 * i + 1] = src[count - i - 1];
25599 }
25600 }
25601 }
25602
25603 /* Split operands into moves from op[1] + op[2] into op[0]. */
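/* For example (a sketch): combining d0 and d1 into q0 needs no moves at
   all, while combining d1 and d0 into q0 hits the reversed-halves case
   below and can be handled with a single VSWP.  */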
25604
25605 void
25606 neon_split_vcombine (rtx operands[3])
25607 {
25608 unsigned int dest = REGNO (operands[0]);
25609 unsigned int src1 = REGNO (operands[1]);
25610 unsigned int src2 = REGNO (operands[2]);
25611 machine_mode halfmode = GET_MODE (operands[1]);
25612 unsigned int halfregs = REG_NREGS (operands[1]);
25613 rtx destlo, desthi;
25614
25615 if (src1 == dest && src2 == dest + halfregs)
25616 {
25617 /* No-op move. Can't split to nothing; emit something. */
25618 emit_note (NOTE_INSN_DELETED);
25619 return;
25620 }
25621
25622 /* Preserve register attributes for variable tracking. */
25623 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25624 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25625 GET_MODE_SIZE (halfmode));
25626
25627 /* Special case of reversed high/low parts. Use VSWP. */
25628 if (src2 == dest && src1 == dest + halfregs)
25629 {
25630 rtx x = gen_rtx_SET (destlo, operands[1]);
25631 rtx y = gen_rtx_SET (desthi, operands[2]);
25632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25633 return;
25634 }
25635
25636 if (!reg_overlap_mentioned_p (operands[2], destlo))
25637 {
25638 /* Try to avoid unnecessary moves if part of the result
25639 is in the right place already. */
25640 if (src1 != dest)
25641 emit_move_insn (destlo, operands[1]);
25642 if (src2 != dest + halfregs)
25643 emit_move_insn (desthi, operands[2]);
25644 }
25645 else
25646 {
25647 if (src2 != dest + halfregs)
25648 emit_move_insn (desthi, operands[2]);
25649 if (src1 != dest)
25650 emit_move_insn (destlo, operands[1]);
25651 }
25652 }
25653 \f
25654 /* Return the number (counting from 0) of
25655 the least significant set bit in MASK. */
25656
25657 inline static int
25658 number_of_first_bit_set (unsigned mask)
25659 {
25660 return ctz_hwi (mask);
25661 }
25662
25663 /* Like emit_multi_reg_push, but allowing for a different set of
25664 registers to be described as saved. MASK is the set of registers
25665 to be saved; REAL_REGS is the set of registers to be described as
25666 saved. If REAL_REGS is 0, only describe the stack adjustment. */
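/* For example (a sketch): with MASK covering r4, r5 and lr this emits a
   single "push {r4, r5, lr}" and attaches a REG_FRAME_RELATED_EXPR note
   built from REAL_REGS, so the unwinder sees the 12-byte stack adjustment
   and, if REAL_REGS is nonzero, the individual register saves.  */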
25667
25668 static rtx_insn *
25669 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25670 {
25671 unsigned long regno;
25672 rtx par[10], tmp, reg;
25673 rtx_insn *insn;
25674 int i, j;
25675
25676 /* Build the parallel of the registers actually being stored. */
25677 for (i = 0; mask; ++i, mask &= mask - 1)
25678 {
25679 regno = ctz_hwi (mask);
25680 reg = gen_rtx_REG (SImode, regno);
25681
25682 if (i == 0)
25683 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25684 else
25685 tmp = gen_rtx_USE (VOIDmode, reg);
25686
25687 par[i] = tmp;
25688 }
25689
25690 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25691 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25692 tmp = gen_frame_mem (BLKmode, tmp);
25693 tmp = gen_rtx_SET (tmp, par[0]);
25694 par[0] = tmp;
25695
25696 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25697 insn = emit_insn (tmp);
25698
25699 /* Always build the stack adjustment note for unwind info. */
25700 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25701 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25702 par[0] = tmp;
25703
25704 /* Build the parallel of the registers recorded as saved for unwind. */
25705 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25706 {
25707 regno = ctz_hwi (real_regs);
25708 reg = gen_rtx_REG (SImode, regno);
25709
25710 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25711 tmp = gen_frame_mem (SImode, tmp);
25712 tmp = gen_rtx_SET (tmp, reg);
25713 RTX_FRAME_RELATED_P (tmp) = 1;
25714 par[j + 1] = tmp;
25715 }
25716
25717 if (j == 0)
25718 tmp = par[0];
25719 else
25720 {
25721 RTX_FRAME_RELATED_P (par[0]) = 1;
25722 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25723 }
25724
25725 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25726
25727 return insn;
25728 }
25729
25730 /* Emit code to pop the registers in MASK from the stack.  F is the
25731 assembly file to write the instructions to.  */
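/* For example (a sketch): a MASK covering r4, r5 and the PC normally
   produces "pop {r4, r5, pc}"; when interworking, backtracing, EH returns
   or CMSE entry functions are involved the PC is not popped directly and
   thumb_exit emits the return sequence instead.  */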
25732 static void
25733 thumb_pop (FILE *f, unsigned long mask)
25734 {
25735 int regno;
25736 int lo_mask = mask & 0xFF;
25737
25738 gcc_assert (mask);
25739
25740 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25741 {
25742 /* Special case.  Do not generate a POP PC statement here; do it in
25743 thumb_exit ().  */
25744 thumb_exit (f, -1);
25745 return;
25746 }
25747
25748 fprintf (f, "\tpop\t{");
25749
25750 /* Look at the low registers first. */
25751 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25752 {
25753 if (lo_mask & 1)
25754 {
25755 asm_fprintf (f, "%r", regno);
25756
25757 if ((lo_mask & ~1) != 0)
25758 fprintf (f, ", ");
25759 }
25760 }
25761
25762 if (mask & (1 << PC_REGNUM))
25763 {
25764 /* Catch popping the PC. */
25765 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
25766 || IS_CMSE_ENTRY (arm_current_func_type ()))
25767 {
25768 /* The PC is never popped directly; instead
25769 it is popped into r3 and then BX is used.  */
25770 fprintf (f, "}\n");
25771
25772 thumb_exit (f, -1);
25773
25774 return;
25775 }
25776 else
25777 {
25778 if (mask & 0xFF)
25779 fprintf (f, ", ");
25780
25781 asm_fprintf (f, "%r", PC_REGNUM);
25782 }
25783 }
25784
25785 fprintf (f, "}\n");
25786 }
25787
25788 /* Generate code to return from a thumb function.
25789 If 'reg_containing_return_addr' is -1, then the return address is
25790 actually on the stack, at the stack pointer.
25791
25792 Note: do not forget to update length attribute of corresponding insn pattern
25793 when changing assembly output (e.g. the length attribute of epilogue_insns when
25794 updating Armv8-M Baseline Security Extensions register clearing
25795 sequences). */
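/* For example (a sketch): in the simple non-interworking case with the
   return address still on the stack this boils down to a single
   "pop {pc}"; with interworking the return address is instead popped into
   a free low register (r1-r3, depending on the return value) and the
   function finishes with a BX on that register.  */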
25796 static void
25797 thumb_exit (FILE *f, int reg_containing_return_addr)
25798 {
25799 unsigned regs_available_for_popping;
25800 unsigned regs_to_pop;
25801 int pops_needed;
25802 unsigned available;
25803 unsigned required;
25804 machine_mode mode;
25805 int size;
25806 int restore_a4 = FALSE;
25807
25808 /* Compute the registers we need to pop. */
25809 regs_to_pop = 0;
25810 pops_needed = 0;
25811
25812 if (reg_containing_return_addr == -1)
25813 {
25814 regs_to_pop |= 1 << LR_REGNUM;
25815 ++pops_needed;
25816 }
25817
25818 if (TARGET_BACKTRACE)
25819 {
25820 /* Restore the (ARM) frame pointer and stack pointer. */
25821 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25822 pops_needed += 2;
25823 }
25824
25825 /* If there is nothing to pop then just emit the BX instruction and
25826 return. */
25827 if (pops_needed == 0)
25828 {
25829 if (crtl->calls_eh_return)
25830 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25831
25832 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25833 {
25834 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
25835 emitted by cmse_nonsecure_entry_clear_before_return (). */
25836 if (!TARGET_HAVE_FPCXT_CMSE)
25837 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25838 reg_containing_return_addr);
25839 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25840 }
25841 else
25842 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25843 return;
25844 }
25845 /* Otherwise if we are not supporting interworking and we have not created
25846 a backtrace structure and the function was not entered in ARM mode then
25847 just pop the return address straight into the PC. */
25848 else if (!TARGET_INTERWORK
25849 && !TARGET_BACKTRACE
25850 && !is_called_in_ARM_mode (current_function_decl)
25851 && !crtl->calls_eh_return
25852 && !IS_CMSE_ENTRY (arm_current_func_type ()))
25853 {
25854 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25855 return;
25856 }
25857
25858 /* Find out how many of the (return) argument registers we can corrupt. */
25859 regs_available_for_popping = 0;
25860
25861 /* If returning via __builtin_eh_return, the bottom three registers
25862 all contain information needed for the return. */
25863 if (crtl->calls_eh_return)
25864 size = 12;
25865 else
25866 {
25867 /* See if we can deduce the registers used from the function's
25868 return value.  This is more reliable than examining
25869 df_regs_ever_live_p () because that will be set if the register is
25870 ever used in the function, not just if the register is used
25871 to hold a return value. */
25872
25873 if (crtl->return_rtx != 0)
25874 mode = GET_MODE (crtl->return_rtx);
25875 else
25876 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25877
25878 size = GET_MODE_SIZE (mode);
25879
25880 if (size == 0)
25881 {
25882 /* In a void function we can use any argument register.
25883 In a function that returns a structure on the stack
25884 we can use the second and third argument registers. */
25885 if (mode == VOIDmode)
25886 regs_available_for_popping =
25887 (1 << ARG_REGISTER (1))
25888 | (1 << ARG_REGISTER (2))
25889 | (1 << ARG_REGISTER (3));
25890 else
25891 regs_available_for_popping =
25892 (1 << ARG_REGISTER (2))
25893 | (1 << ARG_REGISTER (3));
25894 }
25895 else if (size <= 4)
25896 regs_available_for_popping =
25897 (1 << ARG_REGISTER (2))
25898 | (1 << ARG_REGISTER (3));
25899 else if (size <= 8)
25900 regs_available_for_popping =
25901 (1 << ARG_REGISTER (3));
25902 }
25903
25904 /* Match registers to be popped with registers into which we pop them. */
25905 for (available = regs_available_for_popping,
25906 required = regs_to_pop;
25907 required != 0 && available != 0;
25908 available &= ~(available & - available),
25909 required &= ~(required & - required))
25910 -- pops_needed;
25911
25912 /* If we have any popping registers left over, remove them. */
25913 if (available > 0)
25914 regs_available_for_popping &= ~available;
25915
25916 /* Otherwise if we need another popping register we can use
25917 the fourth argument register. */
25918 else if (pops_needed)
25919 {
25920 /* If we have not found any free argument registers and
25921 reg a4 contains the return address, we must move it. */
25922 if (regs_available_for_popping == 0
25923 && reg_containing_return_addr == LAST_ARG_REGNUM)
25924 {
25925 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25926 reg_containing_return_addr = LR_REGNUM;
25927 }
25928 else if (size > 12)
25929 {
25930 /* Register a4 is being used to hold part of the return value,
25931 but we have dire need of a free, low register. */
25932 restore_a4 = TRUE;
25933
25934 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
25935 }
25936
25937 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25938 {
25939 /* The fourth argument register is available. */
25940 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25941
25942 --pops_needed;
25943 }
25944 }
25945
25946 /* Pop as many registers as we can. */
25947 thumb_pop (f, regs_available_for_popping);
25948
25949 /* Process the registers we popped. */
25950 if (reg_containing_return_addr == -1)
25951 {
25952 /* The return address was popped into the lowest numbered register. */
25953 regs_to_pop &= ~(1 << LR_REGNUM);
25954
25955 reg_containing_return_addr =
25956 number_of_first_bit_set (regs_available_for_popping);
25957
25958 /* Remove this register from the mask of available registers, so that
25959 the return address will not be corrupted by further pops. */
25960 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25961 }
25962
25963 /* If we popped other registers then handle them here. */
25964 if (regs_available_for_popping)
25965 {
25966 int frame_pointer;
25967
25968 /* Work out which register currently contains the frame pointer. */
25969 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25970
25971 /* Move it into the correct place. */
25972 asm_fprintf (f, "\tmov\t%r, %r\n",
25973 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25974
25975 /* (Temporarily) remove it from the mask of popped registers. */
25976 regs_available_for_popping &= ~(1 << frame_pointer);
25977 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25978
25979 if (regs_available_for_popping)
25980 {
25981 int stack_pointer;
25982
25983 /* We popped the stack pointer as well,
25984 find the register that contains it. */
25985 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25986
25987 /* Move it into the stack register. */
25988 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25989
25990 /* At this point we have popped all necessary registers, so
25991 do not worry about restoring regs_available_for_popping
25992 to its correct value:
25993
25994 assert (pops_needed == 0)
25995 assert (regs_available_for_popping == (1 << frame_pointer))
25996 assert (regs_to_pop == (1 << STACK_POINTER)) */
25997 }
25998 else
25999 {
26000 /* Since we have just moved the popped value into the frame
26001 pointer, the popping register is available for reuse, and
26002 we know that we still have the stack pointer left to pop. */
26003 regs_available_for_popping |= (1 << frame_pointer);
26004 }
26005 }
26006
26007 /* If we still have registers left on the stack, but we no longer have
26008 any registers into which we can pop them, then we must move the return
26009 address into the link register and make available the register that
26010 contained it. */
26011 if (regs_available_for_popping == 0 && pops_needed > 0)
26012 {
26013 regs_available_for_popping |= 1 << reg_containing_return_addr;
26014
26015 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26016 reg_containing_return_addr);
26017
26018 reg_containing_return_addr = LR_REGNUM;
26019 }
26020
26021 /* If we have registers left on the stack then pop some more.
26022 We know that at most we will want to pop FP and SP. */
26023 if (pops_needed > 0)
26024 {
26025 int popped_into;
26026 int move_to;
26027
26028 thumb_pop (f, regs_available_for_popping);
26029
26030 /* We have popped either FP or SP.
26031 Move whichever one it is into the correct register. */
26032 popped_into = number_of_first_bit_set (regs_available_for_popping);
26033 move_to = number_of_first_bit_set (regs_to_pop);
26034
26035 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26036 --pops_needed;
26037 }
26038
26039 /* If we still have not popped everything then we must have only
26040 had one register available to us and we are now popping the SP. */
26041 if (pops_needed > 0)
26042 {
26043 int popped_into;
26044
26045 thumb_pop (f, regs_available_for_popping);
26046
26047 popped_into = number_of_first_bit_set (regs_available_for_popping);
26048
26049 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26050 /*
26051 assert (regs_to_pop == (1 << STACK_POINTER))
26052 assert (pops_needed == 1)
26053 */
26054 }
26055
26056 /* If necessary restore the a4 register. */
26057 if (restore_a4)
26058 {
26059 if (reg_containing_return_addr != LR_REGNUM)
26060 {
26061 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26062 reg_containing_return_addr = LR_REGNUM;
26063 }
26064
26065 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26066 }
26067
26068 if (crtl->calls_eh_return)
26069 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26070
26071 /* Return to caller. */
26072 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26073 {
26074 /* This is for the cases where LR is not being used to contain the return
26075 address. It may therefore contain information that we might not want
26076 to leak, hence it must be cleared. The value in R0 will never be a
26077 secret at this point, so it is safe to use it, see the clearing code
26078 in cmse_nonsecure_entry_clear_before_return (). */
26079 if (reg_containing_return_addr != LR_REGNUM)
26080 asm_fprintf (f, "\tmov\tlr, r0\n");
26081
26082 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26083 by cmse_nonsecure_entry_clear_before_return (). */
26084 if (!TARGET_HAVE_FPCXT_CMSE)
26085 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26086 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26087 }
26088 else
26089 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26090 }
26091 \f
26092 /* Scan INSN just before assembler is output for it.
26093 For Thumb-1, we track the status of the condition codes; this
26094 information is used in the cbranchsi4_insn pattern. */
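/* For example (a sketch): after "adds r3, r3, #1" we record that the
   flags reflect a comparison of r3 with zero, so a following
   cbranchsi4_insn that tests r3 against zero need not emit an explicit
   compare.  */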
26095 void
26096 thumb1_final_prescan_insn (rtx_insn *insn)
26097 {
26098 if (flag_print_asm_name)
26099 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26100 INSN_ADDRESSES (INSN_UID (insn)));
26101 /* Don't overwrite the previous setter when we get to a cbranch. */
26102 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26103 {
26104 enum attr_conds conds;
26105
26106 if (cfun->machine->thumb1_cc_insn)
26107 {
26108 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26109 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26110 CC_STATUS_INIT;
26111 }
26112 conds = get_attr_conds (insn);
26113 if (conds == CONDS_SET)
26114 {
26115 rtx set = single_set (insn);
26116 cfun->machine->thumb1_cc_insn = insn;
26117 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26118 cfun->machine->thumb1_cc_op1 = const0_rtx;
26119 cfun->machine->thumb1_cc_mode = CC_NZmode;
26120 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26121 {
26122 rtx src1 = XEXP (SET_SRC (set), 1);
26123 if (src1 == const0_rtx)
26124 cfun->machine->thumb1_cc_mode = CCmode;
26125 }
26126 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26127 {
26128 /* Record the src register operand instead of dest because
26129 cprop_hardreg pass propagates src. */
26130 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26131 }
26132 }
26133 else if (conds != CONDS_NOCOND)
26134 cfun->machine->thumb1_cc_insn = NULL_RTX;
26135 }
26136
26137 /* Check whether an unexpected far jump is used. */
26138 if (cfun->machine->lr_save_eliminated
26139 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26140 internal_error("Unexpected thumb1 far jump");
26141 }
26142
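/* Return nonzero if VAL (viewed as a 32-bit value) fits entirely within
   an 8-bit window somewhere in the word, e.g. 0x1FE00 == 0xFF << 9; such
   constants can typically be built with a move of the byte followed by a
   left shift.  */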
26143 int
26144 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26145 {
26146 unsigned HOST_WIDE_INT mask = 0xff;
26147 int i;
26148
26149 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26150 if (val == 0) /* XXX */
26151 return 0;
26152
26153 for (i = 0; i < 25; i++)
26154 if ((val & (mask << i)) == val)
26155 return 1;
26156
26157 return 0;
26158 }
26159
26160 /* Return nonzero if the current function contains,
26161 or might contain, a far jump.  */
26162 static int
26163 thumb_far_jump_used_p (void)
26164 {
26165 rtx_insn *insn;
26166 bool far_jump = false;
26167 unsigned int func_size = 0;
26168
26169 /* If we have already decided that far jumps may be used,
26170 do not bother checking again, and always return true even if
26171 it turns out that they are not being used. Once we have made
26172 the decision that far jumps are present (and that hence the link
26173 register will be pushed onto the stack) we cannot go back on it. */
26174 if (cfun->machine->far_jump_used)
26175 return 1;
26176
26177 /* If this function is not being called from the prologue/epilogue
26178 generation code then it must be being called from the
26179 INITIAL_ELIMINATION_OFFSET macro. */
26180 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26181 {
26182 /* In this case we know that we are being asked about the elimination
26183 of the arg pointer register. If that register is not being used,
26184 then there are no arguments on the stack, and we do not have to
26185 worry that a far jump might force the prologue to push the link
26186 register, changing the stack offsets. In this case we can just
26187 return false, since the presence of far jumps in the function will
26188 not affect stack offsets.
26189
26190 If the arg pointer is live (or if it was live, but has now been
26191 eliminated and so set to dead) then we do have to test to see if
26192 the function might contain a far jump. This test can lead to some
26193 false negatives, since before reload is completed the length of
26194 branch instructions is not known, so gcc defaults to returning their
26195 longest length, which in turn sets the far jump attribute to true.
26196
26197 A false negative will not result in bad code being generated, but it
26198 will result in a needless push and pop of the link register. We
26199 hope that this does not occur too often.
26200
26201 If we need doubleword stack alignment this could affect the other
26202 elimination offsets so we can't risk getting it wrong. */
26203 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26204 cfun->machine->arg_pointer_live = 1;
26205 else if (!cfun->machine->arg_pointer_live)
26206 return 0;
26207 }
26208
26209 /* We should not change far_jump_used during or after reload, as there is
26210 no chance to change stack frame layout. */
26211 if (reload_in_progress || reload_completed)
26212 return 0;
26213
26214 /* Check to see if the function contains a branch
26215 insn with the far jump attribute set. */
26216 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26217 {
26218 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26219 {
26220 far_jump = true;
26221 }
26222 func_size += get_attr_length (insn);
26223 }
26224
26225 /* The far_jump attribute is always true for Thumb-1 before the
26226 shorten_branch pass, so checking it before that pass is not very useful.
26227
26228 The following heuristic tries to estimate more accurately whether a far
26229 jump will actually be used.  It is very conservative, because there is
26230 no chance to roll back a decision not to use far jumps.
26231
26232 A Thumb-1 long branch offset ranges from -2048 to 2046.  In the worst
26233 case each 2-byte insn is associated with a 4-byte constant pool entry,
26234 so using a function size of 2048/3 as the threshold is conservative
26235 enough. */
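/* Sketch of the arithmetic: in the worst case every 2-byte insn drags in
   a 4-byte literal-pool entry, so the distance a branch must span can be
   up to three times the summed insn lengths; once 3 * func_size reaches
   the 2048-byte branch range, some branch may be out of reach.  */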
26236 if (far_jump)
26237 {
26238 if ((func_size * 3) >= 2048)
26239 {
26240 /* Record the fact that we have decided that
26241 the function does use far jumps. */
26242 cfun->machine->far_jump_used = 1;
26243 return 1;
26244 }
26245 }
26246
26247 return 0;
26248 }
26249
26250 /* Return nonzero if FUNC must be entered in ARM mode. */
26251 static bool
26252 is_called_in_ARM_mode (tree func)
26253 {
26254 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26255
26256 /* Ignore the problem about functions whose address is taken. */
26257 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26258 return true;
26259
26260 #ifdef ARM_PE
26261 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26262 #else
26263 return false;
26264 #endif
26265 }
26266
26267 /* Given the stack offsets and register mask in OFFSETS, decide how
26268 many additional registers to push instead of subtracting a constant
26269 from SP. For epilogues the principle is the same except we use pop.
26270 FOR_PROLOGUE indicates which we're generating. */
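/* For example (a sketch): a frame of exactly 512 bytes is just beyond the
   508-byte limit of the Thumb-1 SP-adjusting immediate, so pushing one
   extra free low register shrinks the remaining adjustment to 508 bytes,
   and (amount - 508) / 4 == 1 extra register is requested below.  */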
26271 static int
26272 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26273 {
26274 HOST_WIDE_INT amount;
26275 unsigned long live_regs_mask = offsets->saved_regs_mask;
26276 /* Extract a mask of the ones we can give to the Thumb's push/pop
26277 instruction. */
26278 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26279 /* Then count how many other high registers will need to be pushed. */
26280 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26281 int n_free, reg_base, size;
26282
26283 if (!for_prologue && frame_pointer_needed)
26284 amount = offsets->locals_base - offsets->saved_regs;
26285 else
26286 amount = offsets->outgoing_args - offsets->saved_regs;
26287
26288 /* If the stack frame size is 512 exactly, we can save one load
26289 instruction, which should make this a win even when optimizing
26290 for speed. */
26291 if (!optimize_size && amount != 512)
26292 return 0;
26293
26294 /* Can't do this if there are high registers to push. */
26295 if (high_regs_pushed != 0)
26296 return 0;
26297
26298 /* Shouldn't do it in the prologue if no registers would normally
26299 be pushed at all. In the epilogue, also allow it if we'll have
26300 a pop insn for the PC. */
26301 if (l_mask == 0
26302 && (for_prologue
26303 || TARGET_BACKTRACE
26304 || (live_regs_mask & 1 << LR_REGNUM) == 0
26305 || TARGET_INTERWORK
26306 || crtl->args.pretend_args_size != 0))
26307 return 0;
26308
26309 /* Don't do this if thumb_expand_prologue wants to emit instructions
26310 between the push and the stack frame allocation. */
26311 if (for_prologue
26312 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26313 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26314 return 0;
26315
26316 reg_base = 0;
26317 n_free = 0;
26318 if (!for_prologue)
26319 {
26320 size = arm_size_return_regs ();
26321 reg_base = ARM_NUM_INTS (size);
26322 live_regs_mask >>= reg_base;
26323 }
26324
26325 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26326 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26327 {
26328 live_regs_mask >>= 1;
26329 n_free++;
26330 }
26331
26332 if (n_free == 0)
26333 return 0;
26334 gcc_assert (amount / 4 * 4 == amount);
26335
26336 if (amount >= 512 && (amount - n_free * 4) < 512)
26337 return (amount - 508) / 4;
26338 if (amount <= n_free * 4)
26339 return amount / 4;
26340 return 0;
26341 }
26342
26343 /* The bits which aren't usefully expanded as rtl. */
26344 const char *
26345 thumb1_unexpanded_epilogue (void)
26346 {
26347 arm_stack_offsets *offsets;
26348 int regno;
26349 unsigned long live_regs_mask = 0;
26350 int high_regs_pushed = 0;
26351 int extra_pop;
26352 int had_to_push_lr;
26353 int size;
26354
26355 if (cfun->machine->return_used_this_function != 0)
26356 return "";
26357
26358 if (IS_NAKED (arm_current_func_type ()))
26359 return "";
26360
26361 offsets = arm_get_frame_offsets ();
26362 live_regs_mask = offsets->saved_regs_mask;
26363 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26364
26365 /* See if we can deduce the registers used from the function's return value.
26366 This is more reliable than examining df_regs_ever_live_p () because that
26367 will be set if the register is ever used in the function, not just if
26368 the register is used to hold a return value. */
26369 size = arm_size_return_regs ();
26370
26371 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26372 if (extra_pop > 0)
26373 {
26374 unsigned long extra_mask = (1 << extra_pop) - 1;
26375 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26376 }
26377
26378 /* The prolog may have pushed some high registers to use as
26379 work registers, e.g. the testsuite file:
26380 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26381 compiles to produce:
26382 push {r4, r5, r6, r7, lr}
26383 mov r7, r9
26384 mov r6, r8
26385 push {r6, r7}
26386 as part of the prolog. We have to undo that pushing here. */
26387
26388 if (high_regs_pushed)
26389 {
26390 unsigned long mask = live_regs_mask & 0xff;
26391 int next_hi_reg;
26392
26393 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26394
26395 if (mask == 0)
26396 /* Oh dear! We have no low registers into which we can pop
26397 high registers! */
26398 internal_error
26399 ("no low registers available for popping high registers");
26400
26401 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26402 if (live_regs_mask & (1 << next_hi_reg))
26403 break;
26404
26405 while (high_regs_pushed)
26406 {
26407 /* Find lo register(s) into which the high register(s) can
26408 be popped. */
26409 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26410 {
26411 if (mask & (1 << regno))
26412 high_regs_pushed--;
26413 if (high_regs_pushed == 0)
26414 break;
26415 }
26416
26417 if (high_regs_pushed == 0 && regno >= 0)
26418 mask &= ~((1 << regno) - 1);
26419
26420 /* Pop the values into the low register(s). */
26421 thumb_pop (asm_out_file, mask);
26422
26423 /* Move the value(s) into the high registers. */
26424 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26425 {
26426 if (mask & (1 << regno))
26427 {
26428 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26429 regno);
26430
26431 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26432 next_hi_reg--)
26433 if (live_regs_mask & (1 << next_hi_reg))
26434 break;
26435 }
26436 }
26437 }
26438 live_regs_mask &= ~0x0f00;
26439 }
26440
26441 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26442 live_regs_mask &= 0xff;
26443
26444 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26445 {
26446 /* Pop the return address into the PC. */
26447 if (had_to_push_lr)
26448 live_regs_mask |= 1 << PC_REGNUM;
26449
26450 /* Either no argument registers were pushed or a backtrace
26451 structure was created which includes an adjusted stack
26452 pointer, so just pop everything. */
26453 if (live_regs_mask)
26454 thumb_pop (asm_out_file, live_regs_mask);
26455
26456 /* We have either just popped the return address into the
26457 PC or it was kept in LR for the entire function.
26458 Note that thumb_pop has already called thumb_exit if the
26459 PC was in the list. */
26460 if (!had_to_push_lr)
26461 thumb_exit (asm_out_file, LR_REGNUM);
26462 }
26463 else
26464 {
26465 /* Pop everything but the return address. */
26466 if (live_regs_mask)
26467 thumb_pop (asm_out_file, live_regs_mask);
26468
26469 if (had_to_push_lr)
26470 {
26471 if (size > 12)
26472 {
26473 /* We have no free low regs, so save one. */
26474 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26475 LAST_ARG_REGNUM);
26476 }
26477
26478 /* Get the return address into a temporary register. */
26479 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26480
26481 if (size > 12)
26482 {
26483 /* Move the return address to lr. */
26484 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26485 LAST_ARG_REGNUM);
26486 /* Restore the low register. */
26487 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26488 IP_REGNUM);
26489 regno = LR_REGNUM;
26490 }
26491 else
26492 regno = LAST_ARG_REGNUM;
26493 }
26494 else
26495 regno = LR_REGNUM;
26496
26497 /* Remove the argument registers that were pushed onto the stack. */
26498 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26499 SP_REGNUM, SP_REGNUM,
26500 crtl->args.pretend_args_size);
26501
26502 thumb_exit (asm_out_file, regno);
26503 }
26504
26505 return "";
26506 }
26507
26508 /* Functions to save and restore machine-specific function data. */
26509 static struct machine_function *
26510 arm_init_machine_status (void)
26511 {
26512 struct machine_function *machine;
26513 machine = ggc_cleared_alloc<machine_function> ();
26514
26515 #if ARM_FT_UNKNOWN != 0
26516 machine->func_type = ARM_FT_UNKNOWN;
26517 #endif
26518 machine->static_chain_stack_bytes = -1;
26519 return machine;
26520 }
26521
26522 /* Return an RTX indicating where the return address to the
26523 calling function can be found. */
26524 rtx
26525 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26526 {
26527 if (count != 0)
26528 return NULL_RTX;
26529
26530 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26531 }
26532
26533 /* Do anything needed before RTL is emitted for each function. */
26534 void
26535 arm_init_expanders (void)
26536 {
26537 /* Arrange to initialize and mark the machine per-function status. */
26538 init_machine_status = arm_init_machine_status;
26539
26540 /* This is to stop the combine pass optimizing away the alignment
26541 adjustment of va_arg. */
26542 /* ??? It is claimed that this should not be necessary. */
26543 if (cfun)
26544 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26545 }
26546
26547 /* Return true if FUNC would be compiled in a different mode (ARM or Thumb) from the current mode. */
26548
26549 bool
26550 arm_change_mode_p (tree func)
26551 {
26552 if (TREE_CODE (func) != FUNCTION_DECL)
26553 return false;
26554
26555 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26556
26557 if (!callee_tree)
26558 callee_tree = target_option_default_node;
26559
26560 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26561 int flags = callee_opts->x_target_flags;
26562
26563 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26564 }
26565
26566 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26567 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26568 to point at the base of the local variables after static stack
26569 space for a function has been allocated. */
26570
26571 HOST_WIDE_INT
26572 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26573 {
26574 arm_stack_offsets *offsets;
26575
26576 offsets = arm_get_frame_offsets ();
26577
26578 switch (from)
26579 {
26580 case ARG_POINTER_REGNUM:
26581 switch (to)
26582 {
26583 case STACK_POINTER_REGNUM:
26584 return offsets->outgoing_args - offsets->saved_args;
26585
26586 case FRAME_POINTER_REGNUM:
26587 return offsets->soft_frame - offsets->saved_args;
26588
26589 case ARM_HARD_FRAME_POINTER_REGNUM:
26590 return offsets->saved_regs - offsets->saved_args;
26591
26592 case THUMB_HARD_FRAME_POINTER_REGNUM:
26593 return offsets->locals_base - offsets->saved_args;
26594
26595 default:
26596 gcc_unreachable ();
26597 }
26598 break;
26599
26600 case FRAME_POINTER_REGNUM:
26601 switch (to)
26602 {
26603 case STACK_POINTER_REGNUM:
26604 return offsets->outgoing_args - offsets->soft_frame;
26605
26606 case ARM_HARD_FRAME_POINTER_REGNUM:
26607 return offsets->saved_regs - offsets->soft_frame;
26608
26609 case THUMB_HARD_FRAME_POINTER_REGNUM:
26610 return offsets->locals_base - offsets->soft_frame;
26611
26612 default:
26613 gcc_unreachable ();
26614 }
26615 break;
26616
26617 default:
26618 gcc_unreachable ();
26619 }
26620 }
26621
26622 /* Generate the function's prologue. */
26623
26624 void
26625 thumb1_expand_prologue (void)
26626 {
26627 rtx_insn *insn;
26628
26629 HOST_WIDE_INT amount;
26630 HOST_WIDE_INT size;
26631 arm_stack_offsets *offsets;
26632 unsigned long func_type;
26633 int regno;
26634 unsigned long live_regs_mask;
26635 unsigned long l_mask;
26636 unsigned high_regs_pushed = 0;
26637 bool lr_needs_saving;
26638
26639 func_type = arm_current_func_type ();
26640
26641 /* Naked functions don't have prologues. */
26642 if (IS_NAKED (func_type))
26643 {
26644 if (flag_stack_usage_info)
26645 current_function_static_stack_size = 0;
26646 return;
26647 }
26648
26649 if (IS_INTERRUPT (func_type))
26650 {
26651 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26652 return;
26653 }
26654
26655 if (is_called_in_ARM_mode (current_function_decl))
26656 emit_insn (gen_prologue_thumb1_interwork ());
26657
26658 offsets = arm_get_frame_offsets ();
26659 live_regs_mask = offsets->saved_regs_mask;
26660 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26661
26662 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26663 l_mask = live_regs_mask & 0x40ff;
26664 /* Then count how many other high registers will need to be pushed. */
26665 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26666
26667 if (crtl->args.pretend_args_size)
26668 {
26669 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26670
26671 if (cfun->machine->uses_anonymous_args)
26672 {
26673 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26674 unsigned long mask;
26675
26676 mask = 1ul << (LAST_ARG_REGNUM + 1);
26677 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26678
26679 insn = thumb1_emit_multi_reg_push (mask, 0);
26680 }
26681 else
26682 {
26683 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26684 stack_pointer_rtx, x));
26685 }
26686 RTX_FRAME_RELATED_P (insn) = 1;
26687 }
26688
26689 if (TARGET_BACKTRACE)
26690 {
26691 HOST_WIDE_INT offset = 0;
26692 unsigned work_register;
26693 rtx work_reg, x, arm_hfp_rtx;
26694
26695 /* We have been asked to create a stack backtrace structure.
26696 The code looks like this:
26697
26698 0 .align 2
26699 0 func:
26700 0 sub SP, #16 Reserve space for 4 registers.
26701 2 push {R7} Push low registers.
26702 4 add R7, SP, #20 Get the stack pointer before the push.
26703 6 str R7, [SP, #8] Store the stack pointer
26704 (before reserving the space).
26705 8 mov R7, PC Get hold of the start of this code + 12.
26706 10 str R7, [SP, #16] Store it.
26707 12 mov R7, FP Get hold of the current frame pointer.
26708 14 str R7, [SP, #4] Store it.
26709 16 mov R7, LR Get hold of the current return address.
26710 18 str R7, [SP, #12] Store it.
26711 20 add R7, SP, #16 Point at the start of the
26712 backtrace structure.
26713 22 mov FP, R7 Put this value into the frame pointer. */
26714
26715 work_register = thumb_find_work_register (live_regs_mask);
26716 work_reg = gen_rtx_REG (SImode, work_register);
26717 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26718
26719 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26720 stack_pointer_rtx, GEN_INT (-16)));
26721 RTX_FRAME_RELATED_P (insn) = 1;
26722
26723 if (l_mask)
26724 {
26725 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26726 RTX_FRAME_RELATED_P (insn) = 1;
26727 lr_needs_saving = false;
26728
26729 offset = bit_count (l_mask) * UNITS_PER_WORD;
26730 }
26731
26732 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26733 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26734
26735 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26736 x = gen_frame_mem (SImode, x);
26737 emit_move_insn (x, work_reg);
26738
26739 /* Make sure that the instruction fetching the PC is in the right place
26740 to calculate "start of backtrace creation code + 12". */
26741 /* ??? The stores using the common WORK_REG ought to be enough to
26742 prevent the scheduler from doing anything weird. Failing that
26743 we could always move all of the following into an UNSPEC_VOLATILE. */
26744 if (l_mask)
26745 {
26746 x = gen_rtx_REG (SImode, PC_REGNUM);
26747 emit_move_insn (work_reg, x);
26748
26749 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26750 x = gen_frame_mem (SImode, x);
26751 emit_move_insn (x, work_reg);
26752
26753 emit_move_insn (work_reg, arm_hfp_rtx);
26754
26755 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26756 x = gen_frame_mem (SImode, x);
26757 emit_move_insn (x, work_reg);
26758 }
26759 else
26760 {
26761 emit_move_insn (work_reg, arm_hfp_rtx);
26762
26763 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26764 x = gen_frame_mem (SImode, x);
26765 emit_move_insn (x, work_reg);
26766
26767 x = gen_rtx_REG (SImode, PC_REGNUM);
26768 emit_move_insn (work_reg, x);
26769
26770 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26771 x = gen_frame_mem (SImode, x);
26772 emit_move_insn (x, work_reg);
26773 }
26774
26775 x = gen_rtx_REG (SImode, LR_REGNUM);
26776 emit_move_insn (work_reg, x);
26777
26778 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26779 x = gen_frame_mem (SImode, x);
26780 emit_move_insn (x, work_reg);
26781
26782 x = GEN_INT (offset + 12);
26783 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26784
26785 emit_move_insn (arm_hfp_rtx, work_reg);
26786 }
26787 /* Optimization: If we are not pushing any low registers but we are going
26788 to push some high registers then delay our first push. This will just
26789 be a push of LR and we can combine it with the push of the first high
26790 register. */
26791 else if ((l_mask & 0xff) != 0
26792 || (high_regs_pushed == 0 && lr_needs_saving))
26793 {
26794 unsigned long mask = l_mask;
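      /* thumb1_extra_regs_pushed returns a count of additional low registers
	 that can be pushed purely to allocate stack space; turn that count
	 into a mask of r0..r(count-1).  */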
26795 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26796 insn = thumb1_emit_multi_reg_push (mask, mask);
26797 RTX_FRAME_RELATED_P (insn) = 1;
26798 lr_needs_saving = false;
26799 }
26800
26801 if (high_regs_pushed)
26802 {
26803 unsigned pushable_regs;
26804 unsigned next_hi_reg;
26805 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26806 : crtl->args.info.nregs;
26807 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26808
26809 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26810 if (live_regs_mask & (1 << next_hi_reg))
26811 break;
26812
26813 	 /* Here we need to mask out registers used for passing arguments,
26814 	    even if they could otherwise be pushed.  This is to avoid using
26815 	    them to stash the high registers, since such a stash could
26816 	    clobber live argument values.  */
26817 pushable_regs = l_mask & (~arg_regs_mask);
26818 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
26819
26820 /* Normally, LR can be used as a scratch register once it has been
26821 saved; but if the function examines its own return address then
26822 the value is still live and we need to avoid using it. */
26823 bool return_addr_live
26824 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
26825 LR_REGNUM);
26826
26827 if (lr_needs_saving || return_addr_live)
26828 pushable_regs &= ~(1 << LR_REGNUM);
26829
26830 if (pushable_regs == 0)
26831 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26832
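      /* Thumb-1 PUSH cannot encode r8-r11 directly, so copy each live high
	 register into a free low register and push it from there, one batch
	 at a time.  */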
26833 while (high_regs_pushed > 0)
26834 {
26835 unsigned long real_regs_mask = 0;
26836 unsigned long push_mask = 0;
26837
26838 for (regno = LR_REGNUM; regno >= 0; regno --)
26839 {
26840 if (pushable_regs & (1 << regno))
26841 {
26842 emit_move_insn (gen_rtx_REG (SImode, regno),
26843 gen_rtx_REG (SImode, next_hi_reg));
26844
26845 high_regs_pushed --;
26846 real_regs_mask |= (1 << next_hi_reg);
26847 push_mask |= (1 << regno);
26848
26849 if (high_regs_pushed)
26850 {
26851 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26852 next_hi_reg --)
26853 if (live_regs_mask & (1 << next_hi_reg))
26854 break;
26855 }
26856 else
26857 break;
26858 }
26859 }
26860
26861 /* If we had to find a work register and we have not yet
26862 saved the LR then add it to the list of regs to push. */
26863 if (lr_needs_saving)
26864 {
26865 push_mask |= 1 << LR_REGNUM;
26866 real_regs_mask |= 1 << LR_REGNUM;
26867 lr_needs_saving = false;
26868 /* If the return address is not live at this point, we
26869 can add LR to the list of registers that we can use
26870 for pushes. */
26871 if (!return_addr_live)
26872 pushable_regs |= 1 << LR_REGNUM;
26873 }
26874
26875 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
26876 RTX_FRAME_RELATED_P (insn) = 1;
26877 }
26878 }
26879
26880 /* Load the pic register before setting the frame pointer,
26881 so we can use r7 as a temporary work register. */
26882 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26883 arm_load_pic_register (live_regs_mask, NULL_RTX);
26884
26885 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26886 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26887 stack_pointer_rtx);
26888
26889 size = offsets->outgoing_args - offsets->saved_args;
26890 if (flag_stack_usage_info)
26891 current_function_static_stack_size = size;
26892
26893 /* If we have a frame, then do stack checking. FIXME: not implemented. */
26894 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26895 || flag_stack_clash_protection)
26896 && size)
26897 sorry ("%<-fstack-check=specific%> for Thumb-1");
26898
26899 amount = offsets->outgoing_args - offsets->saved_regs;
26900 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
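  /* Registers pushed solely to allocate stack space already account for
     4 bytes each of the frame, so the explicit adjustment is reduced
     accordingly.  */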
26901 if (amount)
26902 {
26903 if (amount < 512)
26904 {
26905 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26906 GEN_INT (- amount)));
26907 RTX_FRAME_RELATED_P (insn) = 1;
26908 }
26909 else
26910 {
26911 rtx reg, dwarf;
26912
26913 /* The stack decrement is too big for an immediate value in a single
26914 insn. In theory we could issue multiple subtracts, but after
26915 three of them it becomes more space efficient to place the full
26916 value in the constant pool and load into a register. (Also the
26917 ARM debugger really likes to see only one stack decrement per
26918 function). So instead we look for a scratch register into which
26919 we can load the decrement, and then we subtract this from the
26920 	 stack pointer.  Unfortunately, on Thumb the only available
26921 	 scratch registers are the argument registers, and we cannot use
26922 	 these as they may hold arguments to the function.  Instead we
26923 	 attempt to locate a call-preserved register which is used by this
26924 function. If we can find one, then we know that it will have
26925 been pushed at the start of the prologue and so we can corrupt
26926 it now. */
26927 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26928 if (live_regs_mask & (1 << regno))
26929 break;
26930
26931 	  gcc_assert (regno <= LAST_LO_REGNUM);
26932
26933 reg = gen_rtx_REG (SImode, regno);
26934
26935 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26936
26937 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26938 stack_pointer_rtx, reg));
26939
26940 dwarf = gen_rtx_SET (stack_pointer_rtx,
26941 plus_constant (Pmode, stack_pointer_rtx,
26942 -amount));
26943 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26944 RTX_FRAME_RELATED_P (insn) = 1;
26945 }
26946 }
26947
26948 if (frame_pointer_needed)
26949 thumb_set_frame_pointer (offsets);
26950
26951 /* If we are profiling, make sure no instructions are scheduled before
26952 the call to mcount. Similarly if the user has requested no
26953 scheduling in the prolog. Similarly if we want non-call exceptions
26954 using the EABI unwinder, to prevent faulting instructions from being
26955 swapped with a stack adjustment. */
26956 if (crtl->profile || !TARGET_SCHED_PROLOG
26957 || (arm_except_unwind_info (&global_options) == UI_TARGET
26958 && cfun->can_throw_non_call_exceptions))
26959 emit_insn (gen_blockage ());
26960
26961 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26962 if (live_regs_mask & 0xff)
26963 cfun->machine->lr_save_eliminated = 0;
26964 }
26965
26966 /* Clear caller saved registers not used to pass return values and leaked
26967 condition flags before exiting a cmse_nonsecure_entry function. */
26968
26969 void
26970 cmse_nonsecure_entry_clear_before_return (void)
26971 {
26972 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
26973 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
26974 uint32_t padding_bits_to_clear = 0;
26975 auto_sbitmap to_clear_bitmap (maxregno + 1);
26976 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
26977 tree result_type;
26978
26979 bitmap_clear (to_clear_bitmap);
26980 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
26981 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
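  /* Start with the core argument registers r0-r3 and IP; registers used to
     return the result are removed from the bitmap further down.  */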
26982
26983 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
26984 registers. */
26985 if (clear_vfpregs)
26986 {
26987 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
26988
26989 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
26990
26991 if (!TARGET_HAVE_FPCXT_CMSE)
26992 {
26993 /* Make sure we don't clear the two scratch registers used to clear
26994 the relevant FPSCR bits in output_return_instruction. */
26995 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
26996 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
26997 emit_use (gen_rtx_REG (SImode, 4));
26998 bitmap_clear_bit (to_clear_bitmap, 4);
26999 }
27000 }
27001
27002 /* If the user has defined registers to be caller saved, these are no longer
27003 restored by the function before returning and must thus be cleared for
27004 security purposes. */
27005 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27006 {
27007 /* We do not touch registers that can be used to pass arguments as per
27008 the AAPCS, since these should never be made callee-saved by user
27009 options. */
27010 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27011 continue;
27012 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27013 continue;
27014 if (!callee_saved_reg_p (regno)
27015 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27016 || TARGET_HARD_FLOAT))
27017 bitmap_set_bit (to_clear_bitmap, regno);
27018 }
27019
27020 /* Make sure we do not clear the registers used to return the result in. */
27021 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27022 if (!VOID_TYPE_P (result_type))
27023 {
27024 uint64_t to_clear_return_mask;
27025 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27026
27027 /* No need to check that we return in registers, because we don't
27028 support returning on stack yet. */
27029 gcc_assert (REG_P (result_rtl));
27030 to_clear_return_mask
27031 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27032 &padding_bits_to_clear);
27033 if (to_clear_return_mask)
27034 {
27035 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27036 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27037 {
27038 if (to_clear_return_mask & (1ULL << regno))
27039 bitmap_clear_bit (to_clear_bitmap, regno);
27040 }
27041 }
27042 }
27043
27044 if (padding_bits_to_clear != 0)
27045 {
27046 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27047 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27048
27049       /* Padding_bits_to_clear is not 0 so we know we are dealing with
27050 	 returning a composite type, which only uses r0.  Let's make sure that
27051 	 r1-r3 are cleared too.  */
27052 bitmap_clear (to_clear_arg_regs_bitmap);
27053 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27054 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27055 }
27056
27057 /* Clear full registers that leak before returning. */
27058 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27059 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27060 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27061 clearing_reg);
27062 }
27063
27064 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27065    single POP instruction can be generated.  LR should be replaced by PC.
27066    All the checks required are already done by USE_RETURN_INSN ().  Hence,
27067    all we really need to check here is whether a single register or
27068    multiple registers are being popped.  */
27069 void
27070 thumb2_expand_return (bool simple_return)
27071 {
27072 int i, num_regs;
27073 unsigned long saved_regs_mask;
27074 arm_stack_offsets *offsets;
27075
27076 offsets = arm_get_frame_offsets ();
27077 saved_regs_mask = offsets->saved_regs_mask;
27078
27079 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27080 if (saved_regs_mask & (1 << i))
27081 num_regs++;
27082
27083 if (!simple_return && saved_regs_mask)
27084 {
27085 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27086 functions or adapt code to handle according to ACLE. This path should
27087 not be reachable for cmse_nonsecure_entry functions though we prefer
27088 to assert it for now to ensure that future code changes do not silently
27089 change this behavior. */
27090 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27091 if (num_regs == 1)
27092 {
27093 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27094 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27095 rtx addr = gen_rtx_MEM (SImode,
27096 gen_rtx_POST_INC (SImode,
27097 stack_pointer_rtx));
27098 set_mem_alias_set (addr, get_frame_alias_set ());
27099 XVECEXP (par, 0, 0) = ret_rtx;
27100 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27101 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27102 emit_jump_insn (par);
27103 }
27104 else
27105 {
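	  /* Pop the saved registers, loading the saved LR value directly
	     into PC so that the pop itself performs the return.  */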
27106 saved_regs_mask &= ~ (1 << LR_REGNUM);
27107 saved_regs_mask |= (1 << PC_REGNUM);
27108 arm_emit_multi_reg_pop (saved_regs_mask);
27109 }
27110 }
27111 else
27112 {
27113 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27114 cmse_nonsecure_entry_clear_before_return ();
27115 emit_jump_insn (simple_return_rtx);
27116 }
27117 }
27118
27119 void
27120 thumb1_expand_epilogue (void)
27121 {
27122 HOST_WIDE_INT amount;
27123 arm_stack_offsets *offsets;
27124 int regno;
27125
27126   /* Naked functions don't have epilogues.  */
27127 if (IS_NAKED (arm_current_func_type ()))
27128 return;
27129
27130 offsets = arm_get_frame_offsets ();
27131 amount = offsets->outgoing_args - offsets->saved_regs;
27132
27133 if (frame_pointer_needed)
27134 {
27135 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27136 amount = offsets->locals_base - offsets->saved_regs;
27137 }
27138 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27139
27140 gcc_assert (amount >= 0);
27141 if (amount)
27142 {
27143 emit_insn (gen_blockage ());
27144
27145 if (amount < 512)
27146 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27147 GEN_INT (amount)));
27148 else
27149 {
27150 /* r3 is always free in the epilogue. */
27151 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27152
27153 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27154 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27155 }
27156 }
27157
27158 /* Emit a USE (stack_pointer_rtx), so that
27159 the stack adjustment will not be deleted. */
27160 emit_insn (gen_force_register_use (stack_pointer_rtx));
27161
27162 if (crtl->profile || !TARGET_SCHED_PROLOG)
27163 emit_insn (gen_blockage ());
27164
27165   /* Emit a clobber for each register that will be restored in the epilogue,
27166 so that flow2 will get register lifetimes correct. */
27167 for (regno = 0; regno < 13; regno++)
27168 if (reg_needs_saving_p (regno))
27169 emit_clobber (gen_rtx_REG (SImode, regno));
27170
27171 if (! df_regs_ever_live_p (LR_REGNUM))
27172 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27173
27174 /* Clear all caller-saved regs that are not used to return. */
27175 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27176 cmse_nonsecure_entry_clear_before_return ();
27177 }
27178
27179 /* Epilogue code for APCS frame. */
27180 static void
27181 arm_expand_epilogue_apcs_frame (bool really_return)
27182 {
27183 unsigned long func_type;
27184 unsigned long saved_regs_mask;
27185 int num_regs = 0;
27186 int i;
27187 int floats_from_frame = 0;
27188 arm_stack_offsets *offsets;
27189
27190 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27191 func_type = arm_current_func_type ();
27192
27193 /* Get frame offsets for ARM. */
27194 offsets = arm_get_frame_offsets ();
27195 saved_regs_mask = offsets->saved_regs_mask;
27196
27197 /* Find the offset of the floating-point save area in the frame. */
27198 floats_from_frame
27199 = (offsets->saved_args
27200 + arm_compute_static_chain_stack_bytes ()
27201 - offsets->frame);
27202
27203   /* Compute how many core registers are saved and how far away the floats are.  */
27204 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27205 if (saved_regs_mask & (1 << i))
27206 {
27207 num_regs++;
27208 floats_from_frame += 4;
27209 }
27210
27211 if (TARGET_VFP_BASE)
27212 {
27213 int start_reg;
27214 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27215
27216 /* The offset is from IP_REGNUM. */
27217 int saved_size = arm_get_vfp_saved_size ();
27218 if (saved_size > 0)
27219 {
27220 rtx_insn *insn;
27221 floats_from_frame += saved_size;
27222 insn = emit_insn (gen_addsi3 (ip_rtx,
27223 hard_frame_pointer_rtx,
27224 GEN_INT (-floats_from_frame)));
27225 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27226 ip_rtx, hard_frame_pointer_rtx);
27227 }
27228
27229 /* Generate VFP register multi-pop. */
27230 start_reg = FIRST_VFP_REGNUM;
27231
27232 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27233 /* Look for a case where a reg does not need restoring. */
27234 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27235 {
27236 if (start_reg != i)
27237 arm_emit_vfp_multi_reg_pop (start_reg,
27238 (i - start_reg) / 2,
27239 gen_rtx_REG (SImode,
27240 IP_REGNUM));
27241 start_reg = i + 2;
27242 }
27243
27244 /* Restore the remaining regs that we have discovered (or possibly
27245 even all of them, if the conditional in the for loop never
27246 fired). */
27247 if (start_reg != i)
27248 arm_emit_vfp_multi_reg_pop (start_reg,
27249 (i - start_reg) / 2,
27250 gen_rtx_REG (SImode, IP_REGNUM));
27251 }
27252
27253 if (TARGET_IWMMXT)
27254 {
27255 /* The frame pointer is guaranteed to be non-double-word aligned, as
27256 it is set to double-word-aligned old_stack_pointer - 4. */
27257 rtx_insn *insn;
27258 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27259
27260 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27261 if (reg_needs_saving_p (i))
27262 {
27263 rtx addr = gen_frame_mem (V2SImode,
27264 plus_constant (Pmode, hard_frame_pointer_rtx,
27265 - lrm_count * 4));
27266 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27267 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27268 gen_rtx_REG (V2SImode, i),
27269 NULL_RTX);
27270 lrm_count += 2;
27271 }
27272 }
27273
27274   /* saved_regs_mask should contain IP, which holds the old stack pointer
27275      at the time the activation record was created.  Since SP and IP are
27276      adjacent registers, we can restore the value directly into SP.  */
27277 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27278 saved_regs_mask &= ~(1 << IP_REGNUM);
27279 saved_regs_mask |= (1 << SP_REGNUM);
27280
27281 /* There are two registers left in saved_regs_mask - LR and PC. We
27282 only need to restore LR (the return address), but to
27283 save time we can load it directly into PC, unless we need a
27284 special function exit sequence, or we are not really returning. */
27285 if (really_return
27286 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27287 && !crtl->calls_eh_return)
27288 /* Delete LR from the register mask, so that LR on
27289 the stack is loaded into the PC in the register mask. */
27290 saved_regs_mask &= ~(1 << LR_REGNUM);
27291 else
27292 saved_regs_mask &= ~(1 << PC_REGNUM);
27293
27294 num_regs = bit_count (saved_regs_mask);
27295 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27296 {
27297 rtx_insn *insn;
27298 emit_insn (gen_blockage ());
27299 /* Unwind the stack to just below the saved registers. */
27300 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27301 hard_frame_pointer_rtx,
27302 GEN_INT (- 4 * num_regs)));
27303
27304 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27305 stack_pointer_rtx, hard_frame_pointer_rtx);
27306 }
27307
27308 arm_emit_multi_reg_pop (saved_regs_mask);
27309
27310 if (IS_INTERRUPT (func_type))
27311 {
27312 /* Interrupt handlers will have pushed the
27313 IP onto the stack, so restore it now. */
27314 rtx_insn *insn;
27315 rtx addr = gen_rtx_MEM (SImode,
27316 gen_rtx_POST_INC (SImode,
27317 stack_pointer_rtx));
27318 set_mem_alias_set (addr, get_frame_alias_set ());
27319 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27320 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27321 gen_rtx_REG (SImode, IP_REGNUM),
27322 NULL_RTX);
27323 }
27324
27325 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27326 return;
27327
27328 if (crtl->calls_eh_return)
27329 emit_insn (gen_addsi3 (stack_pointer_rtx,
27330 stack_pointer_rtx,
27331 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27332
27333 if (IS_STACKALIGN (func_type))
27334 /* Restore the original stack pointer. Before prologue, the stack was
27335 realigned and the original stack pointer saved in r0. For details,
27336 see comment in arm_expand_prologue. */
27337 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27338
27339 emit_jump_insn (simple_return_rtx);
27340 }
27341
27342 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27343 function is not a sibcall. */
27344 void
27345 arm_expand_epilogue (bool really_return)
27346 {
27347 unsigned long func_type;
27348 unsigned long saved_regs_mask;
27349 int num_regs = 0;
27350 int i;
27351 int amount;
27352 arm_stack_offsets *offsets;
27353
27354 func_type = arm_current_func_type ();
27355
27356   /* Naked functions don't have epilogues.  Hence, generate a return pattern and
27357      let output_return_instruction take care of any instruction emission.  */
27358 if (IS_NAKED (func_type)
27359 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27360 {
27361 if (really_return)
27362 emit_jump_insn (simple_return_rtx);
27363 return;
27364 }
27365
27366 /* If we are throwing an exception, then we really must be doing a
27367 return, so we can't tail-call. */
27368 gcc_assert (!crtl->calls_eh_return || really_return);
27369
27370 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27371 {
27372 arm_expand_epilogue_apcs_frame (really_return);
27373 return;
27374 }
27375
27376 /* Get frame offsets for ARM. */
27377 offsets = arm_get_frame_offsets ();
27378 saved_regs_mask = offsets->saved_regs_mask;
27379 num_regs = bit_count (saved_regs_mask);
27380
27381 if (frame_pointer_needed)
27382 {
27383 rtx_insn *insn;
27384 /* Restore stack pointer if necessary. */
27385 if (TARGET_ARM)
27386 {
27387 /* In ARM mode, frame pointer points to first saved register.
27388 Restore stack pointer to last saved register. */
27389 amount = offsets->frame - offsets->saved_regs;
27390
27391 /* Force out any pending memory operations that reference stacked data
27392 before stack de-allocation occurs. */
27393 emit_insn (gen_blockage ());
27394 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27395 hard_frame_pointer_rtx,
27396 GEN_INT (amount)));
27397 arm_add_cfa_adjust_cfa_note (insn, amount,
27398 stack_pointer_rtx,
27399 hard_frame_pointer_rtx);
27400
27401 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27402 deleted. */
27403 emit_insn (gen_force_register_use (stack_pointer_rtx));
27404 }
27405 else
27406 {
27407 /* In Thumb-2 mode, the frame pointer points to the last saved
27408 register. */
27409 amount = offsets->locals_base - offsets->saved_regs;
27410 if (amount)
27411 {
27412 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27413 hard_frame_pointer_rtx,
27414 GEN_INT (amount)));
27415 arm_add_cfa_adjust_cfa_note (insn, amount,
27416 hard_frame_pointer_rtx,
27417 hard_frame_pointer_rtx);
27418 }
27419
27420 /* Force out any pending memory operations that reference stacked data
27421 before stack de-allocation occurs. */
27422 emit_insn (gen_blockage ());
27423 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27424 hard_frame_pointer_rtx));
27425 arm_add_cfa_adjust_cfa_note (insn, 0,
27426 stack_pointer_rtx,
27427 hard_frame_pointer_rtx);
27428 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27429 deleted. */
27430 emit_insn (gen_force_register_use (stack_pointer_rtx));
27431 }
27432 }
27433 else
27434 {
27435 /* Pop off outgoing args and local frame to adjust stack pointer to
27436 last saved register. */
27437 amount = offsets->outgoing_args - offsets->saved_regs;
27438 if (amount)
27439 {
27440 rtx_insn *tmp;
27441 /* Force out any pending memory operations that reference stacked data
27442 before stack de-allocation occurs. */
27443 emit_insn (gen_blockage ());
27444 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27445 stack_pointer_rtx,
27446 GEN_INT (amount)));
27447 arm_add_cfa_adjust_cfa_note (tmp, amount,
27448 stack_pointer_rtx, stack_pointer_rtx);
27449 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27450 not deleted. */
27451 emit_insn (gen_force_register_use (stack_pointer_rtx));
27452 }
27453 }
27454
27455 if (TARGET_VFP_BASE)
27456 {
27457 /* Generate VFP register multi-pop. */
27458 int end_reg = LAST_VFP_REGNUM + 1;
27459
27460 /* Scan the registers in reverse order. We need to match
27461 any groupings made in the prologue and generate matching
27462 vldm operations. The need to match groups is because,
27463 unlike pop, vldm can only do consecutive regs. */
27464 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27465 /* Look for a case where a reg does not need restoring. */
27466 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27467 {
27468 /* Restore the regs discovered so far (from reg+2 to
27469 end_reg). */
27470 if (end_reg > i + 2)
27471 arm_emit_vfp_multi_reg_pop (i + 2,
27472 (end_reg - (i + 2)) / 2,
27473 stack_pointer_rtx);
27474 end_reg = i;
27475 }
27476
27477 /* Restore the remaining regs that we have discovered (or possibly
27478 even all of them, if the conditional in the for loop never
27479 fired). */
27480 if (end_reg > i + 2)
27481 arm_emit_vfp_multi_reg_pop (i + 2,
27482 (end_reg - (i + 2)) / 2,
27483 stack_pointer_rtx);
27484 }
27485
27486 if (TARGET_IWMMXT)
27487 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27488 if (reg_needs_saving_p (i))
27489 {
27490 rtx_insn *insn;
27491 rtx addr = gen_rtx_MEM (V2SImode,
27492 gen_rtx_POST_INC (SImode,
27493 stack_pointer_rtx));
27494 set_mem_alias_set (addr, get_frame_alias_set ());
27495 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27496 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27497 gen_rtx_REG (V2SImode, i),
27498 NULL_RTX);
27499 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27500 stack_pointer_rtx, stack_pointer_rtx);
27501 }
27502
27503 if (saved_regs_mask)
27504 {
27505 rtx insn;
27506 bool return_in_pc = false;
27507
27508 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27509 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27510 && !IS_CMSE_ENTRY (func_type)
27511 && !IS_STACKALIGN (func_type)
27512 && really_return
27513 && crtl->args.pretend_args_size == 0
27514 && saved_regs_mask & (1 << LR_REGNUM)
27515 && !crtl->calls_eh_return)
27516 {
27517 saved_regs_mask &= ~(1 << LR_REGNUM);
27518 saved_regs_mask |= (1 << PC_REGNUM);
27519 return_in_pc = true;
27520 }
27521
27522 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27523 {
27524 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27525 if (saved_regs_mask & (1 << i))
27526 {
27527 rtx addr = gen_rtx_MEM (SImode,
27528 gen_rtx_POST_INC (SImode,
27529 stack_pointer_rtx));
27530 set_mem_alias_set (addr, get_frame_alias_set ());
27531
27532 if (i == PC_REGNUM)
27533 {
27534 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27535 XVECEXP (insn, 0, 0) = ret_rtx;
27536 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27537 addr);
27538 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27539 insn = emit_jump_insn (insn);
27540 }
27541 else
27542 {
27543 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27544 addr));
27545 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27546 gen_rtx_REG (SImode, i),
27547 NULL_RTX);
27548 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27549 stack_pointer_rtx,
27550 stack_pointer_rtx);
27551 }
27552 }
27553 }
27554 else
27555 {
27556 if (TARGET_LDRD
27557 && current_tune->prefer_ldrd_strd
27558 && !optimize_function_for_size_p (cfun))
27559 {
27560 if (TARGET_THUMB2)
27561 thumb2_emit_ldrd_pop (saved_regs_mask);
27562 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27563 arm_emit_ldrd_pop (saved_regs_mask);
27564 else
27565 arm_emit_multi_reg_pop (saved_regs_mask);
27566 }
27567 else
27568 arm_emit_multi_reg_pop (saved_regs_mask);
27569 }
27570
27571 if (return_in_pc)
27572 return;
27573 }
27574
27575 amount
27576 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27577 if (amount)
27578 {
27579 int i, j;
27580 rtx dwarf = NULL_RTX;
27581 rtx_insn *tmp =
27582 emit_insn (gen_addsi3 (stack_pointer_rtx,
27583 stack_pointer_rtx,
27584 GEN_INT (amount)));
27585
27586 RTX_FRAME_RELATED_P (tmp) = 1;
27587
27588 if (cfun->machine->uses_anonymous_args)
27589 {
27590 	  /* Restore pretend args.  Refer to arm_expand_prologue for how
27591 	     pretend args are saved on the stack.  */
27592 int num_regs = crtl->args.pretend_args_size / 4;
27593 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
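	  /* E.g. for two pretend-arg words this gives 0xc (r2 and r3), the
	     same registers the prologue pushed.  */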
27594 for (j = 0, i = 0; j < num_regs; i++)
27595 if (saved_regs_mask & (1 << i))
27596 {
27597 rtx reg = gen_rtx_REG (SImode, i);
27598 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27599 j++;
27600 }
27601 REG_NOTES (tmp) = dwarf;
27602 }
27603 arm_add_cfa_adjust_cfa_note (tmp, amount,
27604 stack_pointer_rtx, stack_pointer_rtx);
27605 }
27606
27607 if (IS_CMSE_ENTRY (func_type))
27608 {
27609 /* CMSE_ENTRY always returns. */
27610 gcc_assert (really_return);
27611 /* Clear all caller-saved regs that are not used to return. */
27612 cmse_nonsecure_entry_clear_before_return ();
27613
27614 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27615 VLDR. */
27616 if (TARGET_HAVE_FPCXT_CMSE)
27617 {
27618 rtx_insn *insn;
27619
27620 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27621 GEN_INT (FPCXTNS_ENUM)));
27622 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27623 plus_constant (Pmode, stack_pointer_rtx, 4));
27624 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27625 RTX_FRAME_RELATED_P (insn) = 1;
27626 }
27627 }
27628
27629 if (!really_return)
27630 return;
27631
27632 if (crtl->calls_eh_return)
27633 emit_insn (gen_addsi3 (stack_pointer_rtx,
27634 stack_pointer_rtx,
27635 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27636
27637 if (IS_STACKALIGN (func_type))
27638 /* Restore the original stack pointer. Before prologue, the stack was
27639 realigned and the original stack pointer saved in r0. For details,
27640 see comment in arm_expand_prologue. */
27641 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27642
27643 emit_jump_insn (simple_return_rtx);
27644 }
27645
27646 /* Implementation of insn prologue_thumb1_interwork. This is the first
27647 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27648
27649 const char *
27650 thumb1_output_interwork (void)
27651 {
27652 const char * name;
27653 FILE *f = asm_out_file;
27654
27655 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27656 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27657 == SYMBOL_REF);
27658 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27659
27660 /* Generate code sequence to switch us into Thumb mode. */
27661 /* The .code 32 directive has already been emitted by
27662 ASM_DECLARE_FUNCTION_NAME. */
27663 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27664 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27665
27666 /* Generate a label, so that the debugger will notice the
27667 change in instruction sets. This label is also used by
27668 the assembler to bypass the ARM code when this function
27669 is called from a Thumb encoded function elsewhere in the
27670 same file. Hence the definition of STUB_NAME here must
27671 agree with the definition in gas/config/tc-arm.c. */
27672
27673 #define STUB_NAME ".real_start_of"
27674
27675 fprintf (f, "\t.code\t16\n");
27676 #ifdef ARM_PE
27677 if (arm_dllexport_name_p (name))
27678 name = arm_strip_name_encoding (name);
27679 #endif
27680 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27681 fprintf (f, "\t.thumb_func\n");
27682 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27683
27684 return "";
27685 }
27686
27687 /* Handle the case of a double word load into a low register from
27688 a computed memory address. The computed address may involve a
27689 register which is overwritten by the load. */
27690 const char *
27691 thumb_load_double_from_address (rtx *operands)
27692 {
27693 rtx addr;
27694 rtx base;
27695 rtx offset;
27696 rtx arg1;
27697 rtx arg2;
27698
27699 gcc_assert (REG_P (operands[0]));
27700 gcc_assert (MEM_P (operands[1]));
27701
27702 /* Get the memory address. */
27703 addr = XEXP (operands[1], 0);
27704
27705 /* Work out how the memory address is computed. */
27706 switch (GET_CODE (addr))
27707 {
27708 case REG:
27709 operands[2] = adjust_address (operands[1], SImode, 4);
27710
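      /* If the low destination register overlaps the base register, load
	 the high word first so the address is not clobbered before the
	 second load.  */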
27711 if (REGNO (operands[0]) == REGNO (addr))
27712 {
27713 output_asm_insn ("ldr\t%H0, %2", operands);
27714 output_asm_insn ("ldr\t%0, %1", operands);
27715 }
27716 else
27717 {
27718 output_asm_insn ("ldr\t%0, %1", operands);
27719 output_asm_insn ("ldr\t%H0, %2", operands);
27720 }
27721 break;
27722
27723 case CONST:
27724 /* Compute <address> + 4 for the high order load. */
27725 operands[2] = adjust_address (operands[1], SImode, 4);
27726
27727 output_asm_insn ("ldr\t%0, %1", operands);
27728 output_asm_insn ("ldr\t%H0, %2", operands);
27729 break;
27730
27731 case PLUS:
27732 arg1 = XEXP (addr, 0);
27733 arg2 = XEXP (addr, 1);
27734
27735 if (CONSTANT_P (arg1))
27736 base = arg2, offset = arg1;
27737 else
27738 base = arg1, offset = arg2;
27739
27740 gcc_assert (REG_P (base));
27741
27742 /* Catch the case of <address> = <reg> + <reg> */
27743 if (REG_P (offset))
27744 {
27745 int reg_offset = REGNO (offset);
27746 int reg_base = REGNO (base);
27747 int reg_dest = REGNO (operands[0]);
27748
27749 /* Add the base and offset registers together into the
27750 higher destination register. */
27751 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27752 reg_dest + 1, reg_base, reg_offset);
27753
27754 /* Load the lower destination register from the address in
27755 the higher destination register. */
27756 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27757 reg_dest, reg_dest + 1);
27758
27759 /* Load the higher destination register from its own address
27760 plus 4. */
27761 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27762 reg_dest + 1, reg_dest + 1);
27763 }
27764 else
27765 {
27766 /* Compute <address> + 4 for the high order load. */
27767 operands[2] = adjust_address (operands[1], SImode, 4);
27768
27769 /* If the computed address is held in the low order register
27770 then load the high order register first, otherwise always
27771 load the low order register first. */
27772 if (REGNO (operands[0]) == REGNO (base))
27773 {
27774 output_asm_insn ("ldr\t%H0, %2", operands);
27775 output_asm_insn ("ldr\t%0, %1", operands);
27776 }
27777 else
27778 {
27779 output_asm_insn ("ldr\t%0, %1", operands);
27780 output_asm_insn ("ldr\t%H0, %2", operands);
27781 }
27782 }
27783 break;
27784
27785 case LABEL_REF:
27786 /* With no registers to worry about we can just load the value
27787 directly. */
27788 operands[2] = adjust_address (operands[1], SImode, 4);
27789
27790 output_asm_insn ("ldr\t%H0, %2", operands);
27791 output_asm_insn ("ldr\t%0, %1", operands);
27792 break;
27793
27794 default:
27795 gcc_unreachable ();
27796 }
27797
27798 return "";
27799 }
27800
27801 const char *
27802 thumb_output_move_mem_multiple (int n, rtx *operands)
27803 {
27804 switch (n)
27805 {
27806 case 2:
27807 if (REGNO (operands[4]) > REGNO (operands[5]))
27808 std::swap (operands[4], operands[5]);
27809
27810 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27811 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27812 break;
27813
27814 case 3:
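      /* Sort the three scratch registers into ascending order, since the
	 register lists of ldmia/stmia must be ascending.  */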
27815 if (REGNO (operands[4]) > REGNO (operands[5]))
27816 std::swap (operands[4], operands[5]);
27817 if (REGNO (operands[5]) > REGNO (operands[6]))
27818 std::swap (operands[5], operands[6]);
27819 if (REGNO (operands[4]) > REGNO (operands[5]))
27820 std::swap (operands[4], operands[5]);
27821
27822 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27823 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27824 break;
27825
27826 default:
27827 gcc_unreachable ();
27828 }
27829
27830 return "";
27831 }
27832
27833 /* Output a call-via instruction for thumb state. */
27834 const char *
27835 thumb_call_via_reg (rtx reg)
27836 {
27837 int regno = REGNO (reg);
27838 rtx *labelp;
27839
27840 gcc_assert (regno < LR_REGNUM);
27841
27842 /* If we are in the normal text section we can use a single instance
27843 per compilation unit. If we are doing function sections, then we need
27844 an entry per section, since we can't rely on reachability. */
27845 if (in_section == text_section)
27846 {
27847 thumb_call_reg_needed = 1;
27848
27849 if (thumb_call_via_label[regno] == NULL)
27850 thumb_call_via_label[regno] = gen_label_rtx ();
27851 labelp = thumb_call_via_label + regno;
27852 }
27853 else
27854 {
27855 if (cfun->machine->call_via[regno] == NULL)
27856 cfun->machine->call_via[regno] = gen_label_rtx ();
27857 labelp = cfun->machine->call_via + regno;
27858 }
27859
27860 output_asm_insn ("bl\t%a0", labelp);
27861 return "";
27862 }
27863
27864 /* Routines for generating rtl. */
27865 void
27866 thumb_expand_cpymemqi (rtx *operands)
27867 {
27868 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27869 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27870 HOST_WIDE_INT len = INTVAL (operands[2]);
27871 HOST_WIDE_INT offset = 0;
27872
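  /* Copy the bulk with the multi-word patterns, which update IN and OUT as
     they go; OFFSET only tracks the tail handled by the explicit loads
     below.  */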
27873 while (len >= 12)
27874 {
27875 emit_insn (gen_cpymem12b (out, in, out, in));
27876 len -= 12;
27877 }
27878
27879 if (len >= 8)
27880 {
27881 emit_insn (gen_cpymem8b (out, in, out, in));
27882 len -= 8;
27883 }
27884
27885 if (len >= 4)
27886 {
27887 rtx reg = gen_reg_rtx (SImode);
27888 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27889 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27890 len -= 4;
27891 offset += 4;
27892 }
27893
27894 if (len >= 2)
27895 {
27896 rtx reg = gen_reg_rtx (HImode);
27897 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27898 plus_constant (Pmode, in,
27899 offset))));
27900 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27901 offset)),
27902 reg));
27903 len -= 2;
27904 offset += 2;
27905 }
27906
27907 if (len)
27908 {
27909 rtx reg = gen_reg_rtx (QImode);
27910 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27911 plus_constant (Pmode, in,
27912 offset))));
27913 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27914 offset)),
27915 reg));
27916 }
27917 }
27918
27919 void
27920 thumb_reload_out_hi (rtx *operands)
27921 {
27922 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27923 }
27924
27925 /* Return the length of a function name prefix
27926 that starts with the character 'c'. */
27927 static int
27928 arm_get_strip_length (int c)
27929 {
27930 switch (c)
27931 {
27932 ARM_NAME_ENCODING_LENGTHS
27933 default: return 0;
27934 }
27935 }
27936
27937 /* Return a pointer to a function's name with any
27938 and all prefix encodings stripped from it. */
27939 const char *
27940 arm_strip_name_encoding (const char *name)
27941 {
27942 int skip;
27943
27944 while ((skip = arm_get_strip_length (* name)))
27945 name += skip;
27946
27947 return name;
27948 }
27949
27950 /* If there is a '*' anywhere in the name's prefix, then
27951 emit the stripped name verbatim, otherwise prepend an
27952 underscore if leading underscores are being used. */
27953 void
27954 arm_asm_output_labelref (FILE *stream, const char *name)
27955 {
27956 int skip;
27957 int verbatim = 0;
27958
27959 while ((skip = arm_get_strip_length (* name)))
27960 {
27961 verbatim |= (*name == '*');
27962 name += skip;
27963 }
27964
27965 if (verbatim)
27966 fputs (name, stream);
27967 else
27968 asm_fprintf (stream, "%U%s", name);
27969 }
27970
27971 /* This function is used to emit an EABI tag and its associated value.
27972 We emit the numerical value of the tag in case the assembler does not
27973    support textual tags (e.g. gas prior to 2.20).  If requested we include
27974 the tag name in a comment so that anyone reading the assembler output
27975 will know which tag is being set.
27976
27977 This function is not static because arm-c.c needs it too. */
27978
27979 void
27980 arm_emit_eabi_attribute (const char *name, int num, int val)
27981 {
27982 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27983 if (flag_verbose_asm || flag_debug_asm)
27984 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27985 asm_fprintf (asm_out_file, "\n");
27986 }
27987
27988 /* This function is used to print CPU tuning information as a comment
27989    in the assembler file.  Pointers are not printed for now.  */
27990
27991 void
27992 arm_print_tune_info (void)
27993 {
27994 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
27995 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
27996 current_tune->constant_limit);
27997 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27998 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
27999 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28000 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28001 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28002 "prefetch.l1_cache_size:\t%d\n",
28003 current_tune->prefetch.l1_cache_size);
28004 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28005 "prefetch.l1_cache_line_size:\t%d\n",
28006 current_tune->prefetch.l1_cache_line_size);
28007 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28008 "prefer_constant_pool:\t%d\n",
28009 (int) current_tune->prefer_constant_pool);
28010 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28011 "branch_cost:\t(s:speed, p:predictable)\n");
28012 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28013 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28014 current_tune->branch_cost (false, false));
28015 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28016 current_tune->branch_cost (false, true));
28017 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28018 current_tune->branch_cost (true, false));
28019 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28020 current_tune->branch_cost (true, true));
28021 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28022 "prefer_ldrd_strd:\t%d\n",
28023 (int) current_tune->prefer_ldrd_strd);
28024 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28025 "logical_op_non_short_circuit:\t[%d,%d]\n",
28026 (int) current_tune->logical_op_non_short_circuit_thumb,
28027 (int) current_tune->logical_op_non_short_circuit_arm);
28028 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28029 "disparage_flag_setting_t16_encodings:\t%d\n",
28030 (int) current_tune->disparage_flag_setting_t16_encodings);
28031 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28032 "string_ops_prefer_neon:\t%d\n",
28033 (int) current_tune->string_ops_prefer_neon);
28034 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28035 "max_insns_inline_memset:\t%d\n",
28036 current_tune->max_insns_inline_memset);
28037 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28038 current_tune->fusible_ops);
28039 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28040 (int) current_tune->sched_autopref);
28041 }
28042
28043 /* Print .arch and .arch_extension directives corresponding to the
28044 current architecture configuration. */
28045 static void
28046 arm_print_asm_arch_directives ()
28047 {
28048 const arch_option *arch
28049 = arm_parse_arch_option_name (all_architectures, "-march",
28050 arm_active_target.arch_name);
28051 auto_sbitmap opt_bits (isa_num_bits);
28052
28053 gcc_assert (arch);
28054
28055 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
28056 arm_last_printed_arch_string = arm_active_target.arch_name;
28057 if (!arch->common.extensions)
28058 return;
28059
28060 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28061 opt->name != NULL;
28062 opt++)
28063 {
28064 if (!opt->remove)
28065 {
28066 arm_initialize_isa (opt_bits, opt->isa_bits);
28067
28068 	  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28069 	     "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28070 	     floating point instructions are disabled.  So the following check
28071 	     restricts the printing of ".arch_extension mve" and
28072 	     ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
28073 	     this special behaviour because the feature bits "mve" and
28074 	     "mve_float" are not part of the "fpu bits", so they are not cleared
28075 	     when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28076 	     TARGET_HAVE_MVE_FLOAT are disabled.  */
28077 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28078 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28079 && !TARGET_HAVE_MVE_FLOAT))
28080 continue;
28081
28082 /* If every feature bit of this option is set in the target
28083 ISA specification, print out the option name. However,
28084 don't print anything if all the bits are part of the
28085 FPU specification. */
28086 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
28087 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28088 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
28089 }
28090 }
28091 }
28092
28093 static void
28094 arm_file_start (void)
28095 {
28096 int val;
28097
28098 if (TARGET_BPABI)
28099 {
28100 /* We don't have a specified CPU. Use the architecture to
28101 generate the tags.
28102
28103 Note: it might be better to do this unconditionally, then the
28104 assembler would not need to know about all new CPU names as
28105 they are added. */
28106 if (!arm_active_target.core_name)
28107 {
28108 /* armv7ve doesn't support any extensions. */
28109 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
28110 {
28111 	      /* Keep backward compatibility for assemblers
28112 which don't support armv7ve. */
28113 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28114 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28115 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28116 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28117 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28118 arm_last_printed_arch_string = "armv7ve";
28119 }
28120 else
28121 arm_print_asm_arch_directives ();
28122 }
28123 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
28124 {
28125 asm_fprintf (asm_out_file, "\t.arch %s\n",
28126 arm_active_target.core_name + 8);
28127 arm_last_printed_arch_string = arm_active_target.core_name + 8;
28128 }
28129 else
28130 {
28131 const char* truncated_name
28132 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28133 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28134 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28135 truncated_name);
28136 else
28137 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28138 }
28139
28140 if (print_tune_info)
28141 arm_print_tune_info ();
28142
28143 if (! TARGET_SOFT_FLOAT)
28144 {
28145 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28146 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28147
28148 if (TARGET_HARD_FLOAT_ABI)
28149 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28150 }
28151
28152 /* Some of these attributes only apply when the corresponding features
28153 are used. However we don't have any easy way of figuring this out.
28154 Conservatively record the setting that would have been used. */
28155
28156 if (flag_rounding_math)
28157 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28158
28159 if (!flag_unsafe_math_optimizations)
28160 {
28161 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28162 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28163 }
28164 if (flag_signaling_nans)
28165 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28166
28167 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28168 flag_finite_math_only ? 1 : 3);
28169
28170 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28171 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28172 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28173 flag_short_enums ? 1 : 2);
28174
28175 /* Tag_ABI_optimization_goals. */
28176 if (optimize_size)
28177 val = 4;
28178 else if (optimize >= 2)
28179 val = 2;
28180 else if (optimize)
28181 val = 1;
28182 else
28183 val = 6;
28184 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28185
28186 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28187 unaligned_access);
28188
28189 if (arm_fp16_format)
28190 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28191 (int) arm_fp16_format);
28192
28193 if (arm_lang_output_object_attributes_hook)
28194 arm_lang_output_object_attributes_hook();
28195 }
28196
28197 default_file_start ();
28198 }
28199
28200 static void
28201 arm_file_end (void)
28202 {
28203 int regno;
28204
28205 if (NEED_INDICATE_EXEC_STACK)
28206 /* Add .note.GNU-stack. */
28207 file_end_indicate_exec_stack ();
28208
28209 if (! thumb_call_reg_needed)
28210 return;
28211
28212 switch_to_section (text_section);
28213 asm_fprintf (asm_out_file, "\t.code 16\n");
28214 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28215
28216 for (regno = 0; regno < LR_REGNUM; regno++)
28217 {
28218 rtx label = thumb_call_via_label[regno];
28219
28220 if (label != 0)
28221 {
28222 targetm.asm_out.internal_label (asm_out_file, "L",
28223 CODE_LABEL_NUMBER (label));
28224 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28225 }
28226 }
28227 }
28228
28229 #ifndef ARM_PE
28230 /* Symbols in the text segment can be accessed without indirecting via the
28231 constant pool; it may take an extra binary operation, but this is still
28232 faster than indirecting via memory. Don't do this when not optimizing,
28233    since we won't be calculating all of the offsets necessary to do this
28234 simplification. */
28235
28236 static void
28237 arm_encode_section_info (tree decl, rtx rtl, int first)
28238 {
28239 if (optimize > 0 && TREE_CONSTANT (decl))
28240 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28241
28242 default_encode_section_info (decl, rtl, first);
28243 }
28244 #endif /* !ARM_PE */
28245
28246 static void
28247 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28248 {
28249 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28250 && !strcmp (prefix, "L"))
28251 {
28252 arm_ccfsm_state = 0;
28253 arm_target_insn = NULL;
28254 }
28255 default_internal_label (stream, prefix, labelno);
28256 }
28257
28258 /* Output code to add DELTA to the first argument, and then jump
28259 to FUNCTION. Used for C++ multiple inheritance. */
28260
28261 static void
28262 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28263 HOST_WIDE_INT, tree function)
28264 {
28265 static int thunk_label = 0;
28266 char label[256];
28267 char labelpc[256];
28268 int mi_delta = delta;
28269 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28270 int shift = 0;
28271 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28272 ? 1 : 0);
28273 if (mi_delta < 0)
28274 mi_delta = - mi_delta;
28275
28276 final_start_function (emit_barrier (), file, 1);
28277
28278 if (TARGET_THUMB1)
28279 {
28280 int labelno = thunk_label++;
28281 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28282       /* Thunks are entered in ARM mode when available.  */
28283 if (TARGET_THUMB1_ONLY)
28284 {
28285 /* push r3 so we can use it as a temporary. */
28286 /* TODO: Omit this save if r3 is not used. */
28287 fputs ("\tpush {r3}\n", file);
28288
28289 /* With -mpure-code, we cannot load the address from the
28290 constant pool: we build it explicitly. */
28291 if (target_pure_code)
28292 {
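	      /* Build the 32-bit address of FUNCTION in r3 one byte at a
		 time, most significant byte first, shifting left by 8
		 between the adds.  */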
28293 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28294 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28295 fputc ('\n', file);
28296 fputs ("\tlsls r3, #8\n", file);
28297 fputs ("\tadds\tr3, #:upper0_7:#", file);
28298 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28299 fputc ('\n', file);
28300 fputs ("\tlsls r3, #8\n", file);
28301 fputs ("\tadds\tr3, #:lower8_15:#", file);
28302 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28303 fputc ('\n', file);
28304 fputs ("\tlsls r3, #8\n", file);
28305 fputs ("\tadds\tr3, #:lower0_7:#", file);
28306 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28307 fputc ('\n', file);
28308 }
28309 else
28310 fputs ("\tldr\tr3, ", file);
28311 }
28312 else
28313 {
28314 fputs ("\tldr\tr12, ", file);
28315 }
28316
28317 if (!target_pure_code)
28318 {
28319 assemble_name (file, label);
28320 fputc ('\n', file);
28321 }
28322
28323 if (flag_pic)
28324 {
28325 /* If we are generating PIC, the ldr instruction below loads
28326 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28327 the address of the add + 8, so we have:
28328
28329 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28330 = target + 1.
28331
28332 Note that we have "+ 1" because some versions of GNU ld
28333 don't set the low bit of the result for R_ARM_REL32
28334 relocations against thumb function symbols.
28335 On ARMv6M this is +4, not +8. */
28336 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28337 assemble_name (file, labelpc);
28338 fputs (":\n", file);
28339 if (TARGET_THUMB1_ONLY)
28340 {
28341 /* This is 2 insns after the start of the thunk, so we know it
28342 is 4-byte aligned. */
28343 fputs ("\tadd\tr3, pc, r3\n", file);
28344 fputs ("\tmov r12, r3\n", file);
28345 }
28346 else
28347 fputs ("\tadd\tr12, pc, r12\n", file);
28348 }
28349 else if (TARGET_THUMB1_ONLY)
28350 fputs ("\tmov r12, r3\n", file);
28351 }
28352 if (TARGET_THUMB1_ONLY)
28353 {
28354 if (mi_delta > 255)
28355 {
28356 fputs ("\tldr\tr3, ", file);
28357 assemble_name (file, label);
28358 fputs ("+4\n", file);
28359 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28360 mi_op, this_regno, this_regno);
28361 }
28362 else if (mi_delta != 0)
28363 {
28364 	  /* Thumb-1 unified syntax requires an 's' suffix in the instruction
28365 	     name when one of the operands is an immediate.  */
28366 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28367 mi_op, this_regno, this_regno,
28368 mi_delta);
28369 }
28370 }
28371 else
28372 {
28373 /* TODO: Use movw/movt for large constants when available. */
28374 while (mi_delta != 0)
28375 {
28376 if ((mi_delta & (3 << shift)) == 0)
28377 shift += 2;
28378 else
28379 {
28380 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28381 mi_op, this_regno, this_regno,
28382 mi_delta & (0xff << shift));
28383 mi_delta &= ~(0xff << shift);
28384 shift += 8;
28385 }
28386 }
28387 }
28388 if (TARGET_THUMB1)
28389 {
28390 if (TARGET_THUMB1_ONLY)
28391 fputs ("\tpop\t{r3}\n", file);
28392
28393 fprintf (file, "\tbx\tr12\n");
28394 ASM_OUTPUT_ALIGN (file, 2);
28395 assemble_name (file, label);
28396 fputs (":\n", file);
28397 if (flag_pic)
28398 {
28399 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28400 rtx tem = XEXP (DECL_RTL (function), 0);
28401 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28402 pipeline offset is four rather than eight. Adjust the offset
28403 accordingly. */
28404 tem = plus_constant (GET_MODE (tem), tem,
28405 TARGET_THUMB1_ONLY ? -3 : -7);
28406 tem = gen_rtx_MINUS (GET_MODE (tem),
28407 tem,
28408 gen_rtx_SYMBOL_REF (Pmode,
28409 ggc_strdup (labelpc)));
28410 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28411 }
28412 else
28413 /* Output ".word .LTHUNKn". */
28414 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28415
28416 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28417 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28418 }
28419 else
28420 {
28421 fputs ("\tb\t", file);
28422 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28423 if (NEED_PLT_RELOC)
28424 fputs ("(PLT)", file);
28425 fputc ('\n', file);
28426 }
28427
28428 final_end_function ();
28429 }
28430
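/* Illustrative sketch (not part of the original source): for a Thumb-1-only
   target, no PIC, and a small MI_DELTA, the routine above ends up emitting a
   sequence along these lines.  The register choice, delta value and
   literal-pool label are assumptions; the real output depends on the options
   in effect.

       push  {r3}
       ldr   r3, .LTHUNK0
       mov   r12, r3
       adds  r0, r0, #8        @ adjust the this pointer by MI_DELTA
       pop   {r3}
       bx    r12
       .align 2
   .LTHUNK0:
       .word <target function>
*/
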
28431 /* MI thunk handling for TARGET_32BIT. */
28432
28433 static void
28434 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28435 HOST_WIDE_INT vcall_offset, tree function)
28436 {
28437 const bool long_call_p = arm_is_long_call_p (function);
28438
28439 /* On ARM, this_regno is R0 or R1 depending on
28440 whether the function returns an aggregate or not.
28441 */
28442 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28443 function)
28444 ? R1_REGNUM : R0_REGNUM);
28445
28446 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28447 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28448 reload_completed = 1;
28449 emit_note (NOTE_INSN_PROLOGUE_END);
28450
28451 /* Add DELTA to THIS_RTX. */
28452 if (delta != 0)
28453 arm_split_constant (PLUS, Pmode, NULL_RTX,
28454 delta, this_rtx, this_rtx, false);
28455
28456 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28457 if (vcall_offset != 0)
28458 {
28459 /* Load *THIS_RTX. */
28460 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28461 /* Compute *THIS_RTX + VCALL_OFFSET. */
28462 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28463 false);
28464 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
28465 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
28466 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
28467 }
28468
28469 /* Generate a tail call to the target function. */
28470 if (!TREE_USED (function))
28471 {
28472 assemble_external (function);
28473 TREE_USED (function) = 1;
28474 }
28475 rtx funexp = XEXP (DECL_RTL (function), 0);
28476 if (long_call_p)
28477 {
28478 emit_move_insn (temp, funexp);
28479 funexp = temp;
28480 }
28481 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28482 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
28483 SIBLING_CALL_P (insn) = 1;
28484 emit_barrier ();
28485
28486 /* Indirect calls require a bit of fixup in PIC mode. */
28487 if (long_call_p)
28488 {
28489 split_all_insns_noflow ();
28490 arm_reorg ();
28491 }
28492
28493 insn = get_insns ();
28494 shorten_branches (insn);
28495 final_start_function (insn, file, 1);
28496 final (insn, file, 1);
28497 final_end_function ();
28498
28499 /* Stop pretending this is a post-reload pass. */
28500 reload_completed = 0;
28501 }
28502
28503 /* Output code to add DELTA to the first argument, and then jump
28504 to FUNCTION. Used for C++ multiple inheritance. */
28505
28506 static void
28507 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
28508 HOST_WIDE_INT vcall_offset, tree function)
28509 {
28510 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
28511
28512 assemble_start_function (thunk, fnname);
28513 if (TARGET_32BIT)
28514 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
28515 else
28516 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
28517 assemble_end_function (thunk, fnname);
28518 }
28519
28520 int
28521 arm_emit_vector_const (FILE *file, rtx x)
28522 {
28523 int i;
28524 const char * pattern;
28525
28526 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28527
28528 switch (GET_MODE (x))
28529 {
28530 case E_V2SImode: pattern = "%08x"; break;
28531 case E_V4HImode: pattern = "%04x"; break;
28532 case E_V8QImode: pattern = "%02x"; break;
28533 default: gcc_unreachable ();
28534 }
28535
28536 fprintf (file, "0x");
28537 for (i = CONST_VECTOR_NUNITS (x); i--;)
28538 {
28539 rtx element;
28540
28541 element = CONST_VECTOR_ELT (x, i);
28542 fprintf (file, pattern, INTVAL (element));
28543 }
28544
28545 return 1;
28546 }
28547
28548 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28549 HFmode constant pool entries are actually loaded with ldr. */
28550 void
28551 arm_emit_fp16_const (rtx c)
28552 {
28553 long bits;
28554
28555 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
28556 if (WORDS_BIG_ENDIAN)
28557 assemble_zeros (2);
28558 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28559 if (!WORDS_BIG_ENDIAN)
28560 assemble_zeros (2);
28561 }
28562
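/* Worked example (illustration only): the HFmode constant 1.0 has the IEEE
   half-precision encoding 0x3c00.  With !WORDS_BIG_ENDIAN the function above
   emits the two data bytes first and then two bytes of zero padding, so the
   pool word reads 0x00003c00; with WORDS_BIG_ENDIAN the padding comes first
   instead.  Either way the entry occupies a full 4-byte word, as required by
   the ldr that loads it.  */
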
28563 const char *
28564 arm_output_load_gr (rtx *operands)
28565 {
28566 rtx reg;
28567 rtx offset;
28568 rtx wcgr;
28569 rtx sum;
28570
28571 if (!MEM_P (operands [1])
28572 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28573 || !REG_P (reg = XEXP (sum, 0))
28574 || !CONST_INT_P (offset = XEXP (sum, 1))
28575 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28576 return "wldrw%?\t%0, %1";
28577
28578 /* Fix up an out-of-range load of a GR register. */
28579 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28580 wcgr = operands[0];
28581 operands[0] = reg;
28582 output_asm_insn ("ldr%?\t%0, %1", operands);
28583
28584 operands[0] = wcgr;
28585 operands[1] = reg;
28586 output_asm_insn ("tmcr%?\t%0, %1", operands);
28587 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28588
28589 return "";
28590 }
28591
28592 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28593
28594 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28595 named arg and all anonymous args onto the stack.
28596 XXX I know the prologue shouldn't be pushing registers, but it is faster
28597 that way. */
28598
28599 static void
28600 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28601 const function_arg_info &arg,
28602 int *pretend_size,
28603 int second_time ATTRIBUTE_UNUSED)
28604 {
28605 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28606 int nregs;
28607
28608 cfun->machine->uses_anonymous_args = 1;
28609 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28610 {
28611 nregs = pcum->aapcs_ncrn;
28612 if (nregs & 1)
28613 {
28614 int res = arm_needs_doubleword_align (arg.mode, arg.type);
28615 if (res < 0 && warn_psabi)
28616 inform (input_location, "parameter passing for argument of "
28617 "type %qT changed in GCC 7.1", arg.type);
28618 else if (res > 0)
28619 {
28620 nregs++;
28621 if (res > 1 && warn_psabi)
28622 inform (input_location,
28623 "parameter passing for argument of type "
28624 "%qT changed in GCC 9.1", arg.type);
28625 }
28626 }
28627 }
28628 else
28629 nregs = pcum->nregs;
28630
28631 if (nregs < NUM_ARG_REGS)
28632 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28633 }
28634
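/* Worked example (illustration only, AAPCS assumed): for a variadic function
   such as "int f (int a, ...)", one core argument register is consumed by A,
   so nregs == 1 and the anonymous arguments still start in r1.  With
   NUM_ARG_REGS == 4 and UNITS_PER_WORD == 4 this gives
   *pretend_size == (4 - 1) * 4 == 12, i.e. the prologue pushes r1-r3 so that
   va_arg can walk all the arguments contiguously on the stack.  */
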
28635 /* We can't rely on the caller doing the proper promotion when
28636 using APCS or ATPCS. */
28637
28638 static bool
28639 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28640 {
28641 return !TARGET_AAPCS_BASED;
28642 }
28643
28644 static machine_mode
28645 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28646 machine_mode mode,
28647 int *punsignedp ATTRIBUTE_UNUSED,
28648 const_tree fntype ATTRIBUTE_UNUSED,
28649 int for_return ATTRIBUTE_UNUSED)
28650 {
28651 if (GET_MODE_CLASS (mode) == MODE_INT
28652 && GET_MODE_SIZE (mode) < 4)
28653 return SImode;
28654
28655 return mode;
28656 }
28657
28658
28659 static bool
28660 arm_default_short_enums (void)
28661 {
28662 return ARM_DEFAULT_SHORT_ENUMS;
28663 }
28664
28665
28666 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28667
28668 static bool
28669 arm_align_anon_bitfield (void)
28670 {
28671 return TARGET_AAPCS_BASED;
28672 }
28673
28674
28675 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28676
28677 static tree
28678 arm_cxx_guard_type (void)
28679 {
28680 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28681 }
28682
28683
28684 /* The EABI says test the least significant bit of a guard variable. */
28685
28686 static bool
28687 arm_cxx_guard_mask_bit (void)
28688 {
28689 return TARGET_AAPCS_BASED;
28690 }
28691
28692
28693 /* The EABI specifies that all array cookies are 8 bytes long. */
28694
28695 static tree
28696 arm_get_cookie_size (tree type)
28697 {
28698 tree size;
28699
28700 if (!TARGET_AAPCS_BASED)
28701 return default_cxx_get_cookie_size (type);
28702
28703 size = build_int_cst (sizetype, 8);
28704 return size;
28705 }
28706
28707
28708 /* The EABI says that array cookies should also contain the element size. */
28709
28710 static bool
28711 arm_cookie_has_size (void)
28712 {
28713 return TARGET_AAPCS_BASED;
28714 }
28715
28716
28717 /* The EABI says constructors and destructors should return a pointer to
28718 the object constructed/destroyed. */
28719
28720 static bool
28721 arm_cxx_cdtor_returns_this (void)
28722 {
28723 return TARGET_AAPCS_BASED;
28724 }
28725
28726 /* The EABI says that an inline function may never be the key
28727 method. */
28728
28729 static bool
28730 arm_cxx_key_method_may_be_inline (void)
28731 {
28732 return !TARGET_AAPCS_BASED;
28733 }
28734
28735 static void
28736 arm_cxx_determine_class_data_visibility (tree decl)
28737 {
28738 if (!TARGET_AAPCS_BASED
28739 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28740 return;
28741
28742 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28743 is exported. However, on systems without dynamic vague linkage,
28744 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28745 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28746 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28747 else
28748 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28749 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28750 }
28751
28752 static bool
28753 arm_cxx_class_data_always_comdat (void)
28754 {
28755 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28756 vague linkage if the class has no key function. */
28757 return !TARGET_AAPCS_BASED;
28758 }
28759
28760
28761 /* The EABI says __aeabi_atexit should be used to register static
28762 destructors. */
28763
28764 static bool
28765 arm_cxx_use_aeabi_atexit (void)
28766 {
28767 return TARGET_AAPCS_BASED;
28768 }
28769
28770
28771 void
28772 arm_set_return_address (rtx source, rtx scratch)
28773 {
28774 arm_stack_offsets *offsets;
28775 HOST_WIDE_INT delta;
28776 rtx addr, mem;
28777 unsigned long saved_regs;
28778
28779 offsets = arm_get_frame_offsets ();
28780 saved_regs = offsets->saved_regs_mask;
28781
28782 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28783 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28784 else
28785 {
28786 if (frame_pointer_needed)
28787 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28788 else
28789 {
28790 /* LR will be the first saved register. */
28791 delta = offsets->outgoing_args - (offsets->frame + 4);
28792
28793
28794 if (delta >= 4096)
28795 {
28796 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28797 GEN_INT (delta & ~4095)));
28798 addr = scratch;
28799 delta &= 4095;
28800 }
28801 else
28802 addr = stack_pointer_rtx;
28803
28804 addr = plus_constant (Pmode, addr, delta);
28805 }
28806
28807 /* The store needs to be marked to prevent DSE from deleting
28808 it as dead if it is based on fp. */
28809 mem = gen_frame_mem (Pmode, addr);
28810 MEM_VOLATILE_P (mem) = true;
28811 emit_move_insn (mem, source);
28812 }
28813 }
28814
28815
28816 void
28817 thumb_set_return_address (rtx source, rtx scratch)
28818 {
28819 arm_stack_offsets *offsets;
28820 HOST_WIDE_INT delta;
28821 HOST_WIDE_INT limit;
28822 int reg;
28823 rtx addr, mem;
28824 unsigned long mask;
28825
28826 emit_use (source);
28827
28828 offsets = arm_get_frame_offsets ();
28829 mask = offsets->saved_regs_mask;
28830 if (mask & (1 << LR_REGNUM))
28831 {
28832 limit = 1024;
28833 /* Find the saved regs. */
28834 if (frame_pointer_needed)
28835 {
28836 delta = offsets->soft_frame - offsets->saved_args;
28837 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28838 if (TARGET_THUMB1)
28839 limit = 128;
28840 }
28841 else
28842 {
28843 delta = offsets->outgoing_args - offsets->saved_args;
28844 reg = SP_REGNUM;
28845 }
28846 /* Allow for the stack frame. */
28847 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28848 delta -= 16;
28849 /* The link register is always the first saved register. */
28850 delta -= 4;
28851
28852 /* Construct the address. */
28853 addr = gen_rtx_REG (SImode, reg);
28854 if (delta > limit)
28855 {
28856 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28857 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28858 addr = scratch;
28859 }
28860 else
28861 addr = plus_constant (Pmode, addr, delta);
28862
28863 /* The store needs to be marked to prevent DSE from deleting
28864 it as dead if it is based on fp. */
28865 mem = gen_frame_mem (Pmode, addr);
28866 MEM_VOLATILE_P (mem) = true;
28867 emit_move_insn (mem, source);
28868 }
28869 else
28870 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28871 }
28872
28873 /* Implements target hook vector_mode_supported_p. */
28874 bool
28875 arm_vector_mode_supported_p (machine_mode mode)
28876 {
28877 /* Neon also supports V2SImode, etc. listed in the clause below. */
28878 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28879 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
28880 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
28881 || mode == V8BFmode))
28882 return true;
28883
28884 if ((TARGET_NEON || TARGET_IWMMXT)
28885 && ((mode == V2SImode)
28886 || (mode == V4HImode)
28887 || (mode == V8QImode)))
28888 return true;
28889
28890 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28891 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28892 || mode == V2HAmode))
28893 return true;
28894
28895 if (TARGET_HAVE_MVE
28896 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
28897 || mode == V16QImode))
28898 return true;
28899
28900 if (TARGET_HAVE_MVE_FLOAT
28901 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
28902 return true;
28903
28904 return false;
28905 }
28906
28907 /* Implements target hook array_mode_supported_p. */
28908
28909 static bool
28910 arm_array_mode_supported_p (machine_mode mode,
28911 unsigned HOST_WIDE_INT nelems)
28912 {
28913 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
28914 for now, as the lane-swapping logic needs to be extended in the expanders.
28915 See PR target/82518. */
28916 if (TARGET_NEON && !BYTES_BIG_ENDIAN
28917 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28918 && (nelems >= 2 && nelems <= 4))
28919 return true;
28920
28921 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
28922 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
28923 return true;
28924
28925 return false;
28926 }
28927
28928 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28929 registers when autovectorizing for Neon, at least until multiple vector
28930 widths are supported properly by the middle-end. */
28931
28932 static machine_mode
28933 arm_preferred_simd_mode (scalar_mode mode)
28934 {
28935 if (TARGET_NEON)
28936 switch (mode)
28937 {
28938 case E_SFmode:
28939 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28940 case E_SImode:
28941 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28942 case E_HImode:
28943 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28944 case E_QImode:
28945 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28946 case E_DImode:
28947 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28948 return V2DImode;
28949 break;
28950
28951 default:;
28952 }
28953
28954 if (TARGET_REALLY_IWMMXT)
28955 switch (mode)
28956 {
28957 case E_SImode:
28958 return V2SImode;
28959 case E_HImode:
28960 return V4HImode;
28961 case E_QImode:
28962 return V8QImode;
28963
28964 default:;
28965 }
28966
28967 if (TARGET_HAVE_MVE)
28968 switch (mode)
28969 {
28970 case QImode:
28971 return V16QImode;
28972 case HImode:
28973 return V8HImode;
28974 case SImode:
28975 return V4SImode;
28976
28977 default:;
28978 }
28979
28980 if (TARGET_HAVE_MVE_FLOAT)
28981 switch (mode)
28982 {
28983 case HFmode:
28984 return V8HFmode;
28985 case SFmode:
28986 return V4SFmode;
28987
28988 default:;
28989 }
28990
28991 return word_mode;
28992 }
28993
28994 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28995
28996 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28997 using r0-r4 for function arguments, r7 for the stack frame and don't have
28998 enough left over to do doubleword arithmetic. For Thumb-2 all the
28999 potentially problematic instructions accept high registers so this is not
29000 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29001 that require many low registers. */
29002 static bool
29003 arm_class_likely_spilled_p (reg_class_t rclass)
29004 {
29005 if ((TARGET_THUMB1 && rclass == LO_REGS)
29006 || rclass == CC_REG)
29007 return true;
29008
29009 return false;
29010 }
29011
29012 /* Implements target hook small_register_classes_for_mode_p. */
29013 bool
29014 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29015 {
29016 return TARGET_THUMB1;
29017 }
29018
29019 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29020 ARM insns and therefore guarantee that the shift count is modulo 256.
29021 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29022 guarantee no particular behavior for out-of-range counts. */
29023
29024 static unsigned HOST_WIDE_INT
29025 arm_shift_truncation_mask (machine_mode mode)
29026 {
29027 return mode == SImode ? 255 : 0;
29028 }
29029
29030
29031 /* Map internal gcc register numbers to DWARF2 register numbers. */
29032
29033 unsigned int
29034 arm_dbx_register_number (unsigned int regno)
29035 {
29036 if (regno < 16)
29037 return regno;
29038
29039 if (IS_VFP_REGNUM (regno))
29040 {
29041 /* See comment in arm_dwarf_register_span. */
29042 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29043 return 64 + regno - FIRST_VFP_REGNUM;
29044 else
29045 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29046 }
29047
29048 if (IS_IWMMXT_GR_REGNUM (regno))
29049 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29050
29051 if (IS_IWMMXT_REGNUM (regno))
29052 return 112 + regno - FIRST_IWMMXT_REGNUM;
29053
29054 return DWARF_FRAME_REGISTERS;
29055 }
29056
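/* Examples of the mapping above (for illustration only): core register r7
   maps to DWARF number 7; the single-precision register s5
   (FIRST_VFP_REGNUM + 5) maps to 64 + 5 = 69; the double-precision register
   d20, which has no single-precision alias, maps to 256 + 40 / 2 = 276; the
   iWMMXt register wR3 maps to 112 + 3 = 115.  */
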
29057 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29058 GCC models them as 64 32-bit registers, so we need to describe this to
29059 the DWARF generation code. Other registers can use the default. */
29060 static rtx
29061 arm_dwarf_register_span (rtx rtl)
29062 {
29063 machine_mode mode;
29064 unsigned regno;
29065 rtx parts[16];
29066 int nregs;
29067 int i;
29068
29069 regno = REGNO (rtl);
29070 if (!IS_VFP_REGNUM (regno))
29071 return NULL_RTX;
29072
29073 /* XXX FIXME: The EABI defines two VFP register ranges:
29074 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29075 256-287: D0-D31
29076 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29077 corresponding D register. Until GDB supports this, we shall use the
29078 legacy encodings. We also use these encodings for D0-D15 for
29079 compatibility with older debuggers. */
29080 mode = GET_MODE (rtl);
29081 if (GET_MODE_SIZE (mode) < 8)
29082 return NULL_RTX;
29083
29084 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29085 {
29086 nregs = GET_MODE_SIZE (mode) / 4;
29087 for (i = 0; i < nregs; i += 2)
29088 if (TARGET_BIG_END)
29089 {
29090 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29091 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29092 }
29093 else
29094 {
29095 parts[i] = gen_rtx_REG (SImode, regno + i);
29096 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29097 }
29098 }
29099 else
29100 {
29101 nregs = GET_MODE_SIZE (mode) / 8;
29102 for (i = 0; i < nregs; i++)
29103 parts[i] = gen_rtx_REG (DImode, regno + i);
29104 }
29105
29106 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29107 }
29108
29109 #if ARM_UNWIND_INFO
29110 /* Emit unwind directives for a store-multiple instruction or stack pointer
29111 push during alignment.
29112 These should only ever be generated by the function prologue code, so
29113 expect them to have a particular form.
29114 The store-multiple instruction sometimes pushes pc as the last register,
29115 although it should not be tracked in the unwind information, and for -Os
29116 it sometimes pushes dummy registers before the first register that does
29117 need to be tracked; such dummy registers exist only to avoid a separate
29118 stack adjustment and will not be restored in the
29119 epilogue. */
29120
29121 static void
29122 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29123 {
29124 int i;
29125 HOST_WIDE_INT offset;
29126 HOST_WIDE_INT nregs;
29127 int reg_size;
29128 unsigned reg;
29129 unsigned lastreg;
29130 unsigned padfirst = 0, padlast = 0;
29131 rtx e;
29132
29133 e = XVECEXP (p, 0, 0);
29134 gcc_assert (GET_CODE (e) == SET);
29135
29136 /* First insn will adjust the stack pointer. */
29137 gcc_assert (GET_CODE (e) == SET
29138 && REG_P (SET_DEST (e))
29139 && REGNO (SET_DEST (e)) == SP_REGNUM
29140 && GET_CODE (SET_SRC (e)) == PLUS);
29141
29142 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29143 nregs = XVECLEN (p, 0) - 1;
29144 gcc_assert (nregs);
29145
29146 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29147 if (reg < 16)
29148 {
29149 /* For -Os dummy registers can be pushed at the beginning to
29150 avoid separate stack pointer adjustment. */
29151 e = XVECEXP (p, 0, 1);
29152 e = XEXP (SET_DEST (e), 0);
29153 if (GET_CODE (e) == PLUS)
29154 padfirst = INTVAL (XEXP (e, 1));
29155 gcc_assert (padfirst == 0 || optimize_size);
29156 /* The function prologue may also push pc, but not annotate it as it is
29157 never restored. We turn this into a stack pointer adjustment. */
29158 e = XVECEXP (p, 0, nregs);
29159 e = XEXP (SET_DEST (e), 0);
29160 if (GET_CODE (e) == PLUS)
29161 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29162 else
29163 padlast = offset - 4;
29164 gcc_assert (padlast == 0 || padlast == 4);
29165 if (padlast == 4)
29166 fprintf (asm_out_file, "\t.pad #4\n");
29167 reg_size = 4;
29168 fprintf (asm_out_file, "\t.save {");
29169 }
29170 else if (IS_VFP_REGNUM (reg))
29171 {
29172 reg_size = 8;
29173 fprintf (asm_out_file, "\t.vsave {");
29174 }
29175 else
29176 /* Unknown register type. */
29177 gcc_unreachable ();
29178
29179 /* If the stack increment doesn't match the size of the saved registers,
29180 something has gone horribly wrong. */
29181 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29182
29183 offset = padfirst;
29184 lastreg = 0;
29185 /* The remaining insns will describe the stores. */
29186 for (i = 1; i <= nregs; i++)
29187 {
29188 /* Expect (set (mem <addr>) (reg)).
29189 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29190 e = XVECEXP (p, 0, i);
29191 gcc_assert (GET_CODE (e) == SET
29192 && MEM_P (SET_DEST (e))
29193 && REG_P (SET_SRC (e)));
29194
29195 reg = REGNO (SET_SRC (e));
29196 gcc_assert (reg >= lastreg);
29197
29198 if (i != 1)
29199 fprintf (asm_out_file, ", ");
29200 /* We can't use %r for vfp because we need to use the
29201 double precision register names. */
29202 if (IS_VFP_REGNUM (reg))
29203 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29204 else
29205 asm_fprintf (asm_out_file, "%r", reg);
29206
29207 if (flag_checking)
29208 {
29209 /* Check that the addresses are consecutive. */
29210 e = XEXP (SET_DEST (e), 0);
29211 if (GET_CODE (e) == PLUS)
29212 gcc_assert (REG_P (XEXP (e, 0))
29213 && REGNO (XEXP (e, 0)) == SP_REGNUM
29214 && CONST_INT_P (XEXP (e, 1))
29215 && offset == INTVAL (XEXP (e, 1)));
29216 else
29217 gcc_assert (i == 1
29218 && REG_P (e)
29219 && REGNO (e) == SP_REGNUM);
29220 offset += reg_size;
29221 }
29222 }
29223 fprintf (asm_out_file, "}\n");
29224 if (padfirst)
29225 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29226 }
29227
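/* As an illustration (assumed prologue, not taken from the original source):
   a prologue "push {r4, r5, r6, lr}" handled above produces the directive
   ".save {r4, r5, r6, lr}", while a "vpush {d8, d9}" store-multiple produces
   ".vsave {d8, d9}".  When -Os pads the push with a dummy leading register,
   or the prologue also pushes pc, the extra words are reported with a ".pad"
   directive instead of being listed in the register set.  */
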
29228 /* Emit unwind directives for a SET. */
29229
29230 static void
29231 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29232 {
29233 rtx e0;
29234 rtx e1;
29235 unsigned reg;
29236
29237 e0 = XEXP (p, 0);
29238 e1 = XEXP (p, 1);
29239 switch (GET_CODE (e0))
29240 {
29241 case MEM:
29242 /* Pushing a single register. */
29243 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29244 || !REG_P (XEXP (XEXP (e0, 0), 0))
29245 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29246 abort ();
29247
29248 asm_fprintf (asm_out_file, "\t.save ");
29249 if (IS_VFP_REGNUM (REGNO (e1)))
29250 asm_fprintf (asm_out_file, "{d%d}\n",
29251 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29252 else
29253 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
29254 break;
29255
29256 case REG:
29257 if (REGNO (e0) == SP_REGNUM)
29258 {
29259 /* A stack increment. */
29260 if (GET_CODE (e1) != PLUS
29261 || !REG_P (XEXP (e1, 0))
29262 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29263 || !CONST_INT_P (XEXP (e1, 1)))
29264 abort ();
29265
29266 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29267 -INTVAL (XEXP (e1, 1)));
29268 }
29269 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29270 {
29271 HOST_WIDE_INT offset;
29272
29273 if (GET_CODE (e1) == PLUS)
29274 {
29275 if (!REG_P (XEXP (e1, 0))
29276 || !CONST_INT_P (XEXP (e1, 1)))
29277 abort ();
29278 reg = REGNO (XEXP (e1, 0));
29279 offset = INTVAL (XEXP (e1, 1));
29280 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29281 HARD_FRAME_POINTER_REGNUM, reg,
29282 offset);
29283 }
29284 else if (REG_P (e1))
29285 {
29286 reg = REGNO (e1);
29287 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29288 HARD_FRAME_POINTER_REGNUM, reg);
29289 }
29290 else
29291 abort ();
29292 }
29293 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29294 {
29295 /* Move from sp to reg. */
29296 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29297 }
29298 else if (GET_CODE (e1) == PLUS
29299 && REG_P (XEXP (e1, 0))
29300 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29301 && CONST_INT_P (XEXP (e1, 1)))
29302 {
29303 /* Set reg to offset from sp. */
29304 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29305 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29306 }
29307 else
29308 abort ();
29309 break;
29310
29311 default:
29312 abort ();
29313 }
29314 }
29315
29316
29317 /* Emit unwind directives for the given insn. */
29318
29319 static void
29320 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29321 {
29322 rtx note, pat;
29323 bool handled_one = false;
29324
29325 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29326 return;
29327
29328 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29329 && (TREE_NOTHROW (current_function_decl)
29330 || crtl->all_throwers_are_sibcalls))
29331 return;
29332
29333 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29334 return;
29335
29336 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29337 {
29338 switch (REG_NOTE_KIND (note))
29339 {
29340 case REG_FRAME_RELATED_EXPR:
29341 pat = XEXP (note, 0);
29342 goto found;
29343
29344 case REG_CFA_REGISTER:
29345 pat = XEXP (note, 0);
29346 if (pat == NULL)
29347 {
29348 pat = PATTERN (insn);
29349 if (GET_CODE (pat) == PARALLEL)
29350 pat = XVECEXP (pat, 0, 0);
29351 }
29352
29353 /* Only emitted for IS_STACKALIGN re-alignment. */
29354 {
29355 rtx dest, src;
29356 unsigned reg;
29357
29358 src = SET_SRC (pat);
29359 dest = SET_DEST (pat);
29360
29361 gcc_assert (src == stack_pointer_rtx);
29362 reg = REGNO (dest);
29363 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29364 reg + 0x90, reg);
29365 }
29366 handled_one = true;
29367 break;
29368
29369 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
29370 so that shrink-wrapping gets correct dwarf information. We should not
29371 emit unwind information for it, because these notes are used either for
29372 pretend arguments or to adjust sp and restore registers from the
29373 stack. */
29374 case REG_CFA_DEF_CFA:
29375 case REG_CFA_ADJUST_CFA:
29376 case REG_CFA_RESTORE:
29377 return;
29378
29379 case REG_CFA_EXPRESSION:
29380 case REG_CFA_OFFSET:
29381 /* ??? Only handling here what we actually emit. */
29382 gcc_unreachable ();
29383
29384 default:
29385 break;
29386 }
29387 }
29388 if (handled_one)
29389 return;
29390 pat = PATTERN (insn);
29391 found:
29392
29393 switch (GET_CODE (pat))
29394 {
29395 case SET:
29396 arm_unwind_emit_set (asm_out_file, pat);
29397 break;
29398
29399 case SEQUENCE:
29400 /* Store multiple. */
29401 arm_unwind_emit_sequence (asm_out_file, pat);
29402 break;
29403
29404 default:
29405 abort();
29406 }
29407 }
29408
29409
29410 /* Output a reference from a function exception table to the type_info
29411 object X. The EABI specifies that the symbol should be relocated by
29412 an R_ARM_TARGET2 relocation. */
29413
29414 static bool
29415 arm_output_ttype (rtx x)
29416 {
29417 fputs ("\t.word\t", asm_out_file);
29418 output_addr_const (asm_out_file, x);
29419 /* Use special relocations for symbol references. */
29420 if (!CONST_INT_P (x))
29421 fputs ("(TARGET2)", asm_out_file);
29422 fputc ('\n', asm_out_file);
29423
29424 return TRUE;
29425 }
29426
29427 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29428
29429 static void
29430 arm_asm_emit_except_personality (rtx personality)
29431 {
29432 fputs ("\t.personality\t", asm_out_file);
29433 output_addr_const (asm_out_file, personality);
29434 fputc ('\n', asm_out_file);
29435 }
29436 #endif /* ARM_UNWIND_INFO */
29437
29438 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29439
29440 static void
29441 arm_asm_init_sections (void)
29442 {
29443 #if ARM_UNWIND_INFO
29444 exception_section = get_unnamed_section (0, output_section_asm_op,
29445 "\t.handlerdata");
29446 #endif /* ARM_UNWIND_INFO */
29447
29448 #ifdef OBJECT_FORMAT_ELF
29449 if (target_pure_code)
29450 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29451 #endif
29452 }
29453
29454 /* Output unwind directives for the start/end of a function. */
29455
29456 void
29457 arm_output_fn_unwind (FILE * f, bool prologue)
29458 {
29459 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29460 return;
29461
29462 if (prologue)
29463 fputs ("\t.fnstart\n", f);
29464 else
29465 {
29466 /* If this function will never be unwound, then mark it as such.
29467 The same condition is used in arm_unwind_emit to suppress
29468 the frame annotations. */
29469 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29470 && (TREE_NOTHROW (current_function_decl)
29471 || crtl->all_throwers_are_sibcalls))
29472 fputs ("\t.cantunwind\n", f);
29473
29474 fputs ("\t.fnend\n", f);
29475 }
29476 }
29477
29478 static bool
29479 arm_emit_tls_decoration (FILE *fp, rtx x)
29480 {
29481 enum tls_reloc reloc;
29482 rtx val;
29483
29484 val = XVECEXP (x, 0, 0);
29485 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29486
29487 output_addr_const (fp, val);
29488
29489 switch (reloc)
29490 {
29491 case TLS_GD32:
29492 fputs ("(tlsgd)", fp);
29493 break;
29494 case TLS_GD32_FDPIC:
29495 fputs ("(tlsgd_fdpic)", fp);
29496 break;
29497 case TLS_LDM32:
29498 fputs ("(tlsldm)", fp);
29499 break;
29500 case TLS_LDM32_FDPIC:
29501 fputs ("(tlsldm_fdpic)", fp);
29502 break;
29503 case TLS_LDO32:
29504 fputs ("(tlsldo)", fp);
29505 break;
29506 case TLS_IE32:
29507 fputs ("(gottpoff)", fp);
29508 break;
29509 case TLS_IE32_FDPIC:
29510 fputs ("(gottpoff_fdpic)", fp);
29511 break;
29512 case TLS_LE32:
29513 fputs ("(tpoff)", fp);
29514 break;
29515 case TLS_DESCSEQ:
29516 fputs ("(tlsdesc)", fp);
29517 break;
29518 default:
29519 gcc_unreachable ();
29520 }
29521
29522 switch (reloc)
29523 {
29524 case TLS_GD32:
29525 case TLS_LDM32:
29526 case TLS_IE32:
29527 case TLS_DESCSEQ:
29528 fputs (" + (. - ", fp);
29529 output_addr_const (fp, XVECEXP (x, 0, 2));
29530 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29531 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29532 output_addr_const (fp, XVECEXP (x, 0, 3));
29533 fputc (')', fp);
29534 break;
29535 default:
29536 break;
29537 }
29538
29539 return TRUE;
29540 }
29541
29542 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29543
29544 static void
29545 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29546 {
29547 gcc_assert (size == 4);
29548 fputs ("\t.word\t", file);
29549 output_addr_const (file, x);
29550 fputs ("(tlsldo)", file);
29551 }
29552
29553 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29554
29555 static bool
29556 arm_output_addr_const_extra (FILE *fp, rtx x)
29557 {
29558 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29559 return arm_emit_tls_decoration (fp, x);
29560 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29561 {
29562 char label[256];
29563 int labelno = INTVAL (XVECEXP (x, 0, 0));
29564
29565 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29566 assemble_name_raw (fp, label);
29567
29568 return TRUE;
29569 }
29570 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29571 {
29572 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29573 if (GOT_PCREL)
29574 fputs ("+.", fp);
29575 fputs ("-(", fp);
29576 output_addr_const (fp, XVECEXP (x, 0, 0));
29577 fputc (')', fp);
29578 return TRUE;
29579 }
29580 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29581 {
29582 output_addr_const (fp, XVECEXP (x, 0, 0));
29583 if (GOT_PCREL)
29584 fputs ("+.", fp);
29585 fputs ("-(", fp);
29586 output_addr_const (fp, XVECEXP (x, 0, 1));
29587 fputc (')', fp);
29588 return TRUE;
29589 }
29590 else if (GET_CODE (x) == CONST_VECTOR)
29591 return arm_emit_vector_const (fp, x);
29592
29593 return FALSE;
29594 }
29595
29596 /* Output assembly for a shift instruction.
29597 SET_FLAGS determines how the instruction modifies the condition codes.
29598 0 - Do not set condition codes.
29599 1 - Set condition codes.
29600 2 - Use smallest instruction. */
29601 const char *
29602 arm_output_shift (rtx *operands, int set_flags)
29603 {
29604 char pattern[100];
29605 static const char flag_chars[3] = {'?', '.', '!'};
29606 const char *shift;
29607 HOST_WIDE_INT val;
29608 char c;
29609
29610 c = flag_chars[set_flags];
29611 shift = shift_op(operands[3], &val);
29612 if (shift)
29613 {
29614 if (val != -1)
29615 operands[2] = GEN_INT(val);
29616 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29617 }
29618 else
29619 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29620
29621 output_asm_insn (pattern, operands);
29622 return "";
29623 }
29624
29625 /* Output assembly for a WMMX immediate shift instruction. */
29626 const char *
29627 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29628 {
29629 int shift = INTVAL (operands[2]);
29630 char templ[50];
29631 machine_mode opmode = GET_MODE (operands[0]);
29632
29633 gcc_assert (shift >= 0);
29634
29635 /* Handle a shift value larger than the element width: > 63 for the D
29636 qualifier, > 31 for the W qualifier, or > 15 for the H qualifier. */
29637 if (((opmode == V4HImode) && (shift > 15))
29638 || ((opmode == V2SImode) && (shift > 31))
29639 || ((opmode == DImode) && (shift > 63)))
29640 {
29641 if (wror_or_wsra)
29642 {
29643 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29644 output_asm_insn (templ, operands);
29645 if (opmode == DImode)
29646 {
29647 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29648 output_asm_insn (templ, operands);
29649 }
29650 }
29651 else
29652 {
29653 /* The destination register will contain all zeros. */
29654 sprintf (templ, "wzero\t%%0");
29655 output_asm_insn (templ, operands);
29656 }
29657 return "";
29658 }
29659
29660 if ((opmode == DImode) && (shift > 32))
29661 {
29662 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29663 output_asm_insn (templ, operands);
29664 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29665 output_asm_insn (templ, operands);
29666 }
29667 else
29668 {
29669 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29670 output_asm_insn (templ, operands);
29671 }
29672 return "";
29673 }
29674
29675 /* Output assembly for a WMMX tinsr instruction. */
29676 const char *
29677 arm_output_iwmmxt_tinsr (rtx *operands)
29678 {
29679 int mask = INTVAL (operands[3]);
29680 int i;
29681 char templ[50];
29682 int units = mode_nunits[GET_MODE (operands[0])];
29683 gcc_assert ((mask & (mask - 1)) == 0);
29684 for (i = 0; i < units; ++i)
29685 {
29686 if ((mask & 0x01) == 1)
29687 {
29688 break;
29689 }
29690 mask >>= 1;
29691 }
29692 gcc_assert (i < units);
29693 {
29694 switch (GET_MODE (operands[0]))
29695 {
29696 case E_V8QImode:
29697 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29698 break;
29699 case E_V4HImode:
29700 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29701 break;
29702 case E_V2SImode:
29703 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29704 break;
29705 default:
29706 gcc_unreachable ();
29707 break;
29708 }
29709 output_asm_insn (templ, operands);
29710 }
29711 return "";
29712 }
29713
29714 /* Output a Thumb-1 casesi dispatch sequence. */
29715 const char *
29716 thumb1_output_casesi (rtx *operands)
29717 {
29718 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29719
29720 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29721
29722 switch (GET_MODE(diff_vec))
29723 {
29724 case E_QImode:
29725 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29726 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29727 case E_HImode:
29728 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29729 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29730 case E_SImode:
29731 return "bl\t%___gnu_thumb1_case_si";
29732 default:
29733 gcc_unreachable ();
29734 }
29735 }
29736
29737 /* Output a Thumb-2 casesi instruction. */
29738 const char *
29739 thumb2_output_casesi (rtx *operands)
29740 {
29741 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29742
29743 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29744
29745 output_asm_insn ("cmp\t%0, %1", operands);
29746 output_asm_insn ("bhi\t%l3", operands);
29747 switch (GET_MODE(diff_vec))
29748 {
29749 case E_QImode:
29750 return "tbb\t[%|pc, %0]";
29751 case E_HImode:
29752 return "tbh\t[%|pc, %0, lsl #1]";
29753 case E_SImode:
29754 if (flag_pic)
29755 {
29756 output_asm_insn ("adr\t%4, %l2", operands);
29757 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29758 output_asm_insn ("add\t%4, %4, %5", operands);
29759 return "bx\t%4";
29760 }
29761 else
29762 {
29763 output_asm_insn ("adr\t%4, %l2", operands);
29764 return "ldr\t%|pc, [%4, %0, lsl #2]";
29765 }
29766 default:
29767 gcc_unreachable ();
29768 }
29769 }
29770
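/* For illustration (assumed operands, not from the original source): with a
   QImode dispatch table the sequence printed above is simply

       cmp   r0, #<max index>
       bhi   .Ldefault
       tbb   [pc, r0]

   followed by the byte-offset table, while the SImode case falls back to an
   adr/ldr pair (plus an extra add when PIC requires position-independent
   table entries).  */
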
29771 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
29772 per-core tuning structs. */
29773 static int
29774 arm_issue_rate (void)
29775 {
29776 return current_tune->issue_rate;
29777 }
29778
29779 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
29780 static int
29781 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
29782 {
29783 if (DEBUG_INSN_P (insn))
29784 return more;
29785
29786 rtx_code code = GET_CODE (PATTERN (insn));
29787 if (code == USE || code == CLOBBER)
29788 return more;
29789
29790 if (get_attr_type (insn) == TYPE_NO_INSN)
29791 return more;
29792
29793 return more - 1;
29794 }
29795
29796 /* Return how many instructions the scheduler should look ahead to choose
29797 the best one. */
29798 static int
29799 arm_first_cycle_multipass_dfa_lookahead (void)
29800 {
29801 int issue_rate = arm_issue_rate ();
29802
29803 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
29804 }
29805
29806 /* Enable modeling of L2 auto-prefetcher. */
29807 static int
29808 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
29809 {
29810 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
29811 }
29812
29813 const char *
29814 arm_mangle_type (const_tree type)
29815 {
29816 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29817 has to be mangled as if it is in the "std" namespace. */
29818 if (TARGET_AAPCS_BASED
29819 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29820 return "St9__va_list";
29821
29822 /* Half-precision floating point types. */
29823 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29824 {
29825 if (TYPE_MODE (type) == BFmode)
29826 return "u6__bf16";
29827 else
29828 return "Dh";
29829 }
29830
29831 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
29832 builtin type. */
29833 if (TYPE_NAME (type) != NULL)
29834 return arm_mangle_builtin_type (type);
29835
29836 /* Use the default mangling. */
29837 return NULL;
29838 }
29839
29840 /* Order of allocation of core registers for Thumb: this allocation is
29841 written over the corresponding initial entries of the array
29842 initialized with REG_ALLOC_ORDER. We allocate all low registers
29843 first. Saving and restoring a low register is usually cheaper than
29844 using a call-clobbered high register. */
29845
29846 static const int thumb_core_reg_alloc_order[] =
29847 {
29848 3, 2, 1, 0, 4, 5, 6, 7,
29849 12, 14, 8, 9, 10, 11
29850 };
29851
29852 /* Adjust register allocation order when compiling for Thumb. */
29853
29854 void
29855 arm_order_regs_for_local_alloc (void)
29856 {
29857 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29858 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29859 if (TARGET_THUMB)
29860 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29861 sizeof (thumb_core_reg_alloc_order));
29862 }
29863
29864 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29865
29866 bool
29867 arm_frame_pointer_required (void)
29868 {
29869 if (SUBTARGET_FRAME_POINTER_REQUIRED)
29870 return true;
29871
29872 /* If the function receives nonlocal gotos, it needs to save the frame
29873 pointer in the nonlocal_goto_save_area object. */
29874 if (cfun->has_nonlocal_label)
29875 return true;
29876
29877 /* The frame pointer is required for non-leaf APCS frames. */
29878 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
29879 return true;
29880
29881 /* If we are probing the stack in the prologue, we will have a faulting
29882 instruction prior to the stack adjustment and this requires a frame
29883 pointer if we want to catch the exception using the EABI unwinder. */
29884 if (!IS_INTERRUPT (arm_current_func_type ())
29885 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
29886 || flag_stack_clash_protection)
29887 && arm_except_unwind_info (&global_options) == UI_TARGET
29888 && cfun->can_throw_non_call_exceptions)
29889 {
29890 HOST_WIDE_INT size = get_frame_size ();
29891
29892 /* That's irrelevant if there is no stack adjustment. */
29893 if (size <= 0)
29894 return false;
29895
29896 /* That's relevant only if there is a stack probe. */
29897 if (crtl->is_leaf && !cfun->calls_alloca)
29898 {
29899 /* We don't have the final size of the frame so adjust. */
29900 size += 32 * UNITS_PER_WORD;
29901 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
29902 return true;
29903 }
29904 else
29905 return true;
29906 }
29907
29908 return false;
29909 }
29910
29911 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
29912 All modes except THUMB1 have conditional execution.
29913 If we have conditional arithmetic, return false before reload to
29914 enable some ifcvt transformations. */
29915 static bool
29916 arm_have_conditional_execution (void)
29917 {
29918 bool has_cond_exec, enable_ifcvt_trans;
29919
29920 /* Only THUMB1 cannot support conditional execution. */
29921 has_cond_exec = !TARGET_THUMB1;
29922
29923 /* Enable ifcvt transformations if we have conditional arithmetic, but only
29924 before reload. */
29925 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
29926
29927 return has_cond_exec && !enable_ifcvt_trans;
29928 }
29929
29930 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29931 static HOST_WIDE_INT
29932 arm_vector_alignment (const_tree type)
29933 {
29934 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29935
29936 if (TARGET_AAPCS_BASED)
29937 align = MIN (align, 64);
29938
29939 return align;
29940 }
29941
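/* Example (illustration only): a 16-byte GNU vector type such as int32x4_t
   nominally has a 128-bit size, but under TARGET_AAPCS_BASED the function
   above caps its alignment at 64 bits, matching the AAPCS rule quoted in the
   comment.  */
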
29942 static unsigned int
29943 arm_autovectorize_vector_modes (vector_modes *modes, bool)
29944 {
29945 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29946 {
29947 modes->safe_push (V16QImode);
29948 modes->safe_push (V8QImode);
29949 }
29950 return 0;
29951 }
29952
29953 static bool
29954 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29955 {
29956 /* Vectors which aren't in packed structures will not be less aligned than
29957 the natural alignment of their element type, so this is safe. */
29958 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29959 return !is_packed;
29960
29961 return default_builtin_vector_alignment_reachable (type, is_packed);
29962 }
29963
29964 static bool
29965 arm_builtin_support_vector_misalignment (machine_mode mode,
29966 const_tree type, int misalignment,
29967 bool is_packed)
29968 {
29969 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29970 {
29971 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29972
29973 if (is_packed)
29974 return align == 1;
29975
29976 /* If the misalignment is unknown, we should be able to handle the access
29977 so long as it is not to a member of a packed data structure. */
29978 if (misalignment == -1)
29979 return true;
29980
29981 /* Return true if the misalignment is a multiple of the natural alignment
29982 of the vector's element type. This is probably always going to be
29983 true in practice, since we've already established that this isn't a
29984 packed access. */
29985 return ((misalignment % align) == 0);
29986 }
29987
29988 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29989 is_packed);
29990 }
29991
29992 static void
29993 arm_conditional_register_usage (void)
29994 {
29995 int regno;
29996
29997 if (TARGET_THUMB1 && optimize_size)
29998 {
29999 /* When optimizing for size on Thumb-1, it's better not
30000 to use the HI regs, because of the overhead of
30001 stacking them. */
30002 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30003 fixed_regs[regno] = call_used_regs[regno] = 1;
30004 }
30005
30006 /* The link register can be clobbered by any branch insn,
30007 but we have no way to track that at present, so mark
30008 it as unavailable. */
30009 if (TARGET_THUMB1)
30010 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30011
30012 if (TARGET_32BIT && TARGET_VFP_BASE)
30013 {
30014 /* VFPv3 registers are disabled when earlier VFP
30015 versions are selected due to the definition of
30016 LAST_VFP_REGNUM. */
30017 for (regno = FIRST_VFP_REGNUM;
30018 regno <= LAST_VFP_REGNUM; ++ regno)
30019 {
30020 fixed_regs[regno] = 0;
30021 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30022 || regno >= FIRST_VFP_REGNUM + 32;
30023 }
30024 if (TARGET_HAVE_MVE)
30025 fixed_regs[VPR_REGNUM] = 0;
30026 }
30027
30028 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30029 {
30030 regno = FIRST_IWMMXT_GR_REGNUM;
30031 /* The 2002/10/09 revision of the XScale ABI has wCG0
30032 and wCG1 as call-preserved registers. The 2002/11/21
30033 revision changed this so that all wCG registers are
30034 scratch registers. */
30035 for (regno = FIRST_IWMMXT_GR_REGNUM;
30036 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30037 fixed_regs[regno] = 0;
30038 /* The XScale ABI has wR0 - wR9 as scratch registers,
30039 the rest as call-preserved registers. */
30040 for (regno = FIRST_IWMMXT_REGNUM;
30041 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30042 {
30043 fixed_regs[regno] = 0;
30044 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30045 }
30046 }
30047
30048 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30049 {
30050 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30051 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30052 }
30053 else if (TARGET_APCS_STACK)
30054 {
30055 fixed_regs[10] = 1;
30056 call_used_regs[10] = 1;
30057 }
30058 /* -mcaller-super-interworking reserves r11 for calls to
30059 _interwork_r11_call_via_rN(). Making the register global
30060 is an easy way of ensuring that it remains valid for all
30061 calls. */
30062 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30063 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30064 {
30065 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30066 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30067 if (TARGET_CALLER_INTERWORKING)
30068 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30069 }
30070
30071 /* The Q and GE bits are only accessed via special ACLE patterns. */
30072 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30073 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30074
30075 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30076 }
30077
30078 static reg_class_t
30079 arm_preferred_rename_class (reg_class_t rclass)
30080 {
30081 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30082 using GENERAL_REGS. During the register rename pass we therefore prefer
30083 LO_REGS, which can reduce code size. */
30084 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30085 return LO_REGS;
30086 else
30087 return NO_REGS;
30088 }
30089
30090 /* Compute the attribute "length" of insn "*push_multi".
30091 So this function MUST be kept in sync with that insn pattern. */
30092 int
30093 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30094 {
30095 int i, regno, hi_reg;
30096 int num_saves = XVECLEN (parallel_op, 0);
30097
30098 /* ARM mode. */
30099 if (TARGET_ARM)
30100 return 4;
30101 /* Thumb1 mode. */
30102 if (TARGET_THUMB1)
30103 return 2;
30104
30105 /* Thumb2 mode. */
30106 regno = REGNO (first_op);
30107 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30108 list fits in 8 bits. Normally this means all registers in the list must
30109 be LO_REGS, that is R0-R7. If any HI_REGS register is used, we must fall
30110 back to the 32-bit encoding. The one exception is PUSH, where LR (a
30111 HI_REGS register) can still be used with the 16-bit encoding. */
30112 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30113 for (i = 1; i < num_saves && !hi_reg; i++)
30114 {
30115 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30116 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30117 }
30118
30119 if (!hi_reg)
30120 return 2;
30121 return 4;
30122 }
30123
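/* For instance (illustration only): in Thumb-2 code "push {r4, r5, lr}" uses
   the 16-bit encoding, so the function above returns 2, whereas
   "push {r4, r8}" involves a high register other than LR and therefore needs
   the 32-bit encoding, returning 4.  */
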
30124 /* Compute the attribute "length" of insn. Currently, this function is used
30125 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30126 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30127 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
30128 true if OPERANDS contains an insn that explicitly updates the base register. */
30129
30130 int
30131 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30132 {
30133 /* ARM mode. */
30134 if (TARGET_ARM)
30135 return 4;
30136 /* Thumb1 mode. */
30137 if (TARGET_THUMB1)
30138 return 2;
30139
30140 rtx parallel_op = operands[0];
30141 /* Index of the last element of the PARALLEL. */
30142 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30143 /* Start with the number of the base register. */
30144 unsigned regno = REGNO (operands[1]);
30145 /* Skip the return and write-back patterns;
30146 only the register pop patterns matter for the analysis below. */
30147 unsigned first_indx = 0;
30148 first_indx += return_pc ? 1 : 0;
30149 first_indx += write_back_p ? 1 : 0;
30150
30151 /* A pop operation can be done through LDM or POP. If the base register is SP
30152 and write-back is used, then LDM is simply an alias of POP. */
30153 bool pop_p = (regno == SP_REGNUM && write_back_p);
30154 bool ldm_p = !pop_p;
30155
30156 /* Check base register for LDM. */
30157 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30158 return 4;
30159
30160 /* Check each register in the list. */
30161 for (; indx >= first_indx; indx--)
30162 {
30163 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30164 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30165 comment in arm_attr_length_push_multi. */
30166 if (REGNO_REG_CLASS (regno) == HI_REGS
30167 && (regno != PC_REGNUM || ldm_p))
30168 return 4;
30169 }
30170
30171 return 2;
30172 }
30173
30174 /* Compute the number of instructions emitted by output_move_double. */
30175 int
30176 arm_count_output_move_double_insns (rtx *operands)
30177 {
30178 int count;
30179 rtx ops[2];
30180 /* output_move_double may modify the operands array, so call it
30181 here on a copy of the array. */
30182 ops[0] = operands[0];
30183 ops[1] = operands[1];
30184 output_move_double (ops, false, &count);
30185 return count;
30186 }
30187
30188 /* Same as above, but operands are a register/memory pair in SImode.
30189 Assumes OPERANDS has the base register in position 0 and the memory operand
30190 in position 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30191 int
30192 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30193 {
30194 int count;
30195 rtx ops[2];
30196 int regnum, memnum;
30197 if (load)
30198 regnum = 0, memnum = 1;
30199 else
30200 regnum = 1, memnum = 0;
30201 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30202 ops[memnum] = adjust_address (operands[2], DImode, 0);
30203 output_move_double (ops, false, &count);
30204 return count;
30205 }
30206
30207
30208 int
30209 vfp3_const_double_for_fract_bits (rtx operand)
30210 {
30211 REAL_VALUE_TYPE r0;
30212
30213 if (!CONST_DOUBLE_P (operand))
30214 return 0;
30215
30216 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30217 if (exact_real_inverse (DFmode, &r0)
30218 && !REAL_VALUE_NEGATIVE (r0))
30219 {
30220 if (exact_real_truncate (DFmode, &r0))
30221 {
30222 HOST_WIDE_INT value = real_to_integer (&r0);
30223 value = value & 0xffffffff;
30224 if ((value != 0) && ( (value & (value - 1)) == 0))
30225 {
30226 int ret = exact_log2 (value);
30227 gcc_assert (IN_RANGE (ret, 0, 31));
30228 return ret;
30229 }
30230 }
30231 }
30232 return 0;
30233 }
30234
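/* Worked example (illustration only): for the constant 0.0625 the exact
   inverse is 16.0, which is positive, truncates exactly and is the power of
   two 2^4, so the function above returns 4; this is the fraction-bit count
   used by the corresponding fixed-point conversion patterns.  */
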
30235 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30236 log2 is in [1, 32], return that log2. Otherwise return -1.
30237 This is used in the patterns for vcvt.s32.f32 floating-point to
30238 fixed-point conversions. */
30239
30240 int
30241 vfp3_const_double_for_bits (rtx x)
30242 {
30243 const REAL_VALUE_TYPE *r;
30244
30245 if (!CONST_DOUBLE_P (x))
30246 return -1;
30247
30248 r = CONST_DOUBLE_REAL_VALUE (x);
30249
30250 if (REAL_VALUE_NEGATIVE (*r)
30251 || REAL_VALUE_ISNAN (*r)
30252 || REAL_VALUE_ISINF (*r)
30253 || !real_isinteger (r, SFmode))
30254 return -1;
30255
30256 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30257
30258 /* The exact_log2 above will have returned -1 if this is
30259 not an exact log2. */
30260 if (!IN_RANGE (hwint, 1, 32))
30261 return -1;
30262
30263 return hwint;
30264 }
30265
30266 \f
30267 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30268
30269 static void
30270 arm_pre_atomic_barrier (enum memmodel model)
30271 {
30272 if (need_atomic_barrier_p (model, true))
30273 emit_insn (gen_memory_barrier ());
30274 }
30275
30276 static void
30277 arm_post_atomic_barrier (enum memmodel model)
30278 {
30279 if (need_atomic_barrier_p (model, false))
30280 emit_insn (gen_memory_barrier ());
30281 }
30282
30283 /* Emit the load-exclusive and store-exclusive instructions.
30284 Use acquire and release versions if necessary. */
30285
30286 static void
30287 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30288 {
30289 rtx (*gen) (rtx, rtx);
30290
30291 if (acq)
30292 {
30293 switch (mode)
30294 {
30295 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30296 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30297 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30298 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30299 default:
30300 gcc_unreachable ();
30301 }
30302 }
30303 else
30304 {
30305 switch (mode)
30306 {
30307 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30308 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30309 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30310 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30311 default:
30312 gcc_unreachable ();
30313 }
30314 }
30315
30316 emit_insn (gen (rval, mem));
30317 }
30318
30319 static void
30320 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30321 rtx mem, bool rel)
30322 {
30323 rtx (*gen) (rtx, rtx, rtx);
30324
30325 if (rel)
30326 {
30327 switch (mode)
30328 {
30329 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30330 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30331 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30332 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30333 default:
30334 gcc_unreachable ();
30335 }
30336 }
30337 else
30338 {
30339 switch (mode)
30340 {
30341 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30342 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30343 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30344 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30345 default:
30346 gcc_unreachable ();
30347 }
30348 }
30349
30350 emit_insn (gen (bval, rval, mem));
30351 }
30352
30353 /* Mark the previous jump instruction as unlikely. */
30354
30355 static void
30356 emit_unlikely_jump (rtx insn)
30357 {
30358 rtx_insn *jump = emit_jump_insn (insn);
30359 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30360 }
30361
30362 /* Expand a compare and swap pattern. */
30363
30364 void
30365 arm_expand_compare_and_swap (rtx operands[])
30366 {
30367 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30368 machine_mode mode, cmp_mode;
30369
30370 bval = operands[0];
30371 rval = operands[1];
30372 mem = operands[2];
30373 oldval = operands[3];
30374 newval = operands[4];
30375 is_weak = operands[5];
30376 mod_s = operands[6];
30377 mod_f = operands[7];
30378 mode = GET_MODE (mem);
30379
30380 /* Normally the succ memory model must be stronger than fail, but in the
30381 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30382 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30383
30384 if (TARGET_HAVE_LDACQ
30385 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30386 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30387 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30388
30389 switch (mode)
30390 {
30391 case E_QImode:
30392 case E_HImode:
30393 /* For narrow modes, we're going to perform the comparison in SImode,
30394 so do the zero-extension now. */
30395 rval = gen_reg_rtx (SImode);
30396 oldval = convert_modes (SImode, mode, oldval, true);
30397 /* FALLTHRU */
30398
30399 case E_SImode:
30400 /* Force the value into a register if needed. We waited until after
30401 the zero-extension above to do this properly. */
30402 if (!arm_add_operand (oldval, SImode))
30403 oldval = force_reg (SImode, oldval);
30404 break;
30405
30406 case E_DImode:
30407 if (!cmpdi_operand (oldval, mode))
30408 oldval = force_reg (mode, oldval);
30409 break;
30410
30411 default:
30412 gcc_unreachable ();
30413 }
30414
30415 if (TARGET_THUMB1)
30416 cmp_mode = E_SImode;
30417 else
30418 cmp_mode = CC_Zmode;
30419
30420 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30421 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30422 oldval, newval, is_weak, mod_s, mod_f));
30423
30424 if (mode == QImode || mode == HImode)
30425 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30426
30427 /* In all cases, we arrange for success to be signaled by Z set.
30428 This arrangement allows for the boolean result to be used directly
30429 in a subsequent branch, post optimization. For Thumb-1 targets, the
30430 boolean negation of the result is also stored in bval, because the
30431 Thumb-1 backend lacks dependency tracking for the CC flag: flag setting
30432 is not represented at the RTL level. */
30433 if (TARGET_THUMB1)
30434 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30435 else
30436 {
30437 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30438 emit_insn (gen_rtx_SET (bval, x));
30439 }
30440 }
30441
30442 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30443 another memory store between the load-exclusive and store-exclusive can
30444 reset the monitor from Exclusive to Open state. This means we must wait
30445 until after reload to split the pattern, lest we get a register spill in
30446 the middle of the atomic sequence. Success of the compare and swap is
30447 indicated by the Z flag being set for 32-bit targets and by neg_bval being
30448 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
30449 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
30450
30451 void
30452 arm_split_compare_and_swap (rtx operands[])
30453 {
30454 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30455 machine_mode mode;
30456 enum memmodel mod_s, mod_f;
30457 bool is_weak;
30458 rtx_code_label *label1, *label2;
30459 rtx x, cond;
30460
30461 rval = operands[1];
30462 mem = operands[2];
30463 oldval = operands[3];
30464 newval = operands[4];
30465 is_weak = (operands[5] != const0_rtx);
30466 mod_s_rtx = operands[6];
30467 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
30468 mod_f = memmodel_from_int (INTVAL (operands[7]));
30469 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
30470 mode = GET_MODE (mem);
30471
30472 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
30473
30474 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
30475 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
30476
30477 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
30478 a full barrier is emitted after the store-release. */
30479 if (is_armv8_sync)
30480 use_acquire = false;
30481
30482 /* Checks whether a barrier is needed and emits one accordingly. */
30483 if (!(use_acquire || use_release))
30484 arm_pre_atomic_barrier (mod_s);
30485
30486 label1 = NULL;
30487 if (!is_weak)
30488 {
30489 label1 = gen_label_rtx ();
30490 emit_label (label1);
30491 }
30492 label2 = gen_label_rtx ();
30493
30494 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30495
30496 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1)
30497 if oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
30498 if (TARGET_32BIT)
30499 {
30500 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
30501 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30502 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30503 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30504 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30505 }
30506 else
30507 {
30508 emit_move_insn (neg_bval, const1_rtx);
30509 cond = gen_rtx_NE (VOIDmode, rval, oldval);
30510 if (thumb1_cmpneg_operand (oldval, SImode))
30511 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
30512 label2, cond));
30513 else
30514 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
30515 }
30516
30517 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
30518
30519 /* Weak or strong, we want EQ to be true for success, so that we
30520 match the flags that we got from the compare above. */
30521 if (TARGET_32BIT)
30522 {
30523 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30524 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
30525 emit_insn (gen_rtx_SET (cond, x));
30526 }
30527
30528 if (!is_weak)
30529 {
30530 /* Z is set to boolean value of !neg_bval, as required to communicate
30531 with arm_expand_compare_and_swap. */
30532 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
30533 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
30534 }
30535
30536 if (!is_mm_relaxed (mod_f))
30537 emit_label (label2);
30538
30539 /* Checks whether a barrier is needed and emits one accordingly. */
30540 if (is_armv8_sync
30541 || !(use_acquire || use_release))
30542 arm_post_atomic_barrier (mod_s);
30543
30544 if (is_mm_relaxed (mod_f))
30545 emit_label (label2);
30546 }
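/* Illustrative sketch only: for a strong compare-and-swap on a 32-bit
   target with relaxed ordering, the splitter above emits roughly

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f			@ Z clear signals failure
		strex	neg_bval, newval, [mem]
		cmp	neg_bval, #0		@ Z set iff the store succeeded
		bne	1b			@ strong CAS retries the loop
	2:

   with ldaex/stlex used instead, and barriers added, for the stronger
   memory models.  */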
30547
30548 /* Split an atomic operation pattern. Operation is given by CODE and is one
30549 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
30550 operation). Operation is performed on the content at MEM and on VALUE
30551 following the memory model MODEL_RTX. The content at MEM before and after
30552 the operation is returned in OLD_OUT and NEW_OUT respectively while the
30553 success of the operation is returned in COND. Using a scratch register or
30554 an operand register for these determines what result is returned for that
30555 pattern. */
30556
30557 void
30558 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30559 rtx value, rtx model_rtx, rtx cond)
30560 {
30561 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
30562 machine_mode mode = GET_MODE (mem);
30563 machine_mode wmode = (mode == DImode ? DImode : SImode);
30564 rtx_code_label *label;
30565 bool all_low_regs, bind_old_new;
30566 rtx x;
30567
30568 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
30569
30570 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
30571 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
30572
30573 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
30574 a full barrier is emitted after the store-release. */
30575 if (is_armv8_sync)
30576 use_acquire = false;
30577
30578 /* Checks whether a barrier is needed and emits one accordingly. */
30579 if (!(use_acquire || use_release))
30580 arm_pre_atomic_barrier (model);
30581
30582 label = gen_label_rtx ();
30583 emit_label (label);
30584
30585 if (new_out)
30586 new_out = gen_lowpart (wmode, new_out);
30587 if (old_out)
30588 old_out = gen_lowpart (wmode, old_out);
30589 else
30590 old_out = new_out;
30591 value = simplify_gen_subreg (wmode, value, mode, 0);
30592
30593 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30594
30595 /* Does the operation require destination and first operand to use the same
30596 register? This is decided by register constraints of relevant insn
30597 patterns in thumb1.md. */
30598 gcc_assert (!new_out || REG_P (new_out));
30599 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
30600 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
30601 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
30602 bind_old_new =
30603 (TARGET_THUMB1
30604 && code != SET
30605 && code != MINUS
30606 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
30607
30608 /* We want to return the old value while putting the result of the operation
30609 in the same register as the old value, so copy the old value over to the
30610 destination register and use that register for the operation. */
30611 if (old_out && bind_old_new)
30612 {
30613 emit_move_insn (new_out, old_out);
30614 old_out = new_out;
30615 }
30616
30617 switch (code)
30618 {
30619 case SET:
30620 new_out = value;
30621 break;
30622
30623 case NOT:
30624 x = gen_rtx_AND (wmode, old_out, value);
30625 emit_insn (gen_rtx_SET (new_out, x));
30626 x = gen_rtx_NOT (wmode, new_out);
30627 emit_insn (gen_rtx_SET (new_out, x));
30628 break;
30629
30630 case MINUS:
30631 if (CONST_INT_P (value))
30632 {
30633 value = GEN_INT (-INTVAL (value));
30634 code = PLUS;
30635 }
30636 /* FALLTHRU */
30637
30638 case PLUS:
30639 if (mode == DImode)
30640 {
30641 /* DImode plus/minus need to clobber flags. */
30642 /* The adddi3 and subdi3 patterns are incorrectly written so that
30643 they require matching operands, even when we could easily support
30644 three operands. Thankfully, this can be fixed up post-splitting,
30645 as the individual add+adc patterns do accept three operands and
30646 post-reload cprop can make these moves go away. */
30647 emit_move_insn (new_out, old_out);
30648 if (code == PLUS)
30649 x = gen_adddi3 (new_out, new_out, value);
30650 else
30651 x = gen_subdi3 (new_out, new_out, value);
30652 emit_insn (x);
30653 break;
30654 }
30655 /* FALLTHRU */
30656
30657 default:
30658 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30659 emit_insn (gen_rtx_SET (new_out, x));
30660 break;
30661 }
30662
30663 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30664 use_release);
30665
30666 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30667 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30668
30669 /* Checks whether a barrier is needed and emits one accordingly. */
30670 if (is_armv8_sync
30671 || !(use_acquire || use_release))
30672 arm_post_atomic_barrier (model);
30673 }
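/* Illustrative sketch only: for a relaxed SImode atomic fetch-and-add,
   the code emitted above is essentially the usual exclusive retry loop

	1:	ldrex	old_out, [mem]
		add	new_out, old_out, value
		strex	cond, new_out, [mem]
		cmp	cond, #0
		bne	1b

   with acquire/release variants and barriers added as MODEL requires.  */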
30674 \f
30675 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
30676 If CAN_INVERT, store either the result or its inverse in TARGET
30677 and return true if TARGET contains the inverse. If !CAN_INVERT,
30678 always store the result in TARGET, never its inverse.
30679
30680 Note that the handling of floating-point comparisons is not
30681 IEEE compliant. */
30682
30683 bool
30684 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
30685 bool can_invert)
30686 {
30687 machine_mode cmp_result_mode = GET_MODE (target);
30688 machine_mode cmp_mode = GET_MODE (op0);
30689
30690 bool inverted;
30691 switch (code)
30692 {
30693 /* For these we need to compute the inverse of the requested
30694 comparison. */
30695 case UNORDERED:
30696 case UNLT:
30697 case UNLE:
30698 case UNGT:
30699 case UNGE:
30700 case UNEQ:
30701 case NE:
30702 code = reverse_condition_maybe_unordered (code);
30703 if (!can_invert)
30704 {
30705 /* Recursively emit the inverted comparison into a temporary
30706 and then store its inverse in TARGET. This avoids reusing
30707 TARGET (which for integer NE could be one of the inputs). */
30708 rtx tmp = gen_reg_rtx (cmp_result_mode);
30709 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
30710 gcc_unreachable ();
30711 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
30712 return false;
30713 }
30714 inverted = true;
30715 break;
30716
30717 default:
30718 inverted = false;
30719 break;
30720 }
30721
30722 switch (code)
30723 {
30724 /* These are natively supported for zero comparisons, but otherwise
30725 require the operands to be swapped. */
30726 case LE:
30727 case LT:
30728 if (op1 != CONST0_RTX (cmp_mode))
30729 {
30730 code = swap_condition (code);
30731 std::swap (op0, op1);
30732 }
30733 /* Fall through. */
30734
30735 /* These are natively supported for both register and zero operands. */
30736 case EQ:
30737 case GE:
30738 case GT:
30739 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
30740 return inverted;
30741
30742 /* These are natively supported for register operands only.
30743 Comparisons with zero aren't useful and should be folded
30744 or canonicalized by target-independent code. */
30745 case GEU:
30746 case GTU:
30747 emit_insn (gen_neon_vc (code, cmp_mode, target,
30748 op0, force_reg (cmp_mode, op1)));
30749 return inverted;
30750
30751 /* These require the operands to be swapped and likewise do not
30752 support comparisons with zero. */
30753 case LEU:
30754 case LTU:
30755 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
30756 target, force_reg (cmp_mode, op1), op0));
30757 return inverted;
30758
30759 /* These need a combination of two comparisons. */
30760 case LTGT:
30761 case ORDERED:
30762 {
30763 /* Operands are LTGT iff (a > b || a < b).
30764 Operands are ORDERED iff (a > b || a <= b). */
30765 rtx gt_res = gen_reg_rtx (cmp_result_mode);
30766 rtx alt_res = gen_reg_rtx (cmp_result_mode);
30767 rtx_code alt_code = (code == LTGT ? LT : LE);
30768 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
30769 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
30770 gcc_unreachable ();
30771 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
30772 gt_res, alt_res)));
30773 return inverted;
30774 }
30775
30776 default:
30777 gcc_unreachable ();
30778 }
30779 }
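/* For illustration: an NE comparison is reversed to EQ above and emitted
   as a single VCEQ; if CAN_INVERT the inverted result is returned for the
   caller to handle, otherwise a VMVN is appended.  LTGT is built from two
   native comparisons, roughly

	vcgt.f32  q2, q0, q1
	vcgt.f32  q3, q1, q0
	vorr      q2, q2, q3

   (sketch only; the exact instructions depend on the element type).  */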
30780
30781 /* Expand a vcond or vcondu pattern with operands OPERANDS.
30782 CMP_RESULT_MODE is the mode of the comparison result. */
30783
30784 void
30785 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
30786 {
30787 rtx mask = gen_reg_rtx (cmp_result_mode);
30788 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
30789 operands[4], operands[5], true);
30790 if (inverted)
30791 std::swap (operands[1], operands[2]);
30792 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
30793 mask, operands[1], operands[2]));
30794 }
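/* For example (sketch only), a V4SF vcond computing "x < y ? a : b"
   first emits VCGT on the swapped operands (x < y is y > x) to form the
   mask, then a bitwise select that picks A where the mask is all-ones
   and B where it is all-zeros.  A comparison that had to be inverted
   (UNGE, say) instead swaps the A and B select operands rather than
   negating the mask.  */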
30795 \f
30796 #define MAX_VECT_LEN 16
30797
30798 struct expand_vec_perm_d
30799 {
30800 rtx target, op0, op1;
30801 vec_perm_indices perm;
30802 machine_mode vmode;
30803 bool one_vector_p;
30804 bool testing_p;
30805 };
30806
30807 /* Generate a variable permutation. */
30808
30809 static void
30810 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30811 {
30812 machine_mode vmode = GET_MODE (target);
30813 bool one_vector_p = rtx_equal_p (op0, op1);
30814
30815 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30816 gcc_checking_assert (GET_MODE (op0) == vmode);
30817 gcc_checking_assert (GET_MODE (op1) == vmode);
30818 gcc_checking_assert (GET_MODE (sel) == vmode);
30819 gcc_checking_assert (TARGET_NEON);
30820
30821 if (one_vector_p)
30822 {
30823 if (vmode == V8QImode)
30824 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30825 else
30826 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30827 }
30828 else
30829 {
30830 rtx pair;
30831
30832 if (vmode == V8QImode)
30833 {
30834 pair = gen_reg_rtx (V16QImode);
30835 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30836 pair = gen_lowpart (TImode, pair);
30837 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30838 }
30839 else
30840 {
30841 pair = gen_reg_rtx (OImode);
30842 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30843 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30844 }
30845 }
30846 }
30847
30848 void
30849 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30850 {
30851 machine_mode vmode = GET_MODE (target);
30852 unsigned int nelt = GET_MODE_NUNITS (vmode);
30853 bool one_vector_p = rtx_equal_p (op0, op1);
30854 rtx mask;
30855
30856 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30857 numbering of elements for big-endian, we must reverse the order. */
30858 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30859
30860 /* The VTBL instruction does not use a modulo index, so we must take care
30861 of that ourselves. */
30862 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30863 mask = gen_const_vec_duplicate (vmode, mask);
30864 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30865
30866 arm_expand_vec_perm_1 (target, op0, op1, sel);
30867 }
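/* For example, for a two-operand V16QImode permutation each selector
   byte is ANDed with 31 here, so an out-of-range index such as 40
   selects lane 8 as VEC_PERM_EXPR requires, rather than producing the
   zero that a raw VTBL lookup would give.  */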
30868
30869 /* Map lane ordering between architectural lane order and GCC lane order,
30870 taking the ABI into account. See comment above output_move_neon for details. */
30871
30872 static int
30873 neon_endian_lane_map (machine_mode mode, int lane)
30874 {
30875 if (BYTES_BIG_ENDIAN)
30876 {
30877 int nelems = GET_MODE_NUNITS (mode);
30878 /* Reverse lane order. */
30879 lane = (nelems - 1 - lane);
30880 /* Reverse D register order, to match ABI. */
30881 if (GET_MODE_SIZE (mode) == 16)
30882 lane = lane ^ (nelems / 2);
30883 }
30884 return lane;
30885 }
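/* For example, on a big-endian target V2SImode lanes simply map 0 <-> 1,
   while for V4SImode (a Q register) architectural lanes {0,1,2,3} map to
   GCC lanes {1,0,3,2}: lanes are reversed within each D register, but
   the two D registers keep their positions to match the ABI.  */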
30886
30887 /* Some permutations index into pairs of vectors; this is a helper function
30888 to map indexes into those pairs of vectors. */
30889
30890 static int
30891 neon_pair_endian_lane_map (machine_mode mode, int lane)
30892 {
30893 int nelem = GET_MODE_NUNITS (mode);
30894 if (BYTES_BIG_ENDIAN)
30895 lane =
30896 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
30897 return lane;
30898 }
30899
30900 /* Generate or test for an insn that supports a constant permutation. */
30901
30902 /* Recognize patterns for the VUZP insns. */
30903
30904 static bool
30905 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30906 {
30907 unsigned int i, odd, mask, nelt = d->perm.length ();
30908 rtx out0, out1, in0, in1;
30909 int first_elem;
30910 int swap_nelt;
30911
30912 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30913 return false;
30914
30915 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
30916 big-endian pattern on 64-bit vectors, so we correct for that. */
30917 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
30918 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
30919
30920 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
30921
30922 if (first_elem == neon_endian_lane_map (d->vmode, 0))
30923 odd = 0;
30924 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
30925 odd = 1;
30926 else
30927 return false;
30928 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30929
30930 for (i = 0; i < nelt; i++)
30931 {
30932 unsigned elt =
30933 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
30934 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
30935 return false;
30936 }
30937
30938 /* Success! */
30939 if (d->testing_p)
30940 return true;
30941
30942 in0 = d->op0;
30943 in1 = d->op1;
30944 if (swap_nelt != 0)
30945 std::swap (in0, in1);
30946
30947 out0 = d->target;
30948 out1 = gen_reg_rtx (d->vmode);
30949 if (odd)
30950 std::swap (out0, out1);
30951
30952 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
30953 return true;
30954 }
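/* For example (little-endian), the two-operand V8QImode selector
   {0,2,4,6,8,10,12,14} is matched here with odd == 0: it picks the
   even-indexed bytes of the concatenated inputs, which is exactly what
   the first output of VUZP.8 holds.  */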
30955
30956 /* Recognize patterns for the VZIP insns. */
30957
30958 static bool
30959 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30960 {
30961 unsigned int i, high, mask, nelt = d->perm.length ();
30962 rtx out0, out1, in0, in1;
30963 int first_elem;
30964 bool is_swapped;
30965
30966 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30967 return false;
30968
30969 is_swapped = BYTES_BIG_ENDIAN;
30970
30971 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
30972
30973 high = nelt / 2;
30974 if (first_elem == neon_endian_lane_map (d->vmode, high))
30975 ;
30976 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
30977 high = 0;
30978 else
30979 return false;
30980 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30981
30982 for (i = 0; i < nelt / 2; i++)
30983 {
30984 unsigned elt =
30985 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
30986 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
30987 != elt)
30988 return false;
30989 elt =
30990 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
30991 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
30992 != elt)
30993 return false;
30994 }
30995
30996 /* Success! */
30997 if (d->testing_p)
30998 return true;
30999
31000 in0 = d->op0;
31001 in1 = d->op1;
31002 if (is_swapped)
31003 std::swap (in0, in1);
31004
31005 out0 = d->target;
31006 out1 = gen_reg_rtx (d->vmode);
31007 if (high)
31008 std::swap (out0, out1);
31009
31010 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31011 return true;
31012 }
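/* For example (little-endian), the two-operand V8QImode selector
   {0,8,1,9,2,10,3,11} is matched here with high == 0: it interleaves the
   low halves of the two inputs, which is what the first output of
   VZIP.8 holds.  */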
31013
31014 /* Recognize patterns for the VREV insns. */
31015 static bool
31016 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31017 {
31018 unsigned int i, j, diff, nelt = d->perm.length ();
31019 rtx (*gen) (machine_mode, rtx, rtx);
31020
31021 if (!d->one_vector_p)
31022 return false;
31023
31024 diff = d->perm[0];
31025 switch (diff)
31026 {
31027 case 7:
31028 switch (d->vmode)
31029 {
31030 case E_V16QImode:
31031 case E_V8QImode:
31032 gen = gen_neon_vrev64;
31033 break;
31034 default:
31035 return false;
31036 }
31037 break;
31038 case 3:
31039 switch (d->vmode)
31040 {
31041 case E_V16QImode:
31042 case E_V8QImode:
31043 gen = gen_neon_vrev32;
31044 break;
31045 case E_V8HImode:
31046 case E_V4HImode:
31047 case E_V8HFmode:
31048 case E_V4HFmode:
31049 gen = gen_neon_vrev64;
31050 break;
31051 default:
31052 return false;
31053 }
31054 break;
31055 case 1:
31056 switch (d->vmode)
31057 {
31058 case E_V16QImode:
31059 case E_V8QImode:
31060 gen = gen_neon_vrev16;
31061 break;
31062 case E_V8HImode:
31063 case E_V4HImode:
31064 gen = gen_neon_vrev32;
31065 break;
31066 case E_V4SImode:
31067 case E_V2SImode:
31068 case E_V4SFmode:
31069 case E_V2SFmode:
31070 gen = gen_neon_vrev64;
31071 break;
31072 default:
31073 return false;
31074 }
31075 break;
31076 default:
31077 return false;
31078 }
31079
31080 for (i = 0; i < nelt ; i += diff + 1)
31081 for (j = 0; j <= diff; j += 1)
31082 {
31083 /* This is guaranteed to be true, as diff is one of
31084 7, 3 or 1 and we should have enough elements in the
31085 queue to generate this. Getting a vector mask with a
31086 diff value other than one of these implies that
31087 something has gone wrong by the time we get here. */
31088 gcc_assert (i + j < nelt);
31089 if (d->perm[i + j] != i + diff - j)
31090 return false;
31091 }
31092
31093 /* Success! */
31094 if (d->testing_p)
31095 return true;
31096
31097 emit_insn (gen (d->vmode, d->target, d->op0));
31098 return true;
31099 }
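/* For example, a V8QImode selector of {3,2,1,0,7,6,5,4} has diff == 3
   and is matched above as VREV32.8 (reverse the bytes within each 32-bit
   word), while {1,0,3,2,5,4,7,6} has diff == 1 and becomes VREV16.8.  */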
31100
31101 /* Recognize patterns for the VTRN insns. */
31102
31103 static bool
31104 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31105 {
31106 unsigned int i, odd, mask, nelt = d->perm.length ();
31107 rtx out0, out1, in0, in1;
31108
31109 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31110 return false;
31111
31112 /* Note that these are little-endian tests. Adjust for big-endian later. */
31113 if (d->perm[0] == 0)
31114 odd = 0;
31115 else if (d->perm[0] == 1)
31116 odd = 1;
31117 else
31118 return false;
31119 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31120
31121 for (i = 0; i < nelt; i += 2)
31122 {
31123 if (d->perm[i] != i + odd)
31124 return false;
31125 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31126 return false;
31127 }
31128
31129 /* Success! */
31130 if (d->testing_p)
31131 return true;
31132
31133 in0 = d->op0;
31134 in1 = d->op1;
31135 if (BYTES_BIG_ENDIAN)
31136 {
31137 std::swap (in0, in1);
31138 odd = !odd;
31139 }
31140
31141 out0 = d->target;
31142 out1 = gen_reg_rtx (d->vmode);
31143 if (odd)
31144 std::swap (out0, out1);
31145
31146 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31147 return true;
31148 }
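/* For example (little-endian), the two-operand V8QImode selector
   {0,8,2,10,4,12,6,14} is matched here with odd == 0: the even lanes of
   the first input interleaved with the even lanes of the second, i.e.
   the first output of VTRN.8.  */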
31149
31150 /* Recognize patterns for the VEXT insns. */
31151
31152 static bool
31153 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31154 {
31155 unsigned int i, nelt = d->perm.length ();
31156 rtx offset;
31157
31158 unsigned int location;
31159
31160 unsigned int next = d->perm[0] + 1;
31161
31162 /* TODO: Handle GCC's numbering of elements for big-endian. */
31163 if (BYTES_BIG_ENDIAN)
31164 return false;
31165
31166 /* Check if the extracted indexes are increasing by one. */
31167 for (i = 1; i < nelt; next++, i++)
31168 {
31169 /* If we hit the most significant element of the 2nd vector in
31170 the previous iteration, no need to test further. */
31171 if (next == 2 * nelt)
31172 return false;
31173
31174 /* If we are operating on only one vector, it could be a
31175 rotation. If there are only two elements of size < 64, let
31176 arm_evpc_neon_vrev catch it. */
31177 if (d->one_vector_p && (next == nelt))
31178 {
31179 if ((nelt == 2) && (d->vmode != V2DImode))
31180 return false;
31181 else
31182 next = 0;
31183 }
31184
31185 if (d->perm[i] != next)
31186 return false;
31187 }
31188
31189 location = d->perm[0];
31190
31191 /* Success! */
31192 if (d->testing_p)
31193 return true;
31194
31195 offset = GEN_INT (location);
31196
31197 if (d->vmode == E_DImode)
31198 return false;
31199
31200 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31201 return true;
31202 }
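/* For example, the two-operand V8QImode selector {3,4,5,6,7,8,9,10}
   increases by one from an initial index of 3, so it is matched above
   and emitted as VEXT.8 with an immediate offset of 3.  */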
31203
31204 /* The NEON VTBL instruction is a fully variable permutation that's even
31205 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31206 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31207 can do slightly better by expanding this as a constant where we don't
31208 have to apply a mask. */
31209
31210 static bool
31211 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31212 {
31213 rtx rperm[MAX_VECT_LEN], sel;
31214 machine_mode vmode = d->vmode;
31215 unsigned int i, nelt = d->perm.length ();
31216
31217 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31218 numbering of elements for big-endian, we must reverse the order. */
31219 if (BYTES_BIG_ENDIAN)
31220 return false;
31221
31222 if (d->testing_p)
31223 return true;
31224
31225 /* Generic code will try constant permutation twice. Once with the
31226 original mode and again with the elements lowered to QImode.
31227 So wait and don't do the selector expansion ourselves. */
31228 if (vmode != V8QImode && vmode != V16QImode)
31229 return false;
31230
31231 for (i = 0; i < nelt; ++i)
31232 rperm[i] = GEN_INT (d->perm[i]);
31233 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31234 sel = force_reg (vmode, sel);
31235
31236 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31237 return true;
31238 }
31239
31240 static bool
31241 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31242 {
31243 /* Check if the input mask matches vext before reordering the
31244 operands. */
31245 if (TARGET_NEON)
31246 if (arm_evpc_neon_vext (d))
31247 return true;
31248
31249 /* The pattern matching functions above are written to look for a small
31250 number to begin the sequence (0, 1, N/2). If we begin with an index
31251 from the second operand, we can swap the operands. */
31252 unsigned int nelt = d->perm.length ();
31253 if (d->perm[0] >= nelt)
31254 {
31255 d->perm.rotate_inputs (1);
31256 std::swap (d->op0, d->op1);
31257 }
31258
31259 if (TARGET_NEON)
31260 {
31261 if (arm_evpc_neon_vuzp (d))
31262 return true;
31263 if (arm_evpc_neon_vzip (d))
31264 return true;
31265 if (arm_evpc_neon_vrev (d))
31266 return true;
31267 if (arm_evpc_neon_vtrn (d))
31268 return true;
31269 return arm_evpc_neon_vtbl (d);
31270 }
31271 return false;
31272 }
31273
31274 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31275
31276 static bool
31277 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
31278 const vec_perm_indices &sel)
31279 {
31280 struct expand_vec_perm_d d;
31281 int i, nelt, which;
31282
31283 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31284 return false;
31285
31286 d.target = target;
31287 d.op0 = op0;
31288 d.op1 = op1;
31289
31290 d.vmode = vmode;
31291 gcc_assert (VECTOR_MODE_P (d.vmode));
31292 d.testing_p = !target;
31293
31294 nelt = GET_MODE_NUNITS (d.vmode);
31295 for (i = which = 0; i < nelt; ++i)
31296 {
31297 int ei = sel[i] & (2 * nelt - 1);
31298 which |= (ei < nelt ? 1 : 2);
31299 }
31300
31301 switch (which)
31302 {
31303 default:
31304 gcc_unreachable();
31305
31306 case 3:
31307 d.one_vector_p = false;
31308 if (d.testing_p || !rtx_equal_p (op0, op1))
31309 break;
31310
31311 /* The elements of PERM do not suggest that only the first operand
31312 is used, but both operands are identical. Allow easier matching
31313 of the permutation by folding the permutation into the single
31314 input vector. */
31315 /* FALLTHRU */
31316 case 2:
31317 d.op0 = op1;
31318 d.one_vector_p = true;
31319 break;
31320
31321 case 1:
31322 d.op1 = op0;
31323 d.one_vector_p = true;
31324 break;
31325 }
31326
31327 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31328
31329 if (!d.testing_p)
31330 return arm_expand_vec_perm_const_1 (&d);
31331
31332 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31333 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31334 if (!d.one_vector_p)
31335 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31336
31337 start_sequence ();
31338 bool ret = arm_expand_vec_perm_const_1 (&d);
31339 end_sequence ();
31340
31341 return ret;
31342 }
31343
31344 bool
31345 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31346 {
31347 /* If we are soft float then all auto-increment forms are ok, provided
31348 we either have ldrd or the access is no wider than a word. */
31349 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31350 return true;
31351
31352 switch (code)
31353 {
31354 /* Post-increment and pre-decrement are supported for all
31355 instruction forms except for vector forms. */
31356 case ARM_POST_INC:
31357 case ARM_PRE_DEC:
31358 if (VECTOR_MODE_P (mode))
31359 {
31360 if (code != ARM_PRE_DEC)
31361 return true;
31362 else
31363 return false;
31364 }
31365
31366 return true;
31367
31368 case ARM_POST_DEC:
31369 case ARM_PRE_INC:
31370 /* Without LDRD and mode size greater than
31371 word size, there is no point in auto-incrementing
31372 because ldm and stm will not have these forms. */
31373 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31374 return false;
31375
31376 /* Vector and floating point modes do not support
31377 these auto increment forms. */
31378 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31379 return false;
31380
31381 return true;
31382
31383 default:
31384 return false;
31385
31386 }
31387
31388 return false;
31389 }
31390
31391 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31392 on ARM, since we know that shifts by negative amounts are no-ops.
31393 Additionally, the default expansion code is not available or suitable
31394 for post-reload insn splits (this can occur when the register allocator
31395 chooses not to do a shift in NEON).
31396
31397 This function is used in both initial expand and post-reload splits, and
31398 handles all kinds of 64-bit shifts.
31399
31400 Input requirements:
31401 - It is safe for the input and output to be the same register, but
31402 early-clobber rules apply for the shift amount and scratch registers.
31403 - Shift by register requires both scratch registers. In all other cases
31404 the scratch registers may be NULL.
31405 - Ashiftrt by a register also clobbers the CC register. */
31406 void
31407 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31408 rtx amount, rtx scratch1, rtx scratch2)
31409 {
31410 rtx out_high = gen_highpart (SImode, out);
31411 rtx out_low = gen_lowpart (SImode, out);
31412 rtx in_high = gen_highpart (SImode, in);
31413 rtx in_low = gen_lowpart (SImode, in);
31414
31415 /* Terminology:
31416 in = the register pair containing the input value.
31417 out = the destination register pair.
31418 up = the high- or low-part of each pair.
31419 down = the opposite part to "up".
31420 In a shift, we can consider bits to shift from "up"-stream to
31421 "down"-stream, so in a left-shift "up" is the low-part and "down"
31422 is the high-part of each register pair. */
31423
31424 rtx out_up = code == ASHIFT ? out_low : out_high;
31425 rtx out_down = code == ASHIFT ? out_high : out_low;
31426 rtx in_up = code == ASHIFT ? in_low : in_high;
31427 rtx in_down = code == ASHIFT ? in_high : in_low;
31428
31429 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31430 gcc_assert (out
31431 && (REG_P (out) || GET_CODE (out) == SUBREG)
31432 && GET_MODE (out) == DImode);
31433 gcc_assert (in
31434 && (REG_P (in) || GET_CODE (in) == SUBREG)
31435 && GET_MODE (in) == DImode);
31436 gcc_assert (amount
31437 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31438 && GET_MODE (amount) == SImode)
31439 || CONST_INT_P (amount)));
31440 gcc_assert (scratch1 == NULL
31441 || (GET_CODE (scratch1) == SCRATCH)
31442 || (GET_MODE (scratch1) == SImode
31443 && REG_P (scratch1)));
31444 gcc_assert (scratch2 == NULL
31445 || (GET_CODE (scratch2) == SCRATCH)
31446 || (GET_MODE (scratch2) == SImode
31447 && REG_P (scratch2)));
31448 gcc_assert (!REG_P (out) || !REG_P (amount)
31449 || !HARD_REGISTER_P (out)
31450 || (REGNO (out) != REGNO (amount)
31451 && REGNO (out) + 1 != REGNO (amount)));
31452
31453 /* Macros to make following code more readable. */
31454 #define SUB_32(DEST,SRC) \
31455 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31456 #define RSB_32(DEST,SRC) \
31457 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31458 #define SUB_S_32(DEST,SRC) \
31459 gen_addsi3_compare0 ((DEST), (SRC), \
31460 GEN_INT (-32))
31461 #define SET(DEST,SRC) \
31462 gen_rtx_SET ((DEST), (SRC))
31463 #define SHIFT(CODE,SRC,AMOUNT) \
31464 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31465 #define LSHIFT(CODE,SRC,AMOUNT) \
31466 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31467 SImode, (SRC), (AMOUNT))
31468 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31469 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31470 SImode, (SRC), (AMOUNT))
31471 #define ORR(A,B) \
31472 gen_rtx_IOR (SImode, (A), (B))
31473 #define BRANCH(COND,LABEL) \
31474 gen_arm_cond_branch ((LABEL), \
31475 gen_rtx_ ## COND (CCmode, cc_reg, \
31476 const0_rtx), \
31477 cc_reg)
31478
31479 /* Shifts by register and shifts by constant are handled separately. */
31480 if (CONST_INT_P (amount))
31481 {
31482 /* We have a shift-by-constant. */
31483
31484 /* First, handle out-of-range shift amounts.
31485 In both cases we try to match the result that an ARM instruction in a
31486 shift-by-register would give. This helps reduce execution
31487 differences between optimization levels, but it won't stop other
31488 parts of the compiler doing different things. This is "undefined
31489 behavior", in any case. */
31490 if (INTVAL (amount) <= 0)
31491 emit_insn (gen_movdi (out, in));
31492 else if (INTVAL (amount) >= 64)
31493 {
31494 if (code == ASHIFTRT)
31495 {
31496 rtx const31_rtx = GEN_INT (31);
31497 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31498 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31499 }
31500 else
31501 emit_insn (gen_movdi (out, const0_rtx));
31502 }
31503
31504 /* Now handle valid shifts. */
31505 else if (INTVAL (amount) < 32)
31506 {
31507 /* Shifts by a constant less than 32. */
31508 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31509
31510 /* Clearing the out register in DImode first avoids lots
31511 of spilling and results in less stack usage.
31512 Later this redundant insn is completely removed.
31513 Do that only if "in" and "out" are different registers. */
31514 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31515 emit_insn (SET (out, const0_rtx));
31516 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31517 emit_insn (SET (out_down,
31518 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31519 out_down)));
31520 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31521 }
31522 else
31523 {
31524 /* Shifts by a constant greater than 31. */
31525 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31526
31527 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31528 emit_insn (SET (out, const0_rtx));
31529 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31530 if (code == ASHIFTRT)
31531 emit_insn (gen_ashrsi3 (out_up, in_up,
31532 GEN_INT (31)));
31533 else
31534 emit_insn (SET (out_up, const0_rtx));
31535 }
31536 }
31537 else
31538 {
31539 /* We have a shift-by-register. */
31540 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
31541
31542 /* This alternative requires the scratch registers. */
31543 gcc_assert (scratch1 && REG_P (scratch1));
31544 gcc_assert (scratch2 && REG_P (scratch2));
31545
31546 /* We will need the values "amount-32" and "32-amount" later.
31547 Swapping them around now allows the later code to be more general. */
31548 switch (code)
31549 {
31550 case ASHIFT:
31551 emit_insn (SUB_32 (scratch1, amount));
31552 emit_insn (RSB_32 (scratch2, amount));
31553 break;
31554 case ASHIFTRT:
31555 emit_insn (RSB_32 (scratch1, amount));
31556 /* Also set CC = amount > 32. */
31557 emit_insn (SUB_S_32 (scratch2, amount));
31558 break;
31559 case LSHIFTRT:
31560 emit_insn (RSB_32 (scratch1, amount));
31561 emit_insn (SUB_32 (scratch2, amount));
31562 break;
31563 default:
31564 gcc_unreachable ();
31565 }
31566
31567 /* Emit code like this:
31568
31569 arithmetic-left:
31570 out_down = in_down << amount;
31571 out_down = (in_up << (amount - 32)) | out_down;
31572 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31573 out_up = in_up << amount;
31574
31575 arithmetic-right:
31576 out_down = in_down >> amount;
31577 out_down = (in_up << (32 - amount)) | out_down;
31578 if (amount < 32)
31579 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31580 out_up = in_up >> amount;
31581
31582 logical-right:
31583 out_down = in_down >> amount;
31584 out_down = (in_up << (32 - amount)) | out_down;
31585 if (amount < 32)
31586 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31587 out_up = in_up >> amount;
31588
31589 The ARM and Thumb2 variants are the same but implemented slightly
31590 differently. If this were only called during expand we could just
31591 use the Thumb2 case and let combine do the right thing, but this
31592 can also be called from post-reload splitters. */
31593
31594 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31595
31596 if (!TARGET_THUMB2)
31597 {
31598 /* Emit code for ARM mode. */
31599 emit_insn (SET (out_down,
31600 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31601 if (code == ASHIFTRT)
31602 {
31603 rtx_code_label *done_label = gen_label_rtx ();
31604 emit_jump_insn (BRANCH (LT, done_label));
31605 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31606 out_down)));
31607 emit_label (done_label);
31608 }
31609 else
31610 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31611 out_down)));
31612 }
31613 else
31614 {
31615 /* Emit code for Thumb2 mode.
31616 Thumb2 can't do shift and or in one insn. */
31617 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31618 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31619
31620 if (code == ASHIFTRT)
31621 {
31622 rtx_code_label *done_label = gen_label_rtx ();
31623 emit_jump_insn (BRANCH (LT, done_label));
31624 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31625 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31626 emit_label (done_label);
31627 }
31628 else
31629 {
31630 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31631 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31632 }
31633 }
31634
31635 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31636 }
31637
31638 #undef SUB_32
31639 #undef RSB_32
31640 #undef SUB_S_32
31641 #undef SET
31642 #undef SHIFT
31643 #undef LSHIFT
31644 #undef REV_LSHIFT
31645 #undef ORR
31646 #undef BRANCH
31647 }
31648
31649 /* Returns true if the pattern is a valid symbolic address, which is either a
31650 symbol_ref or (symbol_ref + addend).
31651
31652 According to the ARM ELF ABI, the initial addend of REL-type relocations
31653 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
31654 literal field of the instruction as a 16-bit signed value in the range
31655 -32768 <= A < 32768. */
31656
31657 bool
31658 arm_valid_symbolic_address_p (rtx addr)
31659 {
31660 rtx xop0, xop1 = NULL_RTX;
31661 rtx tmp = addr;
31662
31663 if (target_word_relocations)
31664 return false;
31665
31666 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
31667 return true;
31668
31669 /* (const (plus: symbol_ref const_int)) */
31670 if (GET_CODE (addr) == CONST)
31671 tmp = XEXP (addr, 0);
31672
31673 if (GET_CODE (tmp) == PLUS)
31674 {
31675 xop0 = XEXP (tmp, 0);
31676 xop1 = XEXP (tmp, 1);
31677
31678 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
31679 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
31680 }
31681
31682 return false;
31683 }
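/* For example, (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 0x7fff))) are accepted,
   whereas an addend of 0x8000 is rejected because it cannot be encoded
   as the signed 16-bit initial addend of a REL-type MOVW/MOVT
   relocation.  */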
31684
31685 /* Returns true if *COMPARISON is a valid comparison operation, putting
31686 the operands into a form that is valid for it. */
31687 bool
31688 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31689 {
31690 enum rtx_code code = GET_CODE (*comparison);
31691 int code_int;
31692 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31693 ? GET_MODE (*op2) : GET_MODE (*op1);
31694
31695 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31696
31697 if (code == UNEQ || code == LTGT)
31698 return false;
31699
31700 code_int = (int)code;
31701 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31702 PUT_CODE (*comparison, (enum rtx_code)code_int);
31703
31704 switch (mode)
31705 {
31706 case E_SImode:
31707 if (!arm_add_operand (*op1, mode))
31708 *op1 = force_reg (mode, *op1);
31709 if (!arm_add_operand (*op2, mode))
31710 *op2 = force_reg (mode, *op2);
31711 return true;
31712
31713 case E_DImode:
31714 /* gen_compare_reg() will sort out any invalid operands. */
31715 return true;
31716
31717 case E_HFmode:
31718 if (!TARGET_VFP_FP16INST)
31719 break;
31720 /* FP16 comparisons are done in SF mode. */
31721 mode = SFmode;
31722 *op1 = convert_to_mode (mode, *op1, 1);
31723 *op2 = convert_to_mode (mode, *op2, 1);
31724 /* Fall through. */
31725 case E_SFmode:
31726 case E_DFmode:
31727 if (!vfp_compare_operand (*op1, mode))
31728 *op1 = force_reg (mode, *op1);
31729 if (!vfp_compare_operand (*op2, mode))
31730 *op2 = force_reg (mode, *op2);
31731 return true;
31732 default:
31733 break;
31734 }
31735
31736 return false;
31737
31738 }
31739
31740 /* Maximum number of instructions to set block of memory. */
31741 static int
31742 arm_block_set_max_insns (void)
31743 {
31744 if (optimize_function_for_size_p (cfun))
31745 return 4;
31746 else
31747 return current_tune->max_insns_inline_memset;
31748 }
31749
31750 /* Return TRUE if it's profitable to set block of memory for
31751 non-vectorized case. VAL is the value to set the memory
31752 with. LENGTH is the number of bytes to set. ALIGN is the
31753 alignment of the destination memory in bytes. UNALIGNED_P
31754 is TRUE if we can only set the memory with instructions
31755 meeting alignment requirements. USE_STRD_P is TRUE if we
31756 can use strd to set the memory. */
31757 static bool
31758 arm_block_set_non_vect_profit_p (rtx val,
31759 unsigned HOST_WIDE_INT length,
31760 unsigned HOST_WIDE_INT align,
31761 bool unaligned_p, bool use_strd_p)
31762 {
31763 int num = 0;
31764 /* For a leftover of 0-7 bytes, this gives the minimum number of
31765 strb/strh/str instructions needed to set it. */
31766 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31767
31768 if (unaligned_p)
31769 {
31770 num = arm_const_inline_cost (SET, val);
31771 num += length / align + length % align;
31772 }
31773 else if (use_strd_p)
31774 {
31775 num = arm_const_double_inline_cost (val);
31776 num += (length >> 3) + leftover[length & 7];
31777 }
31778 else
31779 {
31780 num = arm_const_inline_cost (SET, val);
31781 num += (length >> 2) + leftover[length & 3];
31782 }
31783
31784 /* We may be able to combine last pair STRH/STRB into a single STR
31785 by shifting one byte back. */
31786 if (unaligned_access && length > 3 && (length & 3) == 3)
31787 num--;
31788
31789 return (num <= arm_block_set_max_insns ());
31790 }
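/* Worked example (a sketch, assuming the constant itself takes a single
   instruction to build): for length == 15 with a word-aligned destination
   and neither strd nor the unaligned-only path, num = 1 + (15 >> 2)
   + leftover[3] = 6, reduced to 5 when unaligned access allows the final
   STRH/STRB pair to be merged into one STR; the block is inlined only if
   that count does not exceed arm_block_set_max_insns ().  */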
31791
31792 /* Return TRUE if it's profitable to set block of memory for
31793 vectorized case. LENGTH is the number of bytes to set.
31794 ALIGN is the alignment of destination memory in bytes.
31795 MODE is the vector mode used to set the memory. */
31796 static bool
31797 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31798 unsigned HOST_WIDE_INT align,
31799 machine_mode mode)
31800 {
31801 int num;
31802 bool unaligned_p = ((align & 3) != 0);
31803 unsigned int nelt = GET_MODE_NUNITS (mode);
31804
31805 /* Instruction loading constant value. */
31806 num = 1;
31807 /* Instructions storing the memory. */
31808 num += (length + nelt - 1) / nelt;
31809 /* Instructions adjusting the address expression. We only need to
31810 adjust the address expression if it's 4-byte aligned and the leftover
31811 bytes can only be stored with a misaligned store instruction. */
31812 if (!unaligned_p && (length & 3) != 0)
31813 num++;
31814
31815 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31816 if (!unaligned_p && mode == V16QImode)
31817 num--;
31818
31819 return (num <= arm_block_set_max_insns ());
31820 }
31821
31822 /* Set a block of memory using vectorization instructions for the
31823 unaligned case. We fill the first LENGTH bytes of the memory
31824 area starting from DSTBASE with byte constant VALUE. ALIGN is
31825 the alignment requirement of memory. Return TRUE if succeeded. */
31826 static bool
31827 arm_block_set_unaligned_vect (rtx dstbase,
31828 unsigned HOST_WIDE_INT length,
31829 unsigned HOST_WIDE_INT value,
31830 unsigned HOST_WIDE_INT align)
31831 {
31832 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
31833 rtx dst, mem;
31834 rtx val_vec, reg;
31835 rtx (*gen_func) (rtx, rtx);
31836 machine_mode mode;
31837 unsigned HOST_WIDE_INT v = value;
31838 unsigned int offset = 0;
31839 gcc_assert ((align & 0x3) != 0);
31840 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31841 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31842 if (length >= nelt_v16)
31843 {
31844 mode = V16QImode;
31845 gen_func = gen_movmisalignv16qi;
31846 }
31847 else
31848 {
31849 mode = V8QImode;
31850 gen_func = gen_movmisalignv8qi;
31851 }
31852 nelt_mode = GET_MODE_NUNITS (mode);
31853 gcc_assert (length >= nelt_mode);
31854 /* Skip if it isn't profitable. */
31855 if (!arm_block_set_vect_profit_p (length, align, mode))
31856 return false;
31857
31858 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31859 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31860
31861 v = sext_hwi (v, BITS_PER_WORD);
31862
31863 reg = gen_reg_rtx (mode);
31864 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
31865 /* Emit instruction loading the constant value. */
31866 emit_move_insn (reg, val_vec);
31867
31868 /* Handle nelt_mode bytes in a vector. */
31869 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31870 {
31871 emit_insn ((*gen_func) (mem, reg));
31872 if (i + 2 * nelt_mode <= length)
31873 {
31874 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31875 offset += nelt_mode;
31876 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31877 }
31878 }
31879
31880 /* If at least nelt_v8 bytes are left over, we must be in
31881 V16QI mode. */
31882 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31883
31884 /* Handle (8, 16) bytes leftover. */
31885 if (i + nelt_v8 < length)
31886 {
31887 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31888 offset += length - i;
31889 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31890
31891 /* We are shifting bytes back, set the alignment accordingly. */
31892 if ((length & 1) != 0 && align >= 2)
31893 set_mem_align (mem, BITS_PER_UNIT);
31894
31895 emit_insn (gen_movmisalignv16qi (mem, reg));
31896 }
31897 /* Handle (0, 8] bytes leftover. */
31898 else if (i < length && i + nelt_v8 >= length)
31899 {
31900 if (mode == V16QImode)
31901 reg = gen_lowpart (V8QImode, reg);
31902
31903 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31904 + (nelt_mode - nelt_v8))));
31905 offset += (length - i) + (nelt_mode - nelt_v8);
31906 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
31907
31908 /* We are shifting bytes back, set the alignment accordingly. */
31909 if ((length & 1) != 0 && align >= 2)
31910 set_mem_align (mem, BITS_PER_UNIT);
31911
31912 emit_insn (gen_movmisalignv8qi (mem, reg));
31913 }
31914
31915 return true;
31916 }
31917
31918 /* Set a block of memory using vectorization instructions for the
31919 aligned case. We fill the first LENGTH bytes of the memory area
31920 starting from DSTBASE with byte constant VALUE. ALIGN is the
31921 alignment requirement of memory. Return TRUE if succeeded. */
31922 static bool
31923 arm_block_set_aligned_vect (rtx dstbase,
31924 unsigned HOST_WIDE_INT length,
31925 unsigned HOST_WIDE_INT value,
31926 unsigned HOST_WIDE_INT align)
31927 {
31928 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
31929 rtx dst, addr, mem;
31930 rtx val_vec, reg;
31931 machine_mode mode;
31932 unsigned int offset = 0;
31933
31934 gcc_assert ((align & 0x3) == 0);
31935 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31936 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31937 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31938 mode = V16QImode;
31939 else
31940 mode = V8QImode;
31941
31942 nelt_mode = GET_MODE_NUNITS (mode);
31943 gcc_assert (length >= nelt_mode);
31944 /* Skip if it isn't profitable. */
31945 if (!arm_block_set_vect_profit_p (length, align, mode))
31946 return false;
31947
31948 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31949
31950 reg = gen_reg_rtx (mode);
31951 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
31952 /* Emit instruction loading the constant value. */
31953 emit_move_insn (reg, val_vec);
31954
31955 i = 0;
31956 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31957 if (mode == V16QImode)
31958 {
31959 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31960 emit_insn (gen_movmisalignv16qi (mem, reg));
31961 i += nelt_mode;
31962 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31963 if (i + nelt_v8 < length && i + nelt_v16 > length)
31964 {
31965 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31966 offset += length - nelt_mode;
31967 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31968 /* We are shifting bytes back, set the alignment accordingly. */
31969 if ((length & 0x3) == 0)
31970 set_mem_align (mem, BITS_PER_UNIT * 4);
31971 else if ((length & 0x1) == 0)
31972 set_mem_align (mem, BITS_PER_UNIT * 2);
31973 else
31974 set_mem_align (mem, BITS_PER_UNIT);
31975
31976 emit_insn (gen_movmisalignv16qi (mem, reg));
31977 return true;
31978 }
31979 /* Fall through for bytes leftover. */
31980 mode = V8QImode;
31981 nelt_mode = GET_MODE_NUNITS (mode);
31982 reg = gen_lowpart (V8QImode, reg);
31983 }
31984
31985 /* Handle 8 bytes in a vector. */
31986 for (; (i + nelt_mode <= length); i += nelt_mode)
31987 {
31988 addr = plus_constant (Pmode, dst, i);
31989 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
31990 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
31991 emit_move_insn (mem, reg);
31992 else
31993 emit_insn (gen_unaligned_storev8qi (mem, reg));
31994 }
31995
31996 /* Handle single word leftover by shifting 4 bytes back. We can
31997 use aligned access for this case. */
31998 if (i + UNITS_PER_WORD == length)
31999 {
32000 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32001 offset += i - UNITS_PER_WORD;
32002 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32003 /* We are shifting 4 bytes back, set the alignment accordingly. */
32004 if (align > UNITS_PER_WORD)
32005 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32006
32007 emit_insn (gen_unaligned_storev8qi (mem, reg));
32008 }
32009 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32010 We have to use unaligned access for this case. */
32011 else if (i < length)
32012 {
32013 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32014 offset += length - nelt_mode;
32015 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32016 /* We are shifting bytes back, set the alignment accordingly. */
32017 if ((length & 1) == 0)
32018 set_mem_align (mem, BITS_PER_UNIT * 2);
32019 else
32020 set_mem_align (mem, BITS_PER_UNIT);
32021
32022 emit_insn (gen_movmisalignv8qi (mem, reg));
32023 }
32024
32025 return true;
32026 }
32027
32028 /* Set a block of memory using plain strh/strb instructions, only
32029 using instructions allowed by ALIGN on processor. We fill the
32030 first LENGTH bytes of the memory area starting from DSTBASE
32031 with byte constant VALUE. ALIGN is the alignment requirement
32032 of memory. */
32033 static bool
32034 arm_block_set_unaligned_non_vect (rtx dstbase,
32035 unsigned HOST_WIDE_INT length,
32036 unsigned HOST_WIDE_INT value,
32037 unsigned HOST_WIDE_INT align)
32038 {
32039 unsigned int i;
32040 rtx dst, addr, mem;
32041 rtx val_exp, val_reg, reg;
32042 machine_mode mode;
32043 HOST_WIDE_INT v = value;
32044
32045 gcc_assert (align == 1 || align == 2);
32046
32047 if (align == 2)
32048 v |= (value << BITS_PER_UNIT);
32049
32050 v = sext_hwi (v, BITS_PER_WORD);
32051 val_exp = GEN_INT (v);
32052 /* Skip if it isn't profitable. */
32053 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32054 align, true, false))
32055 return false;
32056
32057 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32058 mode = (align == 2 ? HImode : QImode);
32059 val_reg = force_reg (SImode, val_exp);
32060 reg = gen_lowpart (mode, val_reg);
32061
32062 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32063 {
32064 addr = plus_constant (Pmode, dst, i);
32065 mem = adjust_automodify_address (dstbase, mode, addr, i);
32066 emit_move_insn (mem, reg);
32067 }
32068
32069 /* Handle single byte leftover. */
32070 if (i + 1 == length)
32071 {
32072 reg = gen_lowpart (QImode, val_reg);
32073 addr = plus_constant (Pmode, dst, i);
32074 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32075 emit_move_insn (mem, reg);
32076 i++;
32077 }
32078
32079 gcc_assert (i == length);
32080 return true;
32081 }
32082
32083 /* Set a block of memory using plain strd/str/strh/strb instructions,
32084 to permit unaligned copies on processors which support unaligned
32085 semantics for those instructions. We fill the first LENGTH bytes
32086 of the memory area starting from DSTBASE with byte constant VALUE.
32087 ALIGN is the alignment requirement of memory. */
32088 static bool
32089 arm_block_set_aligned_non_vect (rtx dstbase,
32090 unsigned HOST_WIDE_INT length,
32091 unsigned HOST_WIDE_INT value,
32092 unsigned HOST_WIDE_INT align)
32093 {
32094 unsigned int i;
32095 rtx dst, addr, mem;
32096 rtx val_exp, val_reg, reg;
32097 unsigned HOST_WIDE_INT v;
32098 bool use_strd_p;
32099
32100 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32101 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32102
32103 v = (value | (value << 8) | (value << 16) | (value << 24));
32104 if (length < UNITS_PER_WORD)
32105 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32106
32107 if (use_strd_p)
32108 v |= (v << BITS_PER_WORD);
32109 else
32110 v = sext_hwi (v, BITS_PER_WORD);
32111
32112 val_exp = GEN_INT (v);
32113 /* Skip if it isn't profitable. */
32114 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32115 align, false, use_strd_p))
32116 {
32117 if (!use_strd_p)
32118 return false;
32119
32120 /* Try without strd. */
32121 v = (v >> BITS_PER_WORD);
32122 v = sext_hwi (v, BITS_PER_WORD);
32123 val_exp = GEN_INT (v);
32124 use_strd_p = false;
32125 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32126 align, false, use_strd_p))
32127 return false;
32128 }
32129
32130 i = 0;
32131 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32132 /* Handle double words using strd if possible. */
32133 if (use_strd_p)
32134 {
32135 val_reg = force_reg (DImode, val_exp);
32136 reg = val_reg;
32137 for (; (i + 8 <= length); i += 8)
32138 {
32139 addr = plus_constant (Pmode, dst, i);
32140 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32141 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32142 emit_move_insn (mem, reg);
32143 else
32144 emit_insn (gen_unaligned_storedi (mem, reg));
32145 }
32146 }
32147 else
32148 val_reg = force_reg (SImode, val_exp);
32149
32150 /* Handle words. */
32151 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32152 for (; (i + 4 <= length); i += 4)
32153 {
32154 addr = plus_constant (Pmode, dst, i);
32155 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32156 if ((align & 3) == 0)
32157 emit_move_insn (mem, reg);
32158 else
32159 emit_insn (gen_unaligned_storesi (mem, reg));
32160 }
32161
32162 /* Merge last pair of STRH and STRB into a STR if possible. */
32163 if (unaligned_access && i > 0 && (i + 3) == length)
32164 {
32165 addr = plus_constant (Pmode, dst, i - 1);
32166 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32167 /* We are shifting one byte back, set the alignment accordingly. */
32168 if ((align & 1) == 0)
32169 set_mem_align (mem, BITS_PER_UNIT);
32170
32171 /* Most likely this is an unaligned access, and we can't tell at
32172 compilation time. */
32173 emit_insn (gen_unaligned_storesi (mem, reg));
32174 return true;
32175 }
32176
32177 /* Handle half word leftover. */
32178 if (i + 2 <= length)
32179 {
32180 reg = gen_lowpart (HImode, val_reg);
32181 addr = plus_constant (Pmode, dst, i);
32182 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32183 if ((align & 1) == 0)
32184 emit_move_insn (mem, reg);
32185 else
32186 emit_insn (gen_unaligned_storehi (mem, reg));
32187
32188 i += 2;
32189 }
32190
32191 /* Handle single byte leftover. */
32192 if (i + 1 == length)
32193 {
32194 reg = gen_lowpart (QImode, val_reg);
32195 addr = plus_constant (Pmode, dst, i);
32196 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32197 emit_move_insn (mem, reg);
32198 }
32199
32200 return true;
32201 }
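
/* Rough example of the routine above (hypothetical values and registers):
   for LENGTH == 15, VALUE == 0, ALIGN == 4 on a target that prefers
   ldrd/strd, the expansion is roughly

     strd  Rv, Rv+1, [Rd]      @ bytes 0-7
     str   Rv, [Rd, #8]        @ bytes 8-11
     str   Rv, [Rd, #11]       @ bytes 11-14, overlapping store instead
                               @ of a trailing strh + strb pair

   The final store relies on unaligned access support, hence the "merge"
   comment in the code above.  */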
32202
32203 /* Set a block of memory using vectorization instructions for both
32204 aligned and unaligned cases. We fill the first LENGTH bytes of
32205 the memory area starting from DSTBASE with byte constant VALUE.
32206 ALIGN is the alignment requirement of memory. */
32207 static bool
32208 arm_block_set_vect (rtx dstbase,
32209 unsigned HOST_WIDE_INT length,
32210 unsigned HOST_WIDE_INT value,
32211 unsigned HOST_WIDE_INT align)
32212 {
32213 /* Check whether we need to use unaligned store instruction. */
32214 if (((align & 3) != 0 || (length & 3) != 0)
32215 /* Check whether unaligned store instruction is available. */
32216 && (!unaligned_access || BYTES_BIG_ENDIAN))
32217 return false;
32218
32219 if ((align & 3) == 0)
32220 return arm_block_set_aligned_vect (dstbase, length, value, align);
32221 else
32222 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32223 }
32224
32225 /* Expand a string store (memset) operation. First we try to do it using
32226 vector instructions, then with ARM unaligned access and double-word
32227 stores if that is profitable. OPERANDS[0] is the destination,
32228 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32229 initialize the memory, OPERANDS[3] is the known alignment of the
32230 destination. */
32231 bool
32232 arm_gen_setmem (rtx *operands)
32233 {
32234 rtx dstbase = operands[0];
32235 unsigned HOST_WIDE_INT length;
32236 unsigned HOST_WIDE_INT value;
32237 unsigned HOST_WIDE_INT align;
32238
32239 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32240 return false;
32241
32242 length = UINTVAL (operands[1]);
32243 if (length > 64)
32244 return false;
32245
32246 value = (UINTVAL (operands[2]) & 0xFF);
32247 align = UINTVAL (operands[3]);
32248 if (TARGET_NEON && length >= 8
32249 && current_tune->string_ops_prefer_neon
32250 && arm_block_set_vect (dstbase, length, value, align))
32251 return true;
32252
32253 if (!unaligned_access && (align & 3) != 0)
32254 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32255
32256 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32257 }
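
/* Sketch of the dispatch above for a call such as
   __builtin_memset (p, 0, 32) (a hypothetical example): operands[1] is 32
   and operands[2] is 0, so on a NEON target whose tuning prefers NEON for
   string operations the vector expander is tried first; otherwise the
   expansion falls back to the aligned or unaligned scalar routines,
   depending on whether unaligned access is available and on the known
   alignment of the destination.  */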
32258
32259
32260 static bool
32261 arm_macro_fusion_p (void)
32262 {
32263 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32264 }
32265
32266 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32267 for MOVW / MOVT macro fusion. */
32268
32269 static bool
32270 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32271 {
32272 /* We are trying to fuse
32273 movw imm / movt imm
32274 instructions as a group that gets scheduled together. */
32275
32276 rtx set_dest = SET_DEST (curr_set);
32277
32278 if (GET_MODE (set_dest) != SImode)
32279 return false;
32280
32281 /* We are trying to match:
32282 prev (movw) == (set (reg r0) (const_int imm16))
32283 curr (movt) == (set (zero_extract (reg r0)
32284 (const_int 16)
32285 (const_int 16))
32286 (const_int imm16_1))
32287 or
32288 prev (movw) == (set (reg r1)
32289 (high (symbol_ref ("SYM"))))
32290 curr (movt) == (set (reg r0)
32291 (lo_sum (reg r1)
32292 (symbol_ref ("SYM")))) */
32293
32294 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32295 {
32296 if (CONST_INT_P (SET_SRC (curr_set))
32297 && CONST_INT_P (SET_SRC (prev_set))
32298 && REG_P (XEXP (set_dest, 0))
32299 && REG_P (SET_DEST (prev_set))
32300 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32301 return true;
32302
32303 }
32304 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32305 && REG_P (SET_DEST (curr_set))
32306 && REG_P (SET_DEST (prev_set))
32307 && GET_CODE (SET_SRC (prev_set)) == HIGH
32308 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32309 return true;
32310
32311 return false;
32312 }
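
/* For illustration, a fusible pair as it appears in assembly looks
   roughly like (SYM is a placeholder symbol):

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   Both writes target the same destination register, which is what the
   register-number checks above enforce.  */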
32313
32314 static bool
32315 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32316 {
32317 rtx prev_set = single_set (prev);
32318 rtx curr_set = single_set (curr);
32319
32320 if (!prev_set
32321 || !curr_set)
32322 return false;
32323
32324 if (any_condjump_p (curr))
32325 return false;
32326
32327 if (!arm_macro_fusion_p ())
32328 return false;
32329
32330 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32331 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32332 return true;
32333
32334 return false;
32335 }
32336
32337 /* Return true iff the instruction fusion described by OP is enabled. */
32338 bool
32339 arm_fusion_enabled_p (tune_params::fuse_ops op)
32340 {
32341 return current_tune->fusible_ops & op;
32342 }
32343
32344 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
32345 scheduled for speculative execution. Reject the long-running division
32346 and square-root instructions. */
32347
32348 static bool
32349 arm_sched_can_speculate_insn (rtx_insn *insn)
32350 {
32351 switch (get_attr_type (insn))
32352 {
32353 case TYPE_SDIV:
32354 case TYPE_UDIV:
32355 case TYPE_FDIVS:
32356 case TYPE_FDIVD:
32357 case TYPE_FSQRTS:
32358 case TYPE_FSQRTD:
32359 case TYPE_NEON_FP_SQRT_S:
32360 case TYPE_NEON_FP_SQRT_D:
32361 case TYPE_NEON_FP_SQRT_S_Q:
32362 case TYPE_NEON_FP_SQRT_D_Q:
32363 case TYPE_NEON_FP_DIV_S:
32364 case TYPE_NEON_FP_DIV_D:
32365 case TYPE_NEON_FP_DIV_S_Q:
32366 case TYPE_NEON_FP_DIV_D_Q:
32367 return false;
32368 default:
32369 return true;
32370 }
32371 }
32372
32373 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32374
32375 static unsigned HOST_WIDE_INT
32376 arm_asan_shadow_offset (void)
32377 {
32378 return HOST_WIDE_INT_1U << 29;
32379 }
32380
32381
32382 /* This is a temporary fix for PR60655. Ideally we need
32383 to handle most of these cases in the generic part but
32384 currently we reject minus (..) (sym_ref). We try to
32385 ameliorate the case with minus (sym_ref1) (sym_ref2)
32386 where they are in the same section. */
32387
32388 static bool
32389 arm_const_not_ok_for_debug_p (rtx p)
32390 {
32391 tree decl_op0 = NULL;
32392 tree decl_op1 = NULL;
32393
32394 if (GET_CODE (p) == UNSPEC)
32395 return true;
32396 if (GET_CODE (p) == MINUS)
32397 {
32398 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32399 {
32400 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32401 if (decl_op1
32402 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32403 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32404 {
32405 if ((VAR_P (decl_op1)
32406 || TREE_CODE (decl_op1) == CONST_DECL)
32407 && (VAR_P (decl_op0)
32408 || TREE_CODE (decl_op0) == CONST_DECL))
32409 return (get_variable_section (decl_op1, false)
32410 != get_variable_section (decl_op0, false));
32411
32412 if (TREE_CODE (decl_op1) == LABEL_DECL
32413 && TREE_CODE (decl_op0) == LABEL_DECL)
32414 return (DECL_CONTEXT (decl_op1)
32415 != DECL_CONTEXT (decl_op0));
32416 }
32417
32418 return true;
32419 }
32420 }
32421
32422 return false;
32423 }
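
/* For example (hypothetical symbols), a difference such as
   (minus (symbol_ref "a") (symbol_ref "b")) is allowed in debug info
   only when both symbols resolve to declarations placed in the same
   section (or to labels within the same function); if the second operand
   is a symbol but that cannot be established, the constant is rejected
   by returning true.  */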
32424
32425 /* Return TRUE if X is a reference to a value in a constant pool. */
32426 extern bool
32427 arm_is_constant_pool_ref (rtx x)
32428 {
32429 return (MEM_P (x)
32430 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32431 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32432 }
32433
32434 /* Remember the last target of arm_set_current_function. */
32435 static GTY(()) tree arm_previous_fndecl;
32436
32437 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
32438
32439 void
32440 save_restore_target_globals (tree new_tree)
32441 {
32442 /* If we have a previous state, use it. */
32443 if (TREE_TARGET_GLOBALS (new_tree))
32444 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32445 else if (new_tree == target_option_default_node)
32446 restore_target_globals (&default_target_globals);
32447 else
32448 {
32449 /* Call target_reinit and save the state for TARGET_GLOBALS. */
32450 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
32451 }
32452
32453 arm_option_params_internal ();
32454 }
32455
32456 /* Invalidate arm_previous_fndecl. */
32457
32458 void
32459 arm_reset_previous_fndecl (void)
32460 {
32461 arm_previous_fndecl = NULL_TREE;
32462 }
32463
32464 /* Establish appropriate back-end context for processing the function
32465 FNDECL. The argument might be NULL to indicate processing at top
32466 level, outside of any function scope. */
32467
32468 static void
32469 arm_set_current_function (tree fndecl)
32470 {
32471 if (!fndecl || fndecl == arm_previous_fndecl)
32472 return;
32473
32474 tree old_tree = (arm_previous_fndecl
32475 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
32476 : NULL_TREE);
32477
32478 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32479
32480 /* If the current function has no attributes but the previous one did,
32481 use the default node. */
32482 if (! new_tree && old_tree)
32483 new_tree = target_option_default_node;
32484
32485 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop
32486 to the default has been handled by save_restore_target_globals from
32487 arm_pragma_target_parse. */
32488 if (old_tree == new_tree)
32489 return;
32490
32491 arm_previous_fndecl = fndecl;
32492
32493 /* First set the target options. */
32494 cl_target_option_restore (&global_options, &global_options_set,
32495 TREE_TARGET_OPTION (new_tree));
32496
32497 save_restore_target_globals (new_tree);
32498
32499 arm_override_options_after_change_1 (&global_options, &global_options_set);
32500 }
32501
32502 /* Implement TARGET_OPTION_PRINT. */
32503
32504 static void
32505 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
32506 {
32507 int flags = ptr->x_target_flags;
32508 const char *fpu_name;
32509
32510 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
32511 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
32512
32513 fprintf (file, "%*sselected isa %s\n", indent, "",
32514 TARGET_THUMB2_P (flags) ? "thumb2" :
32515 TARGET_THUMB_P (flags) ? "thumb1" :
32516 "arm");
32517
32518 if (ptr->x_arm_arch_string)
32519 fprintf (file, "%*sselected architecture %s\n", indent, "",
32520 ptr->x_arm_arch_string);
32521
32522 if (ptr->x_arm_cpu_string)
32523 fprintf (file, "%*sselected CPU %s\n", indent, "",
32524 ptr->x_arm_cpu_string);
32525
32526 if (ptr->x_arm_tune_string)
32527 fprintf (file, "%*sselected tune %s\n", indent, "",
32528 ptr->x_arm_tune_string);
32529
32530 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
32531 }
32532
32533 /* Hook to determine if one function can safely inline another. */
32534
32535 static bool
32536 arm_can_inline_p (tree caller, tree callee)
32537 {
32538 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32539 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32540 bool can_inline = true;
32541
32542 struct cl_target_option *caller_opts
32543 = TREE_TARGET_OPTION (caller_tree ? caller_tree
32544 : target_option_default_node);
32545
32546 struct cl_target_option *callee_opts
32547 = TREE_TARGET_OPTION (callee_tree ? callee_tree
32548 : target_option_default_node);
32549
32550 if (callee_opts == caller_opts)
32551 return true;
32552
32553 /* Callee's ISA features should be a subset of the caller's. */
32554 struct arm_build_target caller_target;
32555 struct arm_build_target callee_target;
32556 caller_target.isa = sbitmap_alloc (isa_num_bits);
32557 callee_target.isa = sbitmap_alloc (isa_num_bits);
32558
32559 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
32560 false);
32561 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
32562 false);
32563 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
32564 can_inline = false;
32565
32566 sbitmap_free (caller_target.isa);
32567 sbitmap_free (callee_target.isa);
32568
32569 /* OK to inline between different modes.
32570 Functions with mode-specific instructions, e.g. those using asm,
32571 must be explicitly protected with noinline. */
32572 return can_inline;
32573 }
32574
32575 /* Hook to fix a function's alignment when it is affected by the target attribute. */
32576
32577 static void
32578 arm_relayout_function (tree fndecl)
32579 {
32580 if (DECL_USER_ALIGN (fndecl))
32581 return;
32582
32583 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32584
32585 if (!callee_tree)
32586 callee_tree = target_option_default_node;
32587
32588 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
32589 SET_DECL_ALIGN
32590 (fndecl,
32591 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
32592 }
32593
32594 /* Inner function to process the attribute((target(...))); take an argument and
32595 set the current options from that argument. If we have a list, recursively
32596 process each element of the list. */
32597
32598 static bool
32599 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
32600 {
32601 if (TREE_CODE (args) == TREE_LIST)
32602 {
32603 bool ret = true;
32604
32605 for (; args; args = TREE_CHAIN (args))
32606 if (TREE_VALUE (args)
32607 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
32608 ret = false;
32609 return ret;
32610 }
32611
32612 else if (TREE_CODE (args) != STRING_CST)
32613 {
32614 error ("attribute %<target%> argument not a string");
32615 return false;
32616 }
32617
32618 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
32619 char *q;
32620
32621 while ((q = strtok (argstr, ",")) != NULL)
32622 {
32623 argstr = NULL;
32624 if (!strcmp (q, "thumb"))
32625 {
32626 opts->x_target_flags |= MASK_THUMB;
32627 if (TARGET_FDPIC && !arm_arch_thumb2)
32628 sorry ("FDPIC mode is not supported in Thumb-1 mode");
32629 }
32630
32631 else if (!strcmp (q, "arm"))
32632 opts->x_target_flags &= ~MASK_THUMB;
32633
32634 else if (!strcmp (q, "general-regs-only"))
32635 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
32636
32637 else if (!strncmp (q, "fpu=", 4))
32638 {
32639 int fpu_index;
32640 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
32641 &fpu_index, CL_TARGET))
32642 {
32643 error ("invalid fpu for target attribute or pragma %qs", q);
32644 return false;
32645 }
32646 if (fpu_index == TARGET_FPU_auto)
32647 {
32648 /* This doesn't really make sense until we support
32649 general dynamic selection of the architecture and all
32650 sub-features. */
32651 sorry ("auto fpu selection not currently permitted here");
32652 return false;
32653 }
32654 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
32655 }
32656 else if (!strncmp (q, "arch=", 5))
32657 {
32658 char *arch = q + 5;
32659 const arch_option *arm_selected_arch
32660 = arm_parse_arch_option_name (all_architectures, "arch", arch);
32661
32662 if (!arm_selected_arch)
32663 {
32664 error ("invalid architecture for target attribute or pragma %qs",
32665 q);
32666 return false;
32667 }
32668
32669 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
32670 }
32671 else if (q[0] == '+')
32672 {
32673 opts->x_arm_arch_string
32674 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
32675 }
32676 else
32677 {
32678 error ("unknown target attribute or pragma %qs", q);
32679 return false;
32680 }
32681 }
32682
32683 return true;
32684 }
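
/* For illustration, attribute strings handled above include forms such
   as (hypothetical user code):

     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arm,fpu=vfpv4")))
     __attribute__ ((target ("arch=armv7-a")))

   Comma-separated items are processed one strtok token at a time, and an
   item starting with '+' is appended to the current architecture
   string.  */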
32685
32686 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
32687
32688 tree
32689 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
32690 struct gcc_options *opts_set)
32691 {
32692 struct cl_target_option cl_opts;
32693
32694 if (!arm_valid_target_attribute_rec (args, opts))
32695 return NULL_TREE;
32696
32697 cl_target_option_save (&cl_opts, opts, opts_set);
32698 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
32699 arm_option_check_internal (opts);
32700 /* Do any overrides, such as global options arch=xxx.
32701 We do this since arm_active_target was overridden. */
32702 arm_option_reconfigure_globals ();
32703 arm_options_perform_arch_sanity_checks ();
32704 arm_option_override_internal (opts, opts_set);
32705
32706 return build_target_option_node (opts, opts_set);
32707 }
32708
32709 static void
32710 add_attribute (const char * mode, tree *attributes)
32711 {
32712 size_t len = strlen (mode);
32713 tree value = build_string (len, mode);
32714
32715 TREE_TYPE (value) = build_array_type (char_type_node,
32716 build_index_type (size_int (len)));
32717
32718 *attributes = tree_cons (get_identifier ("target"),
32719 build_tree_list (NULL_TREE, value),
32720 *attributes);
32721 }
32722
32723 /* For testing. Insert thumb or arm modes alternately on functions. */
32724
32725 static void
32726 arm_insert_attributes (tree fndecl, tree * attributes)
32727 {
32728 const char *mode;
32729
32730 if (! TARGET_FLIP_THUMB)
32731 return;
32732
32733 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
32734 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
32735 return;
32736
32737 /* Nested definitions must inherit mode. */
32738 if (current_function_decl)
32739 {
32740 mode = TARGET_THUMB ? "thumb" : "arm";
32741 add_attribute (mode, attributes);
32742 return;
32743 }
32744
32745 /* If there is already a setting don't change it. */
32746 if (lookup_attribute ("target", *attributes) != NULL)
32747 return;
32748
32749 mode = thumb_flipper ? "thumb" : "arm";
32750 add_attribute (mode, attributes);
32751
32752 thumb_flipper = !thumb_flipper;
32753 }
32754
32755 /* Hook to validate attribute((target("string"))). */
32756
32757 static bool
32758 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
32759 tree args, int ARG_UNUSED (flags))
32760 {
32761 bool ret = true;
32762 struct gcc_options func_options, func_options_set;
32763 tree cur_tree, new_optimize;
32764 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32765
32766 /* Get the optimization options of the current function. */
32767 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32768
32769 /* If the function changed the optimization levels as well as setting target
32770 options, start with the optimizations specified. */
32771 if (!func_optimize)
32772 func_optimize = optimization_default_node;
32773
32774 /* Init func_options. */
32775 memset (&func_options, 0, sizeof (func_options));
32776 init_options_struct (&func_options, NULL);
32777 lang_hooks.init_options_struct (&func_options);
32778 memset (&func_options_set, 0, sizeof (func_options_set));
32779
32780 /* Initialize func_options to the defaults. */
32781 cl_optimization_restore (&func_options, &func_options_set,
32782 TREE_OPTIMIZATION (func_optimize));
32783
32784 cl_target_option_restore (&func_options, &func_options_set,
32785 TREE_TARGET_OPTION (target_option_default_node));
32786
32787 /* Set func_options flags with new target mode. */
32788 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
32789 &func_options_set);
32790
32791 if (cur_tree == NULL_TREE)
32792 ret = false;
32793
32794 new_optimize = build_optimization_node (&func_options, &func_options_set);
32795
32796 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
32797
32798 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32799
32800 return ret;
32801 }
32802
32803 /* Match an ISA feature bitmap to a named FPU. We always use the
32804 first entry that exactly matches the feature set, so that we
32805 effectively canonicalize the FPU name for the assembler. */
32806 static const char*
32807 arm_identify_fpu_from_isa (sbitmap isa)
32808 {
32809 auto_sbitmap fpubits (isa_num_bits);
32810 auto_sbitmap cand_fpubits (isa_num_bits);
32811
32812 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
32813
32814 /* If there are no ISA feature bits relating to the FPU, we must be
32815 doing soft-float. */
32816 if (bitmap_empty_p (fpubits))
32817 return "softvfp";
32818
32819 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
32820 {
32821 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
32822 if (bitmap_equal_p (fpubits, cand_fpubits))
32823 return all_fpus[i].name;
32824 }
32825 /* We must find an entry, or things have gone wrong. */
32826 gcc_unreachable ();
32827 }
32828
32829 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
32830 by the function DECL. */
32831 void
32832 arm_declare_function_name (FILE *stream, const char *name, tree decl)
32833 {
32834 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
32835
32836 struct cl_target_option *targ_options;
32837 if (target_parts)
32838 targ_options = TREE_TARGET_OPTION (target_parts);
32839 else
32840 targ_options = TREE_TARGET_OPTION (target_option_current_node);
32841 gcc_assert (targ_options);
32842
32843 /* Only update the assembler .arch string if it is distinct from the last
32844 such string we printed. arch_to_print is set conditionally in case
32845 targ_options->x_arm_arch_string is NULL, which can be the case
32846 when cc1 is invoked directly without passing the -march option. */
32847 std::string arch_to_print;
32848 if (targ_options->x_arm_arch_string)
32849 arch_to_print = targ_options->x_arm_arch_string;
32850
32851 if (arch_to_print != arm_last_printed_arch_string)
32852 {
32853 std::string arch_name
32854 = arch_to_print.substr (0, arch_to_print.find ("+"));
32855 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
32856 const arch_option *arch
32857 = arm_parse_arch_option_name (all_architectures, "-march",
32858 targ_options->x_arm_arch_string);
32859 auto_sbitmap opt_bits (isa_num_bits);
32860
32861 gcc_assert (arch);
32862 if (arch->common.extensions)
32863 {
32864 for (const struct cpu_arch_extension *opt = arch->common.extensions;
32865 opt->name != NULL;
32866 opt++)
32867 {
32868 if (!opt->remove)
32869 {
32870 arm_initialize_isa (opt_bits, opt->isa_bits);
32871 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft"
32872 and "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and
32873 MVE with floating-point instructions are disabled. So the
32874 following check restricts the printing of ".arch_extension
32875 mve" and ".arch_extension fp" (for mve.fp) in the assembly
32876 file. MVE needs this special behaviour because the
32877 feature bits "mve" and "mve_float" are not part of the
32878 "fpu bits", so they are not cleared when -mfloat-abi=soft
32879 (i.e. nofp) is used, but the macros TARGET_HAVE_MVE and
32880 TARGET_HAVE_MVE_FLOAT are disabled. */
32881 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
32882 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
32883 && !TARGET_HAVE_MVE_FLOAT))
32884 continue;
32885 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
32886 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
32887 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
32888 opt->name);
32889 }
32890 }
32891 }
32892
32893 arm_last_printed_arch_string = arch_to_print;
32894 }
32895
32896 fprintf (stream, "\t.syntax unified\n");
32897
32898 if (TARGET_THUMB)
32899 {
32900 if (is_called_in_ARM_mode (decl)
32901 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
32902 && cfun->is_thunk))
32903 fprintf (stream, "\t.code 32\n");
32904 else if (TARGET_THUMB1)
32905 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
32906 else
32907 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
32908 }
32909 else
32910 fprintf (stream, "\t.arm\n");
32911
32912 std::string fpu_to_print
32913 = TARGET_SOFT_FLOAT
32914 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
32915
32916 if (!(!strcmp (fpu_to_print.c_str (), "softvfp") && TARGET_VFP_BASE)
32917 && (fpu_to_print != arm_last_printed_fpu_string))
32918 {
32919 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
32920 arm_last_printed_fpu_string = fpu_to_print;
32921 }
32922
32923 if (TARGET_POKE_FUNCTION_NAME)
32924 arm_poke_function_name (stream, (const char *) name);
32925 }
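
/* A typical (illustrative) assembly preamble produced by the function
   above for a Thumb-2 function might look like:

     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv3-d16

   with the .arch and .fpu lines omitted when they match what was last
   printed.  */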
32926
32927 /* If MEM is in the form [base+offset], extract the two parts of the
32928 address into BASE and OFFSET; otherwise return false after
32929 clearing BASE and OFFSET. */
32930
32931 static bool
32932 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
32933 {
32934 rtx addr;
32935
32936 gcc_assert (MEM_P (mem));
32937
32938 addr = XEXP (mem, 0);
32939
32940 /* Strip off const from addresses like (const (addr)). */
32941 if (GET_CODE (addr) == CONST)
32942 addr = XEXP (addr, 0);
32943
32944 if (GET_CODE (addr) == REG)
32945 {
32946 *base = addr;
32947 *offset = const0_rtx;
32948 return true;
32949 }
32950
32951 if (GET_CODE (addr) == PLUS
32952 && GET_CODE (XEXP (addr, 0)) == REG
32953 && CONST_INT_P (XEXP (addr, 1)))
32954 {
32955 *base = XEXP (addr, 0);
32956 *offset = XEXP (addr, 1);
32957 return true;
32958 }
32959
32960 *base = NULL_RTX;
32961 *offset = NULL_RTX;
32962
32963 return false;
32964 }
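
/* Examples of addresses recognised above (register numbers are
   arbitrary); a (const ...) wrapper around the address is stripped first:
     (mem (reg r4))                        -> base r4, offset 0
     (mem (plus (reg r4) (const_int 8)))   -> base r4, offset 8
   Anything else, e.g. a pre/post-modify address, makes the function
   return false.  */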
32965
32966 /* If INSN is a load or store whose address has the form [base+offset],
32967 extract the two parts into BASE and OFFSET. IS_LOAD is set
32968 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
32969 otherwise return FALSE. */
32970
32971 static bool
32972 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
32973 {
32974 rtx x, dest, src;
32975
32976 gcc_assert (INSN_P (insn));
32977 x = PATTERN (insn);
32978 if (GET_CODE (x) != SET)
32979 return false;
32980
32981 src = SET_SRC (x);
32982 dest = SET_DEST (x);
32983 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
32984 {
32985 *is_load = false;
32986 extract_base_offset_in_addr (dest, base, offset);
32987 }
32988 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
32989 {
32990 *is_load = true;
32991 extract_base_offset_in_addr (src, base, offset);
32992 }
32993 else
32994 return false;
32995
32996 return (*base != NULL_RTX && *offset != NULL_RTX);
32997 }
32998
32999 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33000
33001 Currently we only support fusing ldr or str instructions, so FUSION_PRI
33002 and PRI are only calculated for these instructions. For other instructions,
33003 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33004 instruction fusion can be supported by returning different priorities.
33005
33006 It's important that irrelevant instructions get the largest FUSION_PRI. */
33007
33008 static void
33009 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33010 int *fusion_pri, int *pri)
33011 {
33012 int tmp, off_val;
33013 bool is_load;
33014 rtx base, offset;
33015
33016 gcc_assert (INSN_P (insn));
33017
33018 tmp = max_pri - 1;
33019 if (!fusion_load_store (insn, &base, &offset, &is_load))
33020 {
33021 *pri = tmp;
33022 *fusion_pri = tmp;
33023 return;
33024 }
33025
33026 /* Load goes first. */
33027 if (is_load)
33028 *fusion_pri = tmp - 1;
33029 else
33030 *fusion_pri = tmp - 2;
33031
33032 tmp /= 2;
33033
33034 /* INSN with smaller base register goes first. */
33035 tmp -= ((REGNO (base) & 0xff) << 20);
33036
33037 /* INSN with smaller offset goes first. */
33038 off_val = (int)(INTVAL (offset));
33039 if (off_val >= 0)
33040 tmp -= (off_val & 0xfffff);
33041 else
33042 tmp += ((- off_val) & 0xfffff);
33043
33044 *pri = tmp;
33045 return;
33046 }
33047
33048
33049 /* Construct and return a PARALLEL RTX vector with elements numbering the
33050 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33051 the vector - from the perspective of the architecture. This does not
33052 line up with GCC's perspective on lane numbers, so we end up with
33053 different masks depending on our target endian-ness. The diagram
33054 below may help. We must draw the distinction when building masks
33055 which select one half of the vector. An instruction selecting
33056 architectural low-lanes for a big-endian target, must be described using
33057 a mask selecting GCC high-lanes.
33058
33059 Big-Endian Little-Endian
33060
33061 GCC 0 1 2 3 3 2 1 0
33062 | x | x | x | x | | x | x | x | x |
33063 Architecture 3 2 1 0 3 2 1 0
33064
33065 Low Mask: { 2, 3 } { 0, 1 }
33066 High Mask: { 0, 1 } { 2, 3 }
33067 */
33068
33069 rtx
33070 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33071 {
33072 int nunits = GET_MODE_NUNITS (mode);
33073 rtvec v = rtvec_alloc (nunits / 2);
33074 int high_base = nunits / 2;
33075 int low_base = 0;
33076 int base;
33077 rtx t1;
33078 int i;
33079
33080 if (BYTES_BIG_ENDIAN)
33081 base = high ? low_base : high_base;
33082 else
33083 base = high ? high_base : low_base;
33084
33085 for (i = 0; i < nunits / 2; i++)
33086 RTVEC_ELT (v, i) = GEN_INT (base + i);
33087
33088 t1 = gen_rtx_PARALLEL (mode, v);
33089 return t1;
33090 }
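
/* Worked example: for V4SImode with HIGH == true this yields
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target and
   (parallel [(const_int 0) (const_int 1)]) on a big-endian one, matching
   the mask table in the comment above.  */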
33091
33092 /* Check OP for validity as a PARALLEL RTX vector with elements
33093 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33094 from the perspective of the architecture. See the diagram above
33095 arm_simd_vect_par_cnst_half for more details. */
33096
33097 bool
33098 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33099 bool high)
33100 {
33101 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33102 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33103 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33104 int i = 0;
33105
33106 if (!VECTOR_MODE_P (mode))
33107 return false;
33108
33109 if (count_op != count_ideal)
33110 return false;
33111
33112 for (i = 0; i < count_ideal; i++)
33113 {
33114 rtx elt_op = XVECEXP (op, 0, i);
33115 rtx elt_ideal = XVECEXP (ideal, 0, i);
33116
33117 if (!CONST_INT_P (elt_op)
33118 || INTVAL (elt_ideal) != INTVAL (elt_op))
33119 return false;
33120 }
33121 return true;
33122 }
33123
33124 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33125 in Thumb1. */
33126 static bool
33127 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33128 const_tree)
33129 {
33130 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33131 if (vcall_offset && TARGET_THUMB1)
33132 return false;
33133
33134 /* Otherwise ok. */
33135 return true;
33136 }
33137
33138 /* Generate RTL for a conditional branch with rtx comparison CODE in
33139 mode CC_MODE. The destination of the unlikely conditional branch
33140 is LABEL_REF. */
33141
33142 void
33143 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33144 rtx label_ref)
33145 {
33146 rtx x;
33147 x = gen_rtx_fmt_ee (code, VOIDmode,
33148 gen_rtx_REG (cc_mode, CC_REGNUM),
33149 const0_rtx);
33150
33151 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33152 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33153 pc_rtx);
33154 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33155 }
33156
33157 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33158
33159 For pure-code sections there is no letter code for this attribute, so
33160 output all the section flags numerically when this is needed. */
33161
33162 static bool
33163 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33164 {
33165
33166 if (flags & SECTION_ARM_PURECODE)
33167 {
33168 *num = 0x20000000;
33169
33170 if (!(flags & SECTION_DEBUG))
33171 *num |= 0x2;
33172 if (flags & SECTION_EXCLUDE)
33173 *num |= 0x80000000;
33174 if (flags & SECTION_WRITE)
33175 *num |= 0x1;
33176 if (flags & SECTION_CODE)
33177 *num |= 0x4;
33178 if (flags & SECTION_MERGE)
33179 *num |= 0x10;
33180 if (flags & SECTION_STRINGS)
33181 *num |= 0x20;
33182 if (flags & SECTION_TLS)
33183 *num |= 0x400;
33184 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33185 *num |= 0x200;
33186
33187 return true;
33188 }
33189
33190 return false;
33191 }
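
/* Worked example: a non-debug, executable pure-code section
   (SECTION_ARM_PURECODE | SECTION_CODE) is encoded above as
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE plus the
   ELF SHF_ALLOC and SHF_EXECINSTR bits.  */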
33192
33193 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33194
33195 If pure-code is passed as an option, make sure all functions are in
33196 sections that have the SHF_ARM_PURECODE attribute. */
33197
33198 static section *
33199 arm_function_section (tree decl, enum node_frequency freq,
33200 bool startup, bool exit)
33201 {
33202 const char * section_name;
33203 section * sec;
33204
33205 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33206 return default_function_section (decl, freq, startup, exit);
33207
33208 if (!target_pure_code)
33209 return default_function_section (decl, freq, startup, exit);
33210
33211
33212 section_name = DECL_SECTION_NAME (decl);
33213
33214 /* If a function is not in a named section then it falls under the 'default'
33215 text section, also known as '.text'. We can preserve previous behavior as
33216 the default text section already has the SHF_ARM_PURECODE section
33217 attribute. */
33218 if (!section_name)
33219 {
33220 section *default_sec = default_function_section (decl, freq, startup,
33221 exit);
33222
33223 /* If default_sec is not null, then it must be a special section like for
33224 example .text.startup. We set the pure-code attribute and return the
33225 same section to preserve existing behavior. */
33226 if (default_sec)
33227 default_sec->common.flags |= SECTION_ARM_PURECODE;
33228 return default_sec;
33229 }
33230
33231 /* Otherwise look whether a section has already been created with
33232 'section_name'. */
33233 sec = get_named_section (decl, section_name, 0);
33234 if (!sec)
33235 /* If that is not the case, passing NULL as the section's name to
33236 'get_named_section' will create a section with the declaration's
33237 section name. */
33238 sec = get_named_section (decl, NULL, 0);
33239
33240 /* Set the SHF_ARM_PURECODE attribute. */
33241 sec->common.flags |= SECTION_ARM_PURECODE;
33242
33243 return sec;
33244 }
33245
33246 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
33247
33248 If DECL is a function declaration and pure-code is passed as an option
33249 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33250 section's name and RELOC indicates whether the declaration's initializer may
33251 contain runtime relocations. */
33252
33253 static unsigned int
33254 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33255 {
33256 unsigned int flags = default_section_type_flags (decl, name, reloc);
33257
33258 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33259 flags |= SECTION_ARM_PURECODE;
33260
33261 return flags;
33262 }
33263
33264 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33265
33266 static void
33267 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33268 rtx op0, rtx op1,
33269 rtx *quot_p, rtx *rem_p)
33270 {
33271 if (mode == SImode)
33272 gcc_assert (!TARGET_IDIV);
33273
33274 scalar_int_mode libval_mode
33275 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33276
33277 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33278 libval_mode, op0, mode, op1, mode);
33279
33280 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33281 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33282 GET_MODE_SIZE (mode));
33283
33284 gcc_assert (quotient);
33285 gcc_assert (remainder);
33286
33287 *quot_p = quotient;
33288 *rem_p = remainder;
33289 }
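
/* For example, for SImode operands LIBVAL_MODE above is DImode: the AEABI
   __aeabi_idivmod/__aeabi_uidivmod helpers return quotient and remainder
   as a register pair, modelled here as a single DImode value from which
   the two simplify_gen_subreg calls extract the quotient (at byte offset
   0) and the remainder (at byte offset GET_MODE_SIZE (SImode)).  */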
33290
33291 /* This function checks for the availability of the coprocessor builtin passed
33292 in BUILTIN for the current target. Returns true if it is available and
33293 false otherwise. If a BUILTIN is passed for which this function has not
33294 been implemented, it will cause an internal compiler error (gcc_unreachable). */
33295
33296 bool
33297 arm_coproc_builtin_available (enum unspecv builtin)
33298 {
33299 /* None of these builtins are available in Thumb mode if the target only
33300 supports Thumb-1. */
33301 if (TARGET_THUMB1)
33302 return false;
33303
33304 switch (builtin)
33305 {
33306 case VUNSPEC_CDP:
33307 case VUNSPEC_LDC:
33308 case VUNSPEC_LDCL:
33309 case VUNSPEC_STC:
33310 case VUNSPEC_STCL:
33311 case VUNSPEC_MCR:
33312 case VUNSPEC_MRC:
33313 if (arm_arch4)
33314 return true;
33315 break;
33316 case VUNSPEC_CDP2:
33317 case VUNSPEC_LDC2:
33318 case VUNSPEC_LDC2L:
33319 case VUNSPEC_STC2:
33320 case VUNSPEC_STC2L:
33321 case VUNSPEC_MCR2:
33322 case VUNSPEC_MRC2:
33323 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33324 ARMv8-{A,M}. */
33325 if (arm_arch5t)
33326 return true;
33327 break;
33328 case VUNSPEC_MCRR:
33329 case VUNSPEC_MRRC:
33330 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33331 ARMv8-{A,M}. */
33332 if (arm_arch6 || arm_arch5te)
33333 return true;
33334 break;
33335 case VUNSPEC_MCRR2:
33336 case VUNSPEC_MRRC2:
33337 if (arm_arch6)
33338 return true;
33339 break;
33340 default:
33341 gcc_unreachable ();
33342 }
33343 return false;
33344 }
33345
33346 /* This function returns true if OP is a valid memory operand for the ldc and
33347 stc coprocessor instructions and false otherwise. */
33348
33349 bool
33350 arm_coproc_ldc_stc_legitimate_address (rtx op)
33351 {
33352 HOST_WIDE_INT range;
33353 /* Has to be a memory operand. */
33354 if (!MEM_P (op))
33355 return false;
33356
33357 op = XEXP (op, 0);
33358
33359 /* We accept registers. */
33360 if (REG_P (op))
33361 return true;
33362
33363 switch (GET_CODE (op))
33364 {
33365 case PLUS:
33366 {
33367 /* Or registers with an offset. */
33368 if (!REG_P (XEXP (op, 0)))
33369 return false;
33370
33371 op = XEXP (op, 1);
33372
33373 /* The offset must be an immediate though. */
33374 if (!CONST_INT_P (op))
33375 return false;
33376
33377 range = INTVAL (op);
33378
33379 /* Within the range of [-1020,1020]. */
33380 if (!IN_RANGE (range, -1020, 1020))
33381 return false;
33382
33383 /* And a multiple of 4. */
33384 return (range % 4) == 0;
33385 }
33386 case PRE_INC:
33387 case POST_INC:
33388 case PRE_DEC:
33389 case POST_DEC:
33390 return REG_P (XEXP (op, 0));
33391 default:
33392 gcc_unreachable ();
33393 }
33394 return false;
33395 }
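
/* Examples (hypothetical operands): addresses such as (mem (reg r1)) and
   (mem (plus (reg r1) (const_int 8))) are accepted above, while an offset
   of 6 (not a multiple of 4) or 1024 (outside [-1020, 1020]) is
   rejected.  */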
33396
33397 /* Return the diagnostic message string if conversion from FROMTYPE to
33398 TOTYPE is not allowed, NULL otherwise. */
33399
33400 static const char *
33401 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33402 {
33403 if (element_mode (fromtype) != element_mode (totype))
33404 {
33405 /* Do not allow conversions to/from BFmode scalar types. */
33406 if (TYPE_MODE (fromtype) == BFmode)
33407 return N_("invalid conversion from type %<bfloat16_t%>");
33408 if (TYPE_MODE (totype) == BFmode)
33409 return N_("invalid conversion to type %<bfloat16_t%>");
33410 }
33411
33412 /* Conversion allowed. */
33413 return NULL;
33414 }
33415
33416 /* Return the diagnostic message string if the unary operation OP is
33417 not permitted on TYPE, NULL otherwise. */
33418
33419 static const char *
33420 arm_invalid_unary_op (int op, const_tree type)
33421 {
33422 /* Reject all single-operand operations on BFmode except for &. */
33423 if (element_mode (type) == BFmode && op != ADDR_EXPR)
33424 return N_("operation not permitted on type %<bfloat16_t%>");
33425
33426 /* Operation allowed. */
33427 return NULL;
33428 }
33429
33430 /* Return the diagnostic message string if the binary operation OP is
33431 not permitted on TYPE1 and TYPE2, NULL otherwise. */
33432
33433 static const char *
33434 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
33435 const_tree type2)
33436 {
33437 /* Reject all 2-operand operations on BFmode. */
33438 if (element_mode (type1) == BFmode
33439 || element_mode (type2) == BFmode)
33440 return N_("operation not permitted on type %<bfloat16_t%>");
33441
33442 /* Operation allowed. */
33443 return NULL;
33444 }
33445
33446 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
33447
33448 In VFPv1, VFP registers could only be accessed in the mode they were
33449 set, so subregs would be invalid there. However, we don't support
33450 VFPv1 at the moment, and the restriction was lifted in VFPv2.
33451
33452 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
33453 VFP registers in little-endian order. We can't describe that accurately to
33454 GCC, so avoid taking subregs of such values.
33455
33456 The only exception is going from a 128-bit to a 64-bit type. In that
33457 case the data layout happens to be consistent for big-endian, so we
33458 explicitly allow that case. */
33459
33460 static bool
33461 arm_can_change_mode_class (machine_mode from, machine_mode to,
33462 reg_class_t rclass)
33463 {
33464 if (TARGET_BIG_END
33465 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
33466 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
33467 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
33468 && reg_classes_intersect_p (VFP_REGS, rclass))
33469 return false;
33470 return true;
33471 }
33472
33473 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
33474 strcpy from constants will be faster. */
33475
33476 static HOST_WIDE_INT
33477 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33478 {
33479 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
33480 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
33481 return MAX (align, BITS_PER_WORD * factor);
33482 return align;
33483 }
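
/* Worked example: when tuning for XScale in ARM mode the factor above is
   2, so a string constant that would otherwise be 8-bit aligned gets
   MAX (8, 32 * 2) == 64-bit alignment when not optimising for size.  */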
33484
33485 /* Emit a speculation barrier on target architectures that do not have
33486 DSB/ISB directly. Such systems probably don't need a barrier
33487 themselves, but if the code is ever run on a later architecture, it
33488 might become a problem. */
33489 void
33490 arm_emit_speculation_barrier_function ()
33491 {
33492 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
33493 }
33494
33495 /* Have we recorded an explicit access to the Q bit of APSR? */
33496 bool
33497 arm_q_bit_access (void)
33498 {
33499 if (cfun && cfun->decl)
33500 return lookup_attribute ("acle qbit",
33501 DECL_ATTRIBUTES (cfun->decl));
33502 return true;
33503 }
33504
33505 /* Have we recorded an explicit access to the GE bits of PSTATE? */
33506 bool
33507 arm_ge_bits_access (void)
33508 {
33509 if (cfun && cfun->decl)
33510 return lookup_attribute ("acle gebits",
33511 DECL_ATTRIBUTES (cfun->decl));
33512 return true;
33513 }
33514
33515 /* Return NULL if insn INSN is valid within a low-overhead loop.
33516 Otherwise return a string explaining why doloop cannot be applied. */
33517
33518 static const char *
33519 arm_invalid_within_doloop (const rtx_insn *insn)
33520 {
33521 if (!TARGET_HAVE_LOB)
33522 return default_invalid_within_doloop (insn);
33523
33524 if (CALL_P (insn))
33525 return "Function call in the loop.";
33526
33527 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
33528 return "LR is used inside loop.";
33529
33530 return NULL;
33531 }
33532
33533 bool
33534 arm_target_insn_ok_for_lob (rtx insn)
33535 {
33536 basic_block bb = BLOCK_FOR_INSN (insn);
33537 /* Make sure the basic block of the target insn is a simple latch
33538 whose single predecessor and successor are the body of the loop
33539 itself. Only simple loops with a single basic block as the body are
33540 supported for 'low-overhead loops', which ensures that the LE target is
33541 above the LE instruction itself in the generated code. */
33542
33543 return single_succ_p (bb)
33544 && single_pred_p (bb)
33545 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
33546 && contains_no_active_insn_p (bb);
33547 }
33548
33549 #if CHECKING_P
33550 namespace selftest {
33551
33552 /* Scan the static data tables generated by parsecpu.awk looking for
33553 potential issues with the data. We primarily check for
33554 inconsistencies in the option extensions at present (extensions
33555 that duplicate others but aren't marked as aliases). Furthermore,
33556 for correct canonicalization later options must never be a subset
33557 of an earlier option. Any extension should also only specify other
33558 feature bits and never an architecture bit. The architecture is inferred
33559 from the declaration of the extension. */
33560 static void
33561 arm_test_cpu_arch_data (void)
33562 {
33563 const arch_option *arch;
33564 const cpu_option *cpu;
33565 auto_sbitmap target_isa (isa_num_bits);
33566 auto_sbitmap isa1 (isa_num_bits);
33567 auto_sbitmap isa2 (isa_num_bits);
33568
33569 for (arch = all_architectures; arch->common.name != NULL; ++arch)
33570 {
33571 const cpu_arch_extension *ext1, *ext2;
33572
33573 if (arch->common.extensions == NULL)
33574 continue;
33575
33576 arm_initialize_isa (target_isa, arch->common.isa_bits);
33577
33578 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
33579 {
33580 if (ext1->alias)
33581 continue;
33582
33583 arm_initialize_isa (isa1, ext1->isa_bits);
33584 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33585 {
33586 if (ext2->alias || ext1->remove != ext2->remove)
33587 continue;
33588
33589 arm_initialize_isa (isa2, ext2->isa_bits);
33590 /* If the option is a subset of the parent option, it doesn't
33591 add anything and so isn't useful. */
33592 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33593
33594 /* If the extension specifies any architectural bits then
33595 disallow it. Extensions should only specify feature bits. */
33596 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33597 }
33598 }
33599 }
33600
33601 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
33602 {
33603 const cpu_arch_extension *ext1, *ext2;
33604
33605 if (cpu->common.extensions == NULL)
33606 continue;
33607
33608 arm_initialize_isa (target_isa, arch->common.isa_bits);
33609
33610 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
33611 {
33612 if (ext1->alias)
33613 continue;
33614
33615 arm_initialize_isa (isa1, ext1->isa_bits);
33616 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33617 {
33618 if (ext2->alias || ext1->remove != ext2->remove)
33619 continue;
33620
33621 arm_initialize_isa (isa2, ext2->isa_bits);
33622 /* If the option is a subset of the parent option, it doesn't
33623 add anything and so isn't useful. */
33624 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33625
33626 /* If the extension specifies any architectural bits then
33627 disallow it. Extensions should only specify feature bits. */
33628 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33629 }
33630 }
33631 }
33632 }
33633
33634 /* Scan the static data tables generated by parsecpu.awk looking for
33635 potential issues with the data. Here we check for consistency between the
33636 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
33637 a feature bit that is not defined by any FPU flag. */
33638 static void
33639 arm_test_fpu_data (void)
33640 {
33641 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
33642 auto_sbitmap fpubits (isa_num_bits);
33643 auto_sbitmap tmpset (isa_num_bits);
33644
33645 static const enum isa_feature fpu_bitlist_internal[]
33646 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
33647 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
33648
33649 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33650 {
33651 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
33652 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
33653 bitmap_clear (isa_all_fpubits_internal);
33654 bitmap_copy (isa_all_fpubits_internal, tmpset);
33655 }
33656
33657 if (!bitmap_empty_p (isa_all_fpubits_internal))
33658 {
33659 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
33660 " group that are not defined by any FPU.\n"
33661 " Check your arm-cpus.in.\n");
33662 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
33663 }
33664 }
33665
33666 static void
33667 arm_run_selftests (void)
33668 {
33669 arm_test_cpu_arch_data ();
33670 arm_test_fpu_data ();
33671 }
33672 } /* Namespace selftest. */
33673
33674 #undef TARGET_RUN_TARGET_SELFTESTS
33675 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
33676 #endif /* CHECKING_P */
33677
33678 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
33679 Unlike the arm version, we do NOT implement asm flag outputs. */
33680
33681 rtx_insn *
33682 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
33683 vec<const char *> &constraints,
33684 vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
33685 {
33686 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
33687 if (strncmp (constraints[i], "=@cc", 4) == 0)
33688 {
33689 sorry ("asm flags not supported in thumb1 mode");
33690 break;
33691 }
33692 return NULL;
33693 }
33694
33695 /* Generate code to enable conditional branches in functions over 1 MiB.
33696 Parameters are:
33697 operands: is the operands list of the asm insn (see arm_cond_branch or
33698 arm_cond_branch_reversed).
33699 pos_label: is an index into the operands array where operands[pos_label] is
33700 the asm label of the final jump destination.
33701 dest: is a string which is used to generate the asm label of the intermediate
33702 destination.
33703 branch_format: is a string denoting the intermediate branch format, e.g.
33704 "beq", "bne", etc. */
33705
33706 const char *
33707 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
33708 const char * branch_format)
33709 {
33710 rtx_code_label * tmp_label = gen_label_rtx ();
33711 char label_buf[256];
33712 char buffer[128];
33713 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
33714 CODE_LABEL_NUMBER (tmp_label));
33715 const char *label_ptr = arm_strip_name_encoding (label_buf);
33716 rtx dest_label = operands[pos_label];
33717 operands[pos_label] = tmp_label;
33718
33719 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
33720 output_asm_insn (buffer, operands);
33721
33722 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
33723 operands[pos_label] = dest_label;
33724 output_asm_insn (buffer, operands);
33725 return "";
33726 }
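
/* Illustrative output of the routine above (label names and the far
   destination are hypothetical): with branch_format "beq\t" and dest
   "LCB" the emitted sequence is roughly

     beq   .LCB42          @ short-range conditional branch
     b     .L_far_dest     @ unconditional branch with a long range
   .LCB42:

   so only the unconditional branch has to reach the distant label.  Note
   that the short conditional branch jumps over the unconditional one, so
   the caller chooses branch_format accordingly.  */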
33727
33728 /* If the given mode matches, restrict the base register for loads to LO_REGS
33729 (i.e. [Rn], Rn <= LO_REGS). */
33730 enum reg_class
33731 arm_mode_base_reg_class (machine_mode mode)
33732 {
33733 if (TARGET_HAVE_MVE
33734 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
33735 return LO_REGS;
33736
33737 return MODE_BASE_REG_REG_CLASS (mode);
33738 }
33739
33740 struct gcc_target targetm = TARGET_INITIALIZER;
33741
33742 #include "gt-arm.h"