1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2020 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "target-globals.h"
65 #include "builtins.h"
66 #include "tm-constrs.h"
67 #include "rtl-iter.h"
68 #include "optabs-libfuncs.h"
69 #include "gimplify.h"
70 #include "gimple.h"
71 #include "selftest.h"
72
73 /* This file should be included last. */
74 #include "target-def.h"
75
76 /* Forward definitions of types. */
77 typedef struct minipool_node Mnode;
78 typedef struct minipool_fixup Mfix;
79
80 /* The last .arch and .fpu assembly strings that we printed. */
81 static std::string arm_last_printed_arch_string;
82 static std::string arm_last_printed_fpu_string;
83
84 void (*arm_lang_output_object_attributes_hook)(void);
85
86 struct four_ints
87 {
88 int i[4];
89 };
90
91 /* Forward function declarations. */
92 static bool arm_const_not_ok_for_debug_p (rtx);
93 static int arm_needs_doubleword_align (machine_mode, const_tree);
94 static int arm_compute_static_chain_stack_bytes (void);
95 static arm_stack_offsets *arm_get_frame_offsets (void);
96 static void arm_compute_frame_layout (void);
97 static void arm_add_gc_roots (void);
98 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
99 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
100 static unsigned bit_count (unsigned long);
101 static unsigned bitmap_popcount (const sbitmap);
102 static int arm_address_register_rtx_p (rtx, int);
103 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
104 static bool is_called_in_ARM_mode (tree);
105 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
106 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
107 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
108 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
109 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
110 inline static int thumb1_index_register_rtx_p (rtx, int);
111 static int thumb_far_jump_used_p (void);
112 static bool thumb_force_lr_save (void);
113 static unsigned arm_size_return_regs (void);
114 static bool arm_assemble_integer (rtx, unsigned int, int);
115 static void arm_print_operand (FILE *, rtx, int);
116 static void arm_print_operand_address (FILE *, machine_mode, rtx);
117 static bool arm_print_operand_punct_valid_p (unsigned char code);
118 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
119 static arm_cc get_arm_condition_code (rtx);
120 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
121 static const char *output_multi_immediate (rtx *, const char *, const char *,
122 int, HOST_WIDE_INT);
123 static const char *shift_op (rtx, HOST_WIDE_INT *);
124 static struct machine_function *arm_init_machine_status (void);
125 static void thumb_exit (FILE *, int);
126 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
127 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
128 static Mnode *add_minipool_forward_ref (Mfix *);
129 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_backward_ref (Mfix *);
131 static void assign_minipool_offsets (Mfix *);
132 static void arm_print_value (FILE *, rtx);
133 static void dump_minipool (rtx_insn *);
134 static int arm_barrier_cost (rtx_insn *);
135 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
136 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
137 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
138 machine_mode, rtx);
139 static void arm_reorg (void);
140 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
141 static unsigned long arm_compute_save_reg0_reg12_mask (void);
142 static unsigned long arm_compute_save_core_reg_mask (void);
143 static unsigned long arm_isr_value (tree);
144 static unsigned long arm_compute_func_type (void);
145 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
148 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
149 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
150 #endif
151 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
152 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
153 static void arm_output_function_epilogue (FILE *);
154 static void arm_output_function_prologue (FILE *);
155 static int arm_comp_type_attributes (const_tree, const_tree);
156 static void arm_set_default_type_attributes (tree);
157 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
158 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
159 static int optimal_immediate_sequence (enum rtx_code code,
160 unsigned HOST_WIDE_INT val,
161 struct four_ints *return_sequence);
162 static int optimal_immediate_sequence_1 (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence,
165 int i);
166 static int arm_get_strip_length (int);
167 static bool arm_function_ok_for_sibcall (tree, tree);
168 static machine_mode arm_promote_function_mode (const_tree,
169 machine_mode, int *,
170 const_tree, int);
171 static bool arm_return_in_memory (const_tree, const_tree);
172 static rtx arm_function_value (const_tree, const_tree, bool);
173 static rtx arm_libcall_value_1 (machine_mode);
174 static rtx arm_libcall_value (machine_mode, const_rtx);
175 static bool arm_function_value_regno_p (const unsigned int);
176 static void arm_internal_label (FILE *, const char *, unsigned long);
177 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
178 tree);
179 static bool arm_have_conditional_execution (void);
180 static bool arm_cannot_force_const_mem (machine_mode, rtx);
181 static bool arm_legitimate_constant_p (machine_mode, rtx);
182 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
183 static int arm_insn_cost (rtx_insn *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
190 static rtx emit_multi_reg_push (unsigned long, unsigned long);
191 static void arm_emit_multi_reg_pop (unsigned long);
192 static int vfp_emit_fstmd (int, int);
193 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
194 static int arm_arg_partial_bytes (cumulative_args_t,
195 const function_arg_info &);
196 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
197 static void arm_function_arg_advance (cumulative_args_t,
198 const function_arg_info &);
199 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
200 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
201 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
202 const_tree);
203 static rtx aapcs_libcall_value (machine_mode);
204 static int aapcs_select_return_coproc (const_tree, const_tree);
205
206 #ifdef OBJECT_FORMAT_ELF
207 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
208 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
209 #endif
210 #ifndef ARM_PE
211 static void arm_encode_section_info (tree, rtx, int);
212 #endif
213
214 static void arm_file_end (void);
215 static void arm_file_start (void);
216 static void arm_insert_attributes (tree, tree *);
217
218 static void arm_setup_incoming_varargs (cumulative_args_t,
219 const function_arg_info &, int *, int);
220 static bool arm_pass_by_reference (cumulative_args_t,
221 const function_arg_info &);
222 static bool arm_promote_prototypes (const_tree);
223 static bool arm_default_short_enums (void);
224 static bool arm_align_anon_bitfield (void);
225 static bool arm_return_in_msb (const_tree);
226 static bool arm_must_pass_in_stack (const function_arg_info &);
227 static bool arm_return_in_memory (const_tree, const_tree);
228 #if ARM_UNWIND_INFO
229 static void arm_unwind_emit (FILE *, rtx_insn *);
230 static bool arm_output_ttype (rtx);
231 static void arm_asm_emit_except_personality (rtx);
232 #endif
233 static void arm_asm_init_sections (void);
234 static rtx arm_dwarf_register_span (rtx);
235
236 static tree arm_cxx_guard_type (void);
237 static bool arm_cxx_guard_mask_bit (void);
238 static tree arm_get_cookie_size (tree);
239 static bool arm_cookie_has_size (void);
240 static bool arm_cxx_cdtor_returns_this (void);
241 static bool arm_cxx_key_method_may_be_inline (void);
242 static void arm_cxx_determine_class_data_visibility (tree);
243 static bool arm_cxx_class_data_always_comdat (void);
244 static bool arm_cxx_use_aeabi_atexit (void);
245 static void arm_init_libfuncs (void);
246 static tree arm_build_builtin_va_list (void);
247 static void arm_expand_builtin_va_start (tree, rtx);
248 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
249 static void arm_option_override (void);
250 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
251 struct cl_target_option *);
252 static void arm_override_options_after_change (void);
253 static void arm_option_print (FILE *, int, struct cl_target_option *);
254 static void arm_set_current_function (tree);
255 static bool arm_can_inline_p (tree, tree);
256 static void arm_relayout_function (tree);
257 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
258 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
259 static bool arm_sched_can_speculate_insn (rtx_insn *);
260 static bool arm_macro_fusion_p (void);
261 static bool arm_cannot_copy_insn_p (rtx_insn *);
262 static int arm_issue_rate (void);
263 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
264 static int arm_first_cycle_multipass_dfa_lookahead (void);
265 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
266 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
267 static bool arm_output_addr_const_extra (FILE *, rtx);
268 static bool arm_allocate_stack_slots_for_args (void);
269 static bool arm_warn_func_return (tree);
270 static tree arm_promoted_type (const_tree t);
271 static bool arm_scalar_mode_supported_p (scalar_mode);
272 static bool arm_frame_pointer_required (void);
273 static bool arm_can_eliminate (const int, const int);
274 static void arm_asm_trampoline_template (FILE *);
275 static void arm_trampoline_init (rtx, tree, rtx);
276 static rtx arm_trampoline_adjust_address (rtx);
277 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
278 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool arm_array_mode_supported_p (machine_mode,
282 unsigned HOST_WIDE_INT);
283 static machine_mode arm_preferred_simd_mode (scalar_mode);
284 static bool arm_class_likely_spilled_p (reg_class_t);
285 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
286 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
287 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
288 const_tree type,
289 int misalignment,
290 bool is_packed);
291 static void arm_conditional_register_usage (void);
292 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
298 static int arm_cortex_m7_branch_cost (bool, bool);
299
300 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
301 const vec_perm_indices &);
302
303 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
304
305 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
306 tree vectype,
307 int misalign ATTRIBUTE_UNUSED);
308 static unsigned arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
309 enum vect_cost_for_stmt kind,
310 struct _stmt_vec_info *stmt_info,
311 tree vectype, int misalign,
312 enum vect_cost_model_location where);
313
314 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
315 bool op0_preserve_value);
316 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
317
318 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
319 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
320 const_tree);
321 static section *arm_function_section (tree, enum node_frequency, bool, bool);
322 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
323 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
324 int reloc);
325 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
326 static opt_scalar_float_mode arm_floatn_mode (int, bool);
327 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
328 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
329 static bool arm_modes_tieable_p (machine_mode, machine_mode);
330 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
331 static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
332 vec<const char *> &, vec<rtx> &,
333 HARD_REG_SET &);
334 \f
335 /* Table of machine attributes. */
336 static const struct attribute_spec arm_attribute_table[] =
337 {
338 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
339 affects_type_identity, handler, exclude } */
340 /* Function calls made to this symbol must be done indirectly, because
341 it may lie outside of the 26 bit addressing range of a normal function
342 call. */
343 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
344 /* Whereas these functions are always known to reside within the 26 bit
345 addressing range. */
346 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
347 /* Specify the procedure call conventions for a function. */
348 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
349 NULL },
350 /* Interrupt Service Routines have special prologue and epilogue requirements. */
351 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
354 NULL },
355 { "naked", 0, 0, true, false, false, false,
356 arm_handle_fndecl_attribute, NULL },
357 #ifdef ARM_PE
358 /* ARM/PE has three new attributes:
359 interfacearm - ?
360 dllexport - for exporting a function/variable that will live in a dll
361 dllimport - for importing a function/variable from a dll
362
363 Microsoft allows multiple declspecs in one __declspec, separating
364 them with spaces. We do NOT support this. Instead, use __declspec
365 multiple times.
366 */
367 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
368 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
369 { "interfacearm", 0, 0, true, false, false, false,
370 arm_handle_fndecl_attribute, NULL },
371 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
373 NULL },
374 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
375 NULL },
376 { "notshared", 0, 0, false, true, false, false,
377 arm_handle_notshared_attribute, NULL },
378 #endif
379 /* ARMv8-M Security Extensions support. */
380 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
381 arm_handle_cmse_nonsecure_entry, NULL },
382 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
383 arm_handle_cmse_nonsecure_call, NULL },
384 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
385 { NULL, 0, 0, false, false, false, false, NULL, NULL }
386 };
387 \f
388 /* Initialize the GCC target structure. */
389 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
390 #undef TARGET_MERGE_DECL_ATTRIBUTES
391 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
392 #endif
393
394 #undef TARGET_CHECK_BUILTIN_CALL
395 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
396
397 #undef TARGET_LEGITIMIZE_ADDRESS
398 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
399
400 #undef TARGET_ATTRIBUTE_TABLE
401 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
402
403 #undef TARGET_INSERT_ATTRIBUTES
404 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
405
406 #undef TARGET_ASM_FILE_START
407 #define TARGET_ASM_FILE_START arm_file_start
408 #undef TARGET_ASM_FILE_END
409 #define TARGET_ASM_FILE_END arm_file_end
410
411 #undef TARGET_ASM_ALIGNED_SI_OP
412 #define TARGET_ASM_ALIGNED_SI_OP NULL
413 #undef TARGET_ASM_INTEGER
414 #define TARGET_ASM_INTEGER arm_assemble_integer
415
416 #undef TARGET_PRINT_OPERAND
417 #define TARGET_PRINT_OPERAND arm_print_operand
418 #undef TARGET_PRINT_OPERAND_ADDRESS
419 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
420 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
421 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
422
423 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
424 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
425
426 #undef TARGET_ASM_FUNCTION_PROLOGUE
427 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
428
429 #undef TARGET_ASM_FUNCTION_EPILOGUE
430 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
431
432 #undef TARGET_CAN_INLINE_P
433 #define TARGET_CAN_INLINE_P arm_can_inline_p
434
435 #undef TARGET_RELAYOUT_FUNCTION
436 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
437
438 #undef TARGET_OPTION_OVERRIDE
439 #define TARGET_OPTION_OVERRIDE arm_option_override
440
441 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
442 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
443
444 #undef TARGET_OPTION_RESTORE
445 #define TARGET_OPTION_RESTORE arm_option_restore
446
447 #undef TARGET_OPTION_PRINT
448 #define TARGET_OPTION_PRINT arm_option_print
449
450 #undef TARGET_COMP_TYPE_ATTRIBUTES
451 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
452
453 #undef TARGET_SCHED_CAN_SPECULATE_INSN
454 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
455
456 #undef TARGET_SCHED_MACRO_FUSION_P
457 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
458
459 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
460 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
461
462 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
463 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
464
465 #undef TARGET_SCHED_ADJUST_COST
466 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
467
468 #undef TARGET_SET_CURRENT_FUNCTION
469 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
470
471 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
472 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
473
474 #undef TARGET_SCHED_REORDER
475 #define TARGET_SCHED_REORDER arm_sched_reorder
476
477 #undef TARGET_REGISTER_MOVE_COST
478 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
479
480 #undef TARGET_MEMORY_MOVE_COST
481 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
482
483 #undef TARGET_ENCODE_SECTION_INFO
484 #ifdef ARM_PE
485 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
486 #else
487 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
488 #endif
489
490 #undef TARGET_STRIP_NAME_ENCODING
491 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
492
493 #undef TARGET_ASM_INTERNAL_LABEL
494 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
495
496 #undef TARGET_FLOATN_MODE
497 #define TARGET_FLOATN_MODE arm_floatn_mode
498
499 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
500 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
501
502 #undef TARGET_FUNCTION_VALUE
503 #define TARGET_FUNCTION_VALUE arm_function_value
504
505 #undef TARGET_LIBCALL_VALUE
506 #define TARGET_LIBCALL_VALUE arm_libcall_value
507
508 #undef TARGET_FUNCTION_VALUE_REGNO_P
509 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
510
511 #undef TARGET_ASM_OUTPUT_MI_THUNK
512 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
513 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
514 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
515
516 #undef TARGET_RTX_COSTS
517 #define TARGET_RTX_COSTS arm_rtx_costs
518 #undef TARGET_ADDRESS_COST
519 #define TARGET_ADDRESS_COST arm_address_cost
520 #undef TARGET_INSN_COST
521 #define TARGET_INSN_COST arm_insn_cost
522
523 #undef TARGET_SHIFT_TRUNCATION_MASK
524 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
525 #undef TARGET_VECTOR_MODE_SUPPORTED_P
526 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
527 #undef TARGET_ARRAY_MODE_SUPPORTED_P
528 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
529 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
530 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
531 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
532 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
533 arm_autovectorize_vector_modes
534
535 #undef TARGET_MACHINE_DEPENDENT_REORG
536 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
537
538 #undef TARGET_INIT_BUILTINS
539 #define TARGET_INIT_BUILTINS arm_init_builtins
540 #undef TARGET_EXPAND_BUILTIN
541 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
542 #undef TARGET_BUILTIN_DECL
543 #define TARGET_BUILTIN_DECL arm_builtin_decl
544
545 #undef TARGET_INIT_LIBFUNCS
546 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
547
548 #undef TARGET_PROMOTE_FUNCTION_MODE
549 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
550 #undef TARGET_PROMOTE_PROTOTYPES
551 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
552 #undef TARGET_PASS_BY_REFERENCE
553 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
554 #undef TARGET_ARG_PARTIAL_BYTES
555 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
556 #undef TARGET_FUNCTION_ARG
557 #define TARGET_FUNCTION_ARG arm_function_arg
558 #undef TARGET_FUNCTION_ARG_ADVANCE
559 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
560 #undef TARGET_FUNCTION_ARG_PADDING
561 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
562 #undef TARGET_FUNCTION_ARG_BOUNDARY
563 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
564
565 #undef TARGET_SETUP_INCOMING_VARARGS
566 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
567
568 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
569 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
570
571 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
572 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
573 #undef TARGET_TRAMPOLINE_INIT
574 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
575 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
576 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
577
578 #undef TARGET_WARN_FUNC_RETURN
579 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
580
581 #undef TARGET_DEFAULT_SHORT_ENUMS
582 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
583
584 #undef TARGET_ALIGN_ANON_BITFIELD
585 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
586
587 #undef TARGET_NARROW_VOLATILE_BITFIELD
588 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
589
590 #undef TARGET_CXX_GUARD_TYPE
591 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
592
593 #undef TARGET_CXX_GUARD_MASK_BIT
594 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
595
596 #undef TARGET_CXX_GET_COOKIE_SIZE
597 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
598
599 #undef TARGET_CXX_COOKIE_HAS_SIZE
600 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
601
602 #undef TARGET_CXX_CDTOR_RETURNS_THIS
603 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
604
605 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
606 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
607
608 #undef TARGET_CXX_USE_AEABI_ATEXIT
609 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
610
611 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
612 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
613 arm_cxx_determine_class_data_visibility
614
615 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
616 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
617
618 #undef TARGET_RETURN_IN_MSB
619 #define TARGET_RETURN_IN_MSB arm_return_in_msb
620
621 #undef TARGET_RETURN_IN_MEMORY
622 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
623
624 #undef TARGET_MUST_PASS_IN_STACK
625 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
626
627 #if ARM_UNWIND_INFO
628 #undef TARGET_ASM_UNWIND_EMIT
629 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
630
631 /* EABI unwinding tables use a different format for the typeinfo tables. */
632 #undef TARGET_ASM_TTYPE
633 #define TARGET_ASM_TTYPE arm_output_ttype
634
635 #undef TARGET_ARM_EABI_UNWINDER
636 #define TARGET_ARM_EABI_UNWINDER true
637
638 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
639 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
640
641 #endif /* ARM_UNWIND_INFO */
642
643 #undef TARGET_ASM_INIT_SECTIONS
644 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
645
646 #undef TARGET_DWARF_REGISTER_SPAN
647 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
648
649 #undef TARGET_CANNOT_COPY_INSN_P
650 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
651
652 #ifdef HAVE_AS_TLS
653 #undef TARGET_HAVE_TLS
654 #define TARGET_HAVE_TLS true
655 #endif
656
657 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
658 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
659
660 #undef TARGET_LEGITIMATE_CONSTANT_P
661 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
662
663 #undef TARGET_CANNOT_FORCE_CONST_MEM
664 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
665
666 #undef TARGET_MAX_ANCHOR_OFFSET
667 #define TARGET_MAX_ANCHOR_OFFSET 4095
668
669 /* The minimum is set such that the total size of the block
670 for a particular anchor is 4088 + 1 + 4095 bytes, which is
671 divisible by eight, ensuring natural spacing of anchors. */
672 #undef TARGET_MIN_ANCHOR_OFFSET
673 #define TARGET_MIN_ANCHOR_OFFSET -4088
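/* Worked check (editorial note, not part of the upstream file): an anchor can
   reach offsets in [-4088, +4095], so the block it covers spans
   4088 + 1 + 4095 = 8184 bytes, and 8184 / 8 = 1023, i.e. the block size is
   an exact multiple of eight.  */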
674
675 #undef TARGET_SCHED_ISSUE_RATE
676 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
677
678 #undef TARGET_SCHED_VARIABLE_ISSUE
679 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
680
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
683 arm_first_cycle_multipass_dfa_lookahead
684
685 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
686 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
687 arm_first_cycle_multipass_dfa_lookahead_guard
688
689 #undef TARGET_MANGLE_TYPE
690 #define TARGET_MANGLE_TYPE arm_mangle_type
691
692 #undef TARGET_INVALID_CONVERSION
693 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
694
695 #undef TARGET_INVALID_UNARY_OP
696 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
697
698 #undef TARGET_INVALID_BINARY_OP
699 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
700
701 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
702 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
703
704 #undef TARGET_BUILD_BUILTIN_VA_LIST
705 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
706 #undef TARGET_EXPAND_BUILTIN_VA_START
707 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
708 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
709 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
710
711 #ifdef HAVE_AS_TLS
712 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
713 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
714 #endif
715
716 #undef TARGET_LEGITIMATE_ADDRESS_P
717 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
718
719 #undef TARGET_PREFERRED_RELOAD_CLASS
720 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
721
722 #undef TARGET_PROMOTED_TYPE
723 #define TARGET_PROMOTED_TYPE arm_promoted_type
724
725 #undef TARGET_SCALAR_MODE_SUPPORTED_P
726 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
727
728 #undef TARGET_COMPUTE_FRAME_LAYOUT
729 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
730
731 #undef TARGET_FRAME_POINTER_REQUIRED
732 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
733
734 #undef TARGET_CAN_ELIMINATE
735 #define TARGET_CAN_ELIMINATE arm_can_eliminate
736
737 #undef TARGET_CONDITIONAL_REGISTER_USAGE
738 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
739
740 #undef TARGET_CLASS_LIKELY_SPILLED_P
741 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
742
743 #undef TARGET_VECTORIZE_BUILTINS
744 #define TARGET_VECTORIZE_BUILTINS
745
746 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
747 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
748 arm_builtin_vectorized_function
749
750 #undef TARGET_VECTOR_ALIGNMENT
751 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
752
753 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
754 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
755 arm_vector_alignment_reachable
756
757 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
758 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
759 arm_builtin_support_vector_misalignment
760
761 #undef TARGET_PREFERRED_RENAME_CLASS
762 #define TARGET_PREFERRED_RENAME_CLASS \
763 arm_preferred_rename_class
764
765 #undef TARGET_VECTORIZE_VEC_PERM_CONST
766 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
767
768 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
769 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
770 arm_builtin_vectorization_cost
771 #undef TARGET_VECTORIZE_ADD_STMT_COST
772 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
773
774 #undef TARGET_CANONICALIZE_COMPARISON
775 #define TARGET_CANONICALIZE_COMPARISON \
776 arm_canonicalize_comparison
777
778 #undef TARGET_ASAN_SHADOW_OFFSET
779 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
780
781 #undef MAX_INSN_PER_IT_BLOCK
782 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
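/* Editorial note: when IT blocks are restricted (the -mrestrict-it style of
   code generation preferred on ARMv8, where multi-instruction IT blocks are
   deprecated), only one conditional instruction is allowed per IT block;
   otherwise Thumb-2 permits up to four.  */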
783
784 #undef TARGET_CAN_USE_DOLOOP_P
785 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
786
787 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
788 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
789
790 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
791 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
792
793 #undef TARGET_SCHED_FUSION_PRIORITY
794 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
795
796 #undef TARGET_ASM_FUNCTION_SECTION
797 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
798
799 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
800 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
801
802 #undef TARGET_SECTION_TYPE_FLAGS
803 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
804
805 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
806 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
807
808 #undef TARGET_C_EXCESS_PRECISION
809 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
810
811 /* Although the architecture reserves bits 0 and 1, only the former is
812 used for ARM/Thumb ISA selection in v7 and earlier versions. */
813 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
814 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
815
816 #undef TARGET_FIXED_CONDITION_CODE_REGS
817 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
818
819 #undef TARGET_HARD_REGNO_NREGS
820 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
821 #undef TARGET_HARD_REGNO_MODE_OK
822 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
823
824 #undef TARGET_MODES_TIEABLE_P
825 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
826
827 #undef TARGET_CAN_CHANGE_MODE_CLASS
828 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
829
830 #undef TARGET_CONSTANT_ALIGNMENT
831 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
832
833 #undef TARGET_INVALID_WITHIN_DOLOOP
834 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
835
836 #undef TARGET_MD_ASM_ADJUST
837 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
838 \f
839 /* Obstack for minipool constant handling. */
840 static struct obstack minipool_obstack;
841 static char * minipool_startobj;
842
843 /* The maximum number of skipped insns that will be
844 conditionalised if possible. */
845 static int max_insns_skipped = 5;
846
847 extern FILE * asm_out_file;
848
849 /* True if we are currently building a constant table. */
850 int making_const_table;
851
852 /* The processor for which instructions should be scheduled. */
853 enum processor_type arm_tune = TARGET_CPU_arm_none;
854
855 /* The current tuning set. */
856 const struct tune_params *current_tune;
857
858 /* Which floating point hardware to schedule for. */
859 int arm_fpu_attr;
860
861 /* Used for Thumb call_via trampolines. */
862 rtx thumb_call_via_label[14];
863 static int thumb_call_reg_needed;
864
865 /* The bits in this mask specify which instruction scheduling options should
866 be used. */
867 unsigned int tune_flags = 0;
868
869 /* The highest ARM architecture version supported by the
870 target. */
871 enum base_architecture arm_base_arch = BASE_ARCH_0;
872
873 /* Active target architecture and tuning. */
874
875 struct arm_build_target arm_active_target;
876
877 /* The following are used in the arm.md file as equivalents to bits
878 in the above two flag variables. */
879
880 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
881 int arm_arch4 = 0;
882
883 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
884 int arm_arch4t = 0;
885
886 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
887 int arm_arch5t = 0;
888
889 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
890 int arm_arch5te = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
893 int arm_arch6 = 0;
894
895 /* Nonzero if this chip supports the ARM 6K extensions. */
896 int arm_arch6k = 0;
897
898 /* Nonzero if this chip supports the ARM 6KZ extensions. */
899 int arm_arch6kz = 0;
900
901 /* Nonzero if instructions present in ARMv6-M can be used. */
902 int arm_arch6m = 0;
903
904 /* Nonzero if this chip supports the ARM 7 extensions. */
905 int arm_arch7 = 0;
906
907 /* Nonzero if this chip supports the Large Physical Address Extension. */
908 int arm_arch_lpae = 0;
909
910 /* Nonzero if instructions not present in the 'M' profile can be used. */
911 int arm_arch_notm = 0;
912
913 /* Nonzero if instructions present in ARMv7E-M can be used. */
914 int arm_arch7em = 0;
915
916 /* Nonzero if instructions present in ARMv8 can be used. */
917 int arm_arch8 = 0;
918
919 /* Nonzero if this chip supports the ARMv8.1 extensions. */
920 int arm_arch8_1 = 0;
921
922 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
923 int arm_arch8_2 = 0;
924
925 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
926 int arm_arch8_3 = 0;
927
928 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
929 int arm_arch8_4 = 0;
930 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
931 extensions. */
932 int arm_arch8_1m_main = 0;
933
934 /* Nonzero if this chip supports the FP16 instructions extension of ARM
935 Architecture 8.2. */
936 int arm_fp16_inst = 0;
937
938 /* Nonzero if this chip can benefit from load scheduling. */
939 int arm_ld_sched = 0;
940
941 /* Nonzero if this chip is a StrongARM. */
942 int arm_tune_strongarm = 0;
943
944 /* Nonzero if this chip supports Intel Wireless MMX technology. */
945 int arm_arch_iwmmxt = 0;
946
947 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
948 int arm_arch_iwmmxt2 = 0;
949
950 /* Nonzero if this chip is an XScale. */
951 int arm_arch_xscale = 0;
952
953 /* Nonzero if tuning for XScale. */
954 int arm_tune_xscale = 0;
955
956 /* Nonzero if we want to tune for stores that access the write-buffer.
957 This typically means an ARM6 or ARM7 with MMU or MPU. */
958 int arm_tune_wbuf = 0;
959
960 /* Nonzero if tuning for Cortex-A9. */
961 int arm_tune_cortex_a9 = 0;
962
963 /* Nonzero if we should define __THUMB_INTERWORK__ in the
964 preprocessor.
965 XXX This is a bit of a hack; it's intended to help work around
966 problems in GLD, which doesn't understand that armv5t code is
967 interworking clean. */
968 int arm_cpp_interwork = 0;
969
970 /* Nonzero if chip supports Thumb 1. */
971 int arm_arch_thumb1;
972
973 /* Nonzero if chip supports Thumb 2. */
974 int arm_arch_thumb2;
975
976 /* Nonzero if chip supports integer division instruction. */
977 int arm_arch_arm_hwdiv;
978 int arm_arch_thumb_hwdiv;
979
980 /* Nonzero if chip disallows volatile memory access in IT block. */
981 int arm_arch_no_volatile_ce;
982
983 /* Nonzero if we shouldn't use literal pools. */
984 bool arm_disable_literal_pool = false;
985
986 /* The register number to be used for the PIC offset register. */
987 unsigned arm_pic_register = INVALID_REGNUM;
988
989 enum arm_pcs arm_pcs_default;
990
991 /* For an explanation of these variables, see final_prescan_insn below. */
992 int arm_ccfsm_state;
993 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
994 enum arm_cond_code arm_current_cc;
995
996 rtx arm_target_insn;
997 int arm_target_label;
998 /* The number of conditionally executed insns, including the current insn. */
999 int arm_condexec_count = 0;
1000 /* A bitmask specifying the patterns for the IT block.
1001 Zero means do not output an IT block before this insn. */
1002 int arm_condexec_mask = 0;
1003 /* The number of bits used in arm_condexec_mask. */
1004 int arm_condexec_masklen = 0;
1005
1006 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1007 int arm_arch_crc = 0;
1008
1009 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1010 int arm_arch_dotprod = 0;
1011
1012 /* Nonzero if chip supports the ARMv8-M security extensions. */
1013 int arm_arch_cmse = 0;
1014
1015 /* Nonzero if the core has a very small, high-latency multiply unit. */
1016 int arm_m_profile_small_mul = 0;
1017
1018 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1019 int arm_arch_i8mm = 0;
1020
1021 /* Nonzero if chip supports the BFloat16 instructions. */
1022 int arm_arch_bf16 = 0;
1023
1024 /* Nonzero if chip supports the Custom Datapath Extension. */
1025 int arm_arch_cde = 0;
1026 int arm_arch_cde_coproc = 0;
1027 const int arm_arch_cde_coproc_bits[] = {
1028 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1029 };
1030
1031 /* The condition codes of the ARM, and the inverse function. */
1032 static const char * const arm_condition_codes[] =
1033 {
1034 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1035 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1036 };
1037
1038 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1039 int arm_regs_in_sequence[] =
1040 {
1041 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1042 };
1043
1044 #define DEF_FP_SYSREG(reg) #reg,
1045 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1046 FP_SYSREGS
1047 };
1048 #undef DEF_FP_SYSREG
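/* Editorial sketch of the X-macro pattern above: DEF_FP_SYSREG(reg)
   stringizes its argument and appends a comma, so if FP_SYSREGS were
   defined elsewhere (hypothetical expansion for illustration) as
       DEF_FP_SYSREG (fpscr)
       DEF_FP_SYSREG (fpcxt_ns)
   then fp_sysreg_names would be initialised to { "fpscr", "fpcxt_ns", }.  */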
1049
1050 #define ARM_LSL_NAME "lsl"
1051 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1052
1053 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1054 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1055 | (1 << PIC_OFFSET_TABLE_REGNUM)))
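/* Editorial note: assuming the usual arm.h register numbering
   (THUMB_HARD_FRAME_POINTER_REGNUM == 7, SP_REGNUM == 13, PC_REGNUM == 15)
   and no low PIC register, this evaluates to 0xff & ~0x80 == 0x7f, i.e. the
   Thumb work registers r0-r6; the SP and PC bits already lie outside the
   0xff base mask.  */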
1056 \f
1057 /* Initialization code. */
1058
1059 struct cpu_tune
1060 {
1061 enum processor_type scheduler;
1062 unsigned int tune_flags;
1063 const struct tune_params *tune;
1064 };
1065
1066 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1067 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1068 { \
1069 num_slots, \
1070 l1_size, \
1071 l1_line_size \
1072 }
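/* Editorial note: ARM_PREFETCH_NOT_BENEFICIAL therefore means zero prefetch
   slots with the L1 cache size and line size left unspecified (-1), mirroring
   the num_slots/l1_size/l1_line_size parameters of ARM_PREFETCH_BENEFICIAL.  */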
1073
1074 /* arm generic vectorizer costs. */
1075 static const
1076 struct cpu_vec_costs arm_default_vec_cost = {
1077 1, /* scalar_stmt_cost. */
1078 1, /* scalar load_cost. */
1079 1, /* scalar_store_cost. */
1080 1, /* vec_stmt_cost. */
1081 1, /* vec_to_scalar_cost. */
1082 1, /* scalar_to_vec_cost. */
1083 1, /* vec_align_load_cost. */
1084 1, /* vec_unalign_load_cost. */
1085 1, /* vec_unalign_store_cost. */
1086 1, /* vec_store_cost. */
1087 3, /* cond_taken_branch_cost. */
1088 1, /* cond_not_taken_branch_cost. */
1089 };
1090
1091 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1092 #include "aarch-cost-tables.h"
1093
1094
1095
1096 const struct cpu_cost_table cortexa9_extra_costs =
1097 {
1098 /* ALU */
1099 {
1100 0, /* arith. */
1101 0, /* logical. */
1102 0, /* shift. */
1103 COSTS_N_INSNS (1), /* shift_reg. */
1104 COSTS_N_INSNS (1), /* arith_shift. */
1105 COSTS_N_INSNS (2), /* arith_shift_reg. */
1106 0, /* log_shift. */
1107 COSTS_N_INSNS (1), /* log_shift_reg. */
1108 COSTS_N_INSNS (1), /* extend. */
1109 COSTS_N_INSNS (2), /* extend_arith. */
1110 COSTS_N_INSNS (1), /* bfi. */
1111 COSTS_N_INSNS (1), /* bfx. */
1112 0, /* clz. */
1113 0, /* rev. */
1114 0, /* non_exec. */
1115 true /* non_exec_costs_exec. */
1116 },
1117 {
1118 /* MULT SImode */
1119 {
1120 COSTS_N_INSNS (3), /* simple. */
1121 COSTS_N_INSNS (3), /* flag_setting. */
1122 COSTS_N_INSNS (2), /* extend. */
1123 COSTS_N_INSNS (3), /* add. */
1124 COSTS_N_INSNS (2), /* extend_add. */
1125 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1126 },
1127 /* MULT DImode */
1128 {
1129 0, /* simple (N/A). */
1130 0, /* flag_setting (N/A). */
1131 COSTS_N_INSNS (4), /* extend. */
1132 0, /* add (N/A). */
1133 COSTS_N_INSNS (4), /* extend_add. */
1134 0 /* idiv (N/A). */
1135 }
1136 },
1137 /* LD/ST */
1138 {
1139 COSTS_N_INSNS (2), /* load. */
1140 COSTS_N_INSNS (2), /* load_sign_extend. */
1141 COSTS_N_INSNS (2), /* ldrd. */
1142 COSTS_N_INSNS (2), /* ldm_1st. */
1143 1, /* ldm_regs_per_insn_1st. */
1144 2, /* ldm_regs_per_insn_subsequent. */
1145 COSTS_N_INSNS (5), /* loadf. */
1146 COSTS_N_INSNS (5), /* loadd. */
1147 COSTS_N_INSNS (1), /* load_unaligned. */
1148 COSTS_N_INSNS (2), /* store. */
1149 COSTS_N_INSNS (2), /* strd. */
1150 COSTS_N_INSNS (2), /* stm_1st. */
1151 1, /* stm_regs_per_insn_1st. */
1152 2, /* stm_regs_per_insn_subsequent. */
1153 COSTS_N_INSNS (1), /* storef. */
1154 COSTS_N_INSNS (1), /* stored. */
1155 COSTS_N_INSNS (1), /* store_unaligned. */
1156 COSTS_N_INSNS (1), /* loadv. */
1157 COSTS_N_INSNS (1) /* storev. */
1158 },
1159 {
1160 /* FP SFmode */
1161 {
1162 COSTS_N_INSNS (14), /* div. */
1163 COSTS_N_INSNS (4), /* mult. */
1164 COSTS_N_INSNS (7), /* mult_addsub. */
1165 COSTS_N_INSNS (30), /* fma. */
1166 COSTS_N_INSNS (3), /* addsub. */
1167 COSTS_N_INSNS (1), /* fpconst. */
1168 COSTS_N_INSNS (1), /* neg. */
1169 COSTS_N_INSNS (3), /* compare. */
1170 COSTS_N_INSNS (3), /* widen. */
1171 COSTS_N_INSNS (3), /* narrow. */
1172 COSTS_N_INSNS (3), /* toint. */
1173 COSTS_N_INSNS (3), /* fromint. */
1174 COSTS_N_INSNS (3) /* roundint. */
1175 },
1176 /* FP DFmode */
1177 {
1178 COSTS_N_INSNS (24), /* div. */
1179 COSTS_N_INSNS (5), /* mult. */
1180 COSTS_N_INSNS (8), /* mult_addsub. */
1181 COSTS_N_INSNS (30), /* fma. */
1182 COSTS_N_INSNS (3), /* addsub. */
1183 COSTS_N_INSNS (1), /* fpconst. */
1184 COSTS_N_INSNS (1), /* neg. */
1185 COSTS_N_INSNS (3), /* compare. */
1186 COSTS_N_INSNS (3), /* widen. */
1187 COSTS_N_INSNS (3), /* narrow. */
1188 COSTS_N_INSNS (3), /* toint. */
1189 COSTS_N_INSNS (3), /* fromint. */
1190 COSTS_N_INSNS (3) /* roundint. */
1191 }
1192 },
1193 /* Vector */
1194 {
1195 COSTS_N_INSNS (1) /* alu. */
1196 }
1197 };
1198
1199 const struct cpu_cost_table cortexa8_extra_costs =
1200 {
1201 /* ALU */
1202 {
1203 0, /* arith. */
1204 0, /* logical. */
1205 COSTS_N_INSNS (1), /* shift. */
1206 0, /* shift_reg. */
1207 COSTS_N_INSNS (1), /* arith_shift. */
1208 0, /* arith_shift_reg. */
1209 COSTS_N_INSNS (1), /* log_shift. */
1210 0, /* log_shift_reg. */
1211 0, /* extend. */
1212 0, /* extend_arith. */
1213 0, /* bfi. */
1214 0, /* bfx. */
1215 0, /* clz. */
1216 0, /* rev. */
1217 0, /* non_exec. */
1218 true /* non_exec_costs_exec. */
1219 },
1220 {
1221 /* MULT SImode */
1222 {
1223 COSTS_N_INSNS (1), /* simple. */
1224 COSTS_N_INSNS (1), /* flag_setting. */
1225 COSTS_N_INSNS (1), /* extend. */
1226 COSTS_N_INSNS (1), /* add. */
1227 COSTS_N_INSNS (1), /* extend_add. */
1228 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1229 },
1230 /* MULT DImode */
1231 {
1232 0, /* simple (N/A). */
1233 0, /* flag_setting (N/A). */
1234 COSTS_N_INSNS (2), /* extend. */
1235 0, /* add (N/A). */
1236 COSTS_N_INSNS (2), /* extend_add. */
1237 0 /* idiv (N/A). */
1238 }
1239 },
1240 /* LD/ST */
1241 {
1242 COSTS_N_INSNS (1), /* load. */
1243 COSTS_N_INSNS (1), /* load_sign_extend. */
1244 COSTS_N_INSNS (1), /* ldrd. */
1245 COSTS_N_INSNS (1), /* ldm_1st. */
1246 1, /* ldm_regs_per_insn_1st. */
1247 2, /* ldm_regs_per_insn_subsequent. */
1248 COSTS_N_INSNS (1), /* loadf. */
1249 COSTS_N_INSNS (1), /* loadd. */
1250 COSTS_N_INSNS (1), /* load_unaligned. */
1251 COSTS_N_INSNS (1), /* store. */
1252 COSTS_N_INSNS (1), /* strd. */
1253 COSTS_N_INSNS (1), /* stm_1st. */
1254 1, /* stm_regs_per_insn_1st. */
1255 2, /* stm_regs_per_insn_subsequent. */
1256 COSTS_N_INSNS (1), /* storef. */
1257 COSTS_N_INSNS (1), /* stored. */
1258 COSTS_N_INSNS (1), /* store_unaligned. */
1259 COSTS_N_INSNS (1), /* loadv. */
1260 COSTS_N_INSNS (1) /* storev. */
1261 },
1262 {
1263 /* FP SFmode */
1264 {
1265 COSTS_N_INSNS (36), /* div. */
1266 COSTS_N_INSNS (11), /* mult. */
1267 COSTS_N_INSNS (20), /* mult_addsub. */
1268 COSTS_N_INSNS (30), /* fma. */
1269 COSTS_N_INSNS (9), /* addsub. */
1270 COSTS_N_INSNS (3), /* fpconst. */
1271 COSTS_N_INSNS (3), /* neg. */
1272 COSTS_N_INSNS (6), /* compare. */
1273 COSTS_N_INSNS (4), /* widen. */
1274 COSTS_N_INSNS (4), /* narrow. */
1275 COSTS_N_INSNS (8), /* toint. */
1276 COSTS_N_INSNS (8), /* fromint. */
1277 COSTS_N_INSNS (8) /* roundint. */
1278 },
1279 /* FP DFmode */
1280 {
1281 COSTS_N_INSNS (64), /* div. */
1282 COSTS_N_INSNS (16), /* mult. */
1283 COSTS_N_INSNS (25), /* mult_addsub. */
1284 COSTS_N_INSNS (30), /* fma. */
1285 COSTS_N_INSNS (9), /* addsub. */
1286 COSTS_N_INSNS (3), /* fpconst. */
1287 COSTS_N_INSNS (3), /* neg. */
1288 COSTS_N_INSNS (6), /* compare. */
1289 COSTS_N_INSNS (6), /* widen. */
1290 COSTS_N_INSNS (6), /* narrow. */
1291 COSTS_N_INSNS (8), /* toint. */
1292 COSTS_N_INSNS (8), /* fromint. */
1293 COSTS_N_INSNS (8) /* roundint. */
1294 }
1295 },
1296 /* Vector */
1297 {
1298 COSTS_N_INSNS (1) /* alu. */
1299 }
1300 };
1301
1302 const struct cpu_cost_table cortexa5_extra_costs =
1303 {
1304 /* ALU */
1305 {
1306 0, /* arith. */
1307 0, /* logical. */
1308 COSTS_N_INSNS (1), /* shift. */
1309 COSTS_N_INSNS (1), /* shift_reg. */
1310 COSTS_N_INSNS (1), /* arith_shift. */
1311 COSTS_N_INSNS (1), /* arith_shift_reg. */
1312 COSTS_N_INSNS (1), /* log_shift. */
1313 COSTS_N_INSNS (1), /* log_shift_reg. */
1314 COSTS_N_INSNS (1), /* extend. */
1315 COSTS_N_INSNS (1), /* extend_arith. */
1316 COSTS_N_INSNS (1), /* bfi. */
1317 COSTS_N_INSNS (1), /* bfx. */
1318 COSTS_N_INSNS (1), /* clz. */
1319 COSTS_N_INSNS (1), /* rev. */
1320 0, /* non_exec. */
1321 true /* non_exec_costs_exec. */
1322 },
1323
1324 {
1325 /* MULT SImode */
1326 {
1327 0, /* simple. */
1328 COSTS_N_INSNS (1), /* flag_setting. */
1329 COSTS_N_INSNS (1), /* extend. */
1330 COSTS_N_INSNS (1), /* add. */
1331 COSTS_N_INSNS (1), /* extend_add. */
1332 COSTS_N_INSNS (7) /* idiv. */
1333 },
1334 /* MULT DImode */
1335 {
1336 0, /* simple (N/A). */
1337 0, /* flag_setting (N/A). */
1338 COSTS_N_INSNS (1), /* extend. */
1339 0, /* add. */
1340 COSTS_N_INSNS (2), /* extend_add. */
1341 0 /* idiv (N/A). */
1342 }
1343 },
1344 /* LD/ST */
1345 {
1346 COSTS_N_INSNS (1), /* load. */
1347 COSTS_N_INSNS (1), /* load_sign_extend. */
1348 COSTS_N_INSNS (6), /* ldrd. */
1349 COSTS_N_INSNS (1), /* ldm_1st. */
1350 1, /* ldm_regs_per_insn_1st. */
1351 2, /* ldm_regs_per_insn_subsequent. */
1352 COSTS_N_INSNS (2), /* loadf. */
1353 COSTS_N_INSNS (4), /* loadd. */
1354 COSTS_N_INSNS (1), /* load_unaligned. */
1355 COSTS_N_INSNS (1), /* store. */
1356 COSTS_N_INSNS (3), /* strd. */
1357 COSTS_N_INSNS (1), /* stm_1st. */
1358 1, /* stm_regs_per_insn_1st. */
1359 2, /* stm_regs_per_insn_subsequent. */
1360 COSTS_N_INSNS (2), /* storef. */
1361 COSTS_N_INSNS (2), /* stored. */
1362 COSTS_N_INSNS (1), /* store_unaligned. */
1363 COSTS_N_INSNS (1), /* loadv. */
1364 COSTS_N_INSNS (1) /* storev. */
1365 },
1366 {
1367 /* FP SFmode */
1368 {
1369 COSTS_N_INSNS (15), /* div. */
1370 COSTS_N_INSNS (3), /* mult. */
1371 COSTS_N_INSNS (7), /* mult_addsub. */
1372 COSTS_N_INSNS (7), /* fma. */
1373 COSTS_N_INSNS (3), /* addsub. */
1374 COSTS_N_INSNS (3), /* fpconst. */
1375 COSTS_N_INSNS (3), /* neg. */
1376 COSTS_N_INSNS (3), /* compare. */
1377 COSTS_N_INSNS (3), /* widen. */
1378 COSTS_N_INSNS (3), /* narrow. */
1379 COSTS_N_INSNS (3), /* toint. */
1380 COSTS_N_INSNS (3), /* fromint. */
1381 COSTS_N_INSNS (3) /* roundint. */
1382 },
1383 /* FP DFmode */
1384 {
1385 COSTS_N_INSNS (30), /* div. */
1386 COSTS_N_INSNS (6), /* mult. */
1387 COSTS_N_INSNS (10), /* mult_addsub. */
1388 COSTS_N_INSNS (7), /* fma. */
1389 COSTS_N_INSNS (3), /* addsub. */
1390 COSTS_N_INSNS (3), /* fpconst. */
1391 COSTS_N_INSNS (3), /* neg. */
1392 COSTS_N_INSNS (3), /* compare. */
1393 COSTS_N_INSNS (3), /* widen. */
1394 COSTS_N_INSNS (3), /* narrow. */
1395 COSTS_N_INSNS (3), /* toint. */
1396 COSTS_N_INSNS (3), /* fromint. */
1397 COSTS_N_INSNS (3) /* roundint. */
1398 }
1399 },
1400 /* Vector */
1401 {
1402 COSTS_N_INSNS (1) /* alu. */
1403 }
1404 };
1405
1406
1407 const struct cpu_cost_table cortexa7_extra_costs =
1408 {
1409 /* ALU */
1410 {
1411 0, /* arith. */
1412 0, /* logical. */
1413 COSTS_N_INSNS (1), /* shift. */
1414 COSTS_N_INSNS (1), /* shift_reg. */
1415 COSTS_N_INSNS (1), /* arith_shift. */
1416 COSTS_N_INSNS (1), /* arith_shift_reg. */
1417 COSTS_N_INSNS (1), /* log_shift. */
1418 COSTS_N_INSNS (1), /* log_shift_reg. */
1419 COSTS_N_INSNS (1), /* extend. */
1420 COSTS_N_INSNS (1), /* extend_arith. */
1421 COSTS_N_INSNS (1), /* bfi. */
1422 COSTS_N_INSNS (1), /* bfx. */
1423 COSTS_N_INSNS (1), /* clz. */
1424 COSTS_N_INSNS (1), /* rev. */
1425 0, /* non_exec. */
1426 true /* non_exec_costs_exec. */
1427 },
1428
1429 {
1430 /* MULT SImode */
1431 {
1432 0, /* simple. */
1433 COSTS_N_INSNS (1), /* flag_setting. */
1434 COSTS_N_INSNS (1), /* extend. */
1435 COSTS_N_INSNS (1), /* add. */
1436 COSTS_N_INSNS (1), /* extend_add. */
1437 COSTS_N_INSNS (7) /* idiv. */
1438 },
1439 /* MULT DImode */
1440 {
1441 0, /* simple (N/A). */
1442 0, /* flag_setting (N/A). */
1443 COSTS_N_INSNS (1), /* extend. */
1444 0, /* add. */
1445 COSTS_N_INSNS (2), /* extend_add. */
1446 0 /* idiv (N/A). */
1447 }
1448 },
1449 /* LD/ST */
1450 {
1451 COSTS_N_INSNS (1), /* load. */
1452 COSTS_N_INSNS (1), /* load_sign_extend. */
1453 COSTS_N_INSNS (3), /* ldrd. */
1454 COSTS_N_INSNS (1), /* ldm_1st. */
1455 1, /* ldm_regs_per_insn_1st. */
1456 2, /* ldm_regs_per_insn_subsequent. */
1457 COSTS_N_INSNS (2), /* loadf. */
1458 COSTS_N_INSNS (2), /* loadd. */
1459 COSTS_N_INSNS (1), /* load_unaligned. */
1460 COSTS_N_INSNS (1), /* store. */
1461 COSTS_N_INSNS (3), /* strd. */
1462 COSTS_N_INSNS (1), /* stm_1st. */
1463 1, /* stm_regs_per_insn_1st. */
1464 2, /* stm_regs_per_insn_subsequent. */
1465 COSTS_N_INSNS (2), /* storef. */
1466 COSTS_N_INSNS (2), /* stored. */
1467 COSTS_N_INSNS (1), /* store_unaligned. */
1468 COSTS_N_INSNS (1), /* loadv. */
1469 COSTS_N_INSNS (1) /* storev. */
1470 },
1471 {
1472 /* FP SFmode */
1473 {
1474 COSTS_N_INSNS (15), /* div. */
1475 COSTS_N_INSNS (3), /* mult. */
1476 COSTS_N_INSNS (7), /* mult_addsub. */
1477 COSTS_N_INSNS (7), /* fma. */
1478 COSTS_N_INSNS (3), /* addsub. */
1479 COSTS_N_INSNS (3), /* fpconst. */
1480 COSTS_N_INSNS (3), /* neg. */
1481 COSTS_N_INSNS (3), /* compare. */
1482 COSTS_N_INSNS (3), /* widen. */
1483 COSTS_N_INSNS (3), /* narrow. */
1484 COSTS_N_INSNS (3), /* toint. */
1485 COSTS_N_INSNS (3), /* fromint. */
1486 COSTS_N_INSNS (3) /* roundint. */
1487 },
1488 /* FP DFmode */
1489 {
1490 COSTS_N_INSNS (30), /* div. */
1491 COSTS_N_INSNS (6), /* mult. */
1492 COSTS_N_INSNS (10), /* mult_addsub. */
1493 COSTS_N_INSNS (7), /* fma. */
1494 COSTS_N_INSNS (3), /* addsub. */
1495 COSTS_N_INSNS (3), /* fpconst. */
1496 COSTS_N_INSNS (3), /* neg. */
1497 COSTS_N_INSNS (3), /* compare. */
1498 COSTS_N_INSNS (3), /* widen. */
1499 COSTS_N_INSNS (3), /* narrow. */
1500 COSTS_N_INSNS (3), /* toint. */
1501 COSTS_N_INSNS (3), /* fromint. */
1502 COSTS_N_INSNS (3) /* roundint. */
1503 }
1504 },
1505 /* Vector */
1506 {
1507 COSTS_N_INSNS (1) /* alu. */
1508 }
1509 };
1510
1511 const struct cpu_cost_table cortexa12_extra_costs =
1512 {
1513 /* ALU */
1514 {
1515 0, /* arith. */
1516 0, /* logical. */
1517 0, /* shift. */
1518 COSTS_N_INSNS (1), /* shift_reg. */
1519 COSTS_N_INSNS (1), /* arith_shift. */
1520 COSTS_N_INSNS (1), /* arith_shift_reg. */
1521 COSTS_N_INSNS (1), /* log_shift. */
1522 COSTS_N_INSNS (1), /* log_shift_reg. */
1523 0, /* extend. */
1524 COSTS_N_INSNS (1), /* extend_arith. */
1525 0, /* bfi. */
1526 COSTS_N_INSNS (1), /* bfx. */
1527 COSTS_N_INSNS (1), /* clz. */
1528 COSTS_N_INSNS (1), /* rev. */
1529 0, /* non_exec. */
1530 true /* non_exec_costs_exec. */
1531 },
1532 /* MULT SImode */
1533 {
1534 {
1535 COSTS_N_INSNS (2), /* simple. */
1536 COSTS_N_INSNS (3), /* flag_setting. */
1537 COSTS_N_INSNS (2), /* extend. */
1538 COSTS_N_INSNS (3), /* add. */
1539 COSTS_N_INSNS (2), /* extend_add. */
1540 COSTS_N_INSNS (18) /* idiv. */
1541 },
1542 /* MULT DImode */
1543 {
1544 0, /* simple (N/A). */
1545 0, /* flag_setting (N/A). */
1546 COSTS_N_INSNS (3), /* extend. */
1547 0, /* add (N/A). */
1548 COSTS_N_INSNS (3), /* extend_add. */
1549 0 /* idiv (N/A). */
1550 }
1551 },
1552 /* LD/ST */
1553 {
1554 COSTS_N_INSNS (3), /* load. */
1555 COSTS_N_INSNS (3), /* load_sign_extend. */
1556 COSTS_N_INSNS (3), /* ldrd. */
1557 COSTS_N_INSNS (3), /* ldm_1st. */
1558 1, /* ldm_regs_per_insn_1st. */
1559 2, /* ldm_regs_per_insn_subsequent. */
1560 COSTS_N_INSNS (3), /* loadf. */
1561 COSTS_N_INSNS (3), /* loadd. */
1562 0, /* load_unaligned. */
1563 0, /* store. */
1564 0, /* strd. */
1565 0, /* stm_1st. */
1566 1, /* stm_regs_per_insn_1st. */
1567 2, /* stm_regs_per_insn_subsequent. */
1568 COSTS_N_INSNS (2), /* storef. */
1569 COSTS_N_INSNS (2), /* stored. */
1570 0, /* store_unaligned. */
1571 COSTS_N_INSNS (1), /* loadv. */
1572 COSTS_N_INSNS (1) /* storev. */
1573 },
1574 {
1575 /* FP SFmode */
1576 {
1577 COSTS_N_INSNS (17), /* div. */
1578 COSTS_N_INSNS (4), /* mult. */
1579 COSTS_N_INSNS (8), /* mult_addsub. */
1580 COSTS_N_INSNS (8), /* fma. */
1581 COSTS_N_INSNS (4), /* addsub. */
1582 COSTS_N_INSNS (2), /* fpconst. */
1583 COSTS_N_INSNS (2), /* neg. */
1584 COSTS_N_INSNS (2), /* compare. */
1585 COSTS_N_INSNS (4), /* widen. */
1586 COSTS_N_INSNS (4), /* narrow. */
1587 COSTS_N_INSNS (4), /* toint. */
1588 COSTS_N_INSNS (4), /* fromint. */
1589 COSTS_N_INSNS (4) /* roundint. */
1590 },
1591 /* FP DFmode */
1592 {
1593 COSTS_N_INSNS (31), /* div. */
1594 COSTS_N_INSNS (4), /* mult. */
1595 COSTS_N_INSNS (8), /* mult_addsub. */
1596 COSTS_N_INSNS (8), /* fma. */
1597 COSTS_N_INSNS (4), /* addsub. */
1598 COSTS_N_INSNS (2), /* fpconst. */
1599 COSTS_N_INSNS (2), /* neg. */
1600 COSTS_N_INSNS (2), /* compare. */
1601 COSTS_N_INSNS (4), /* widen. */
1602 COSTS_N_INSNS (4), /* narrow. */
1603 COSTS_N_INSNS (4), /* toint. */
1604 COSTS_N_INSNS (4), /* fromint. */
1605 COSTS_N_INSNS (4) /* roundint. */
1606 }
1607 },
1608 /* Vector */
1609 {
1610 COSTS_N_INSNS (1) /* alu. */
1611 }
1612 };
1613
1614 const struct cpu_cost_table cortexa15_extra_costs =
1615 {
1616 /* ALU */
1617 {
1618 0, /* arith. */
1619 0, /* logical. */
1620 0, /* shift. */
1621 0, /* shift_reg. */
1622 COSTS_N_INSNS (1), /* arith_shift. */
1623 COSTS_N_INSNS (1), /* arith_shift_reg. */
1624 COSTS_N_INSNS (1), /* log_shift. */
1625 COSTS_N_INSNS (1), /* log_shift_reg. */
1626 0, /* extend. */
1627 COSTS_N_INSNS (1), /* extend_arith. */
1628 COSTS_N_INSNS (1), /* bfi. */
1629 0, /* bfx. */
1630 0, /* clz. */
1631 0, /* rev. */
1632 0, /* non_exec. */
1633 true /* non_exec_costs_exec. */
1634 },
1635 /* MULT SImode */
1636 {
1637 {
1638 COSTS_N_INSNS (2), /* simple. */
1639 COSTS_N_INSNS (3), /* flag_setting. */
1640 COSTS_N_INSNS (2), /* extend. */
1641 COSTS_N_INSNS (2), /* add. */
1642 COSTS_N_INSNS (2), /* extend_add. */
1643 COSTS_N_INSNS (18) /* idiv. */
1644 },
1645 /* MULT DImode */
1646 {
1647 0, /* simple (N/A). */
1648 0, /* flag_setting (N/A). */
1649 COSTS_N_INSNS (3), /* extend. */
1650 0, /* add (N/A). */
1651 COSTS_N_INSNS (3), /* extend_add. */
1652 0 /* idiv (N/A). */
1653 }
1654 },
1655 /* LD/ST */
1656 {
1657 COSTS_N_INSNS (3), /* load. */
1658 COSTS_N_INSNS (3), /* load_sign_extend. */
1659 COSTS_N_INSNS (3), /* ldrd. */
1660 COSTS_N_INSNS (4), /* ldm_1st. */
1661 1, /* ldm_regs_per_insn_1st. */
1662 2, /* ldm_regs_per_insn_subsequent. */
1663 COSTS_N_INSNS (4), /* loadf. */
1664 COSTS_N_INSNS (4), /* loadd. */
1665 0, /* load_unaligned. */
1666 0, /* store. */
1667 0, /* strd. */
1668 COSTS_N_INSNS (1), /* stm_1st. */
1669 1, /* stm_regs_per_insn_1st. */
1670 2, /* stm_regs_per_insn_subsequent. */
1671 0, /* storef. */
1672 0, /* stored. */
1673 0, /* store_unaligned. */
1674 COSTS_N_INSNS (1), /* loadv. */
1675 COSTS_N_INSNS (1) /* storev. */
1676 },
1677 {
1678 /* FP SFmode */
1679 {
1680 COSTS_N_INSNS (17), /* div. */
1681 COSTS_N_INSNS (4), /* mult. */
1682 COSTS_N_INSNS (8), /* mult_addsub. */
1683 COSTS_N_INSNS (8), /* fma. */
1684 COSTS_N_INSNS (4), /* addsub. */
1685 COSTS_N_INSNS (2), /* fpconst. */
1686 COSTS_N_INSNS (2), /* neg. */
1687 COSTS_N_INSNS (5), /* compare. */
1688 COSTS_N_INSNS (4), /* widen. */
1689 COSTS_N_INSNS (4), /* narrow. */
1690 COSTS_N_INSNS (4), /* toint. */
1691 COSTS_N_INSNS (4), /* fromint. */
1692 COSTS_N_INSNS (4) /* roundint. */
1693 },
1694 /* FP DFmode */
1695 {
1696 COSTS_N_INSNS (31), /* div. */
1697 COSTS_N_INSNS (4), /* mult. */
1698 COSTS_N_INSNS (8), /* mult_addsub. */
1699 COSTS_N_INSNS (8), /* fma. */
1700 COSTS_N_INSNS (4), /* addsub. */
1701 COSTS_N_INSNS (2), /* fpconst. */
1702 COSTS_N_INSNS (2), /* neg. */
1703 COSTS_N_INSNS (2), /* compare. */
1704 COSTS_N_INSNS (4), /* widen. */
1705 COSTS_N_INSNS (4), /* narrow. */
1706 COSTS_N_INSNS (4), /* toint. */
1707 COSTS_N_INSNS (4), /* fromint. */
1708 COSTS_N_INSNS (4) /* roundint. */
1709 }
1710 },
1711 /* Vector */
1712 {
1713 COSTS_N_INSNS (1) /* alu. */
1714 }
1715 };
1716
1717 const struct cpu_cost_table v7m_extra_costs =
1718 {
1719 /* ALU */
1720 {
1721 0, /* arith. */
1722 0, /* logical. */
1723 0, /* shift. */
1724 0, /* shift_reg. */
1725 0, /* arith_shift. */
1726 COSTS_N_INSNS (1), /* arith_shift_reg. */
1727 0, /* log_shift. */
1728 COSTS_N_INSNS (1), /* log_shift_reg. */
1729 0, /* extend. */
1730 COSTS_N_INSNS (1), /* extend_arith. */
1731 0, /* bfi. */
1732 0, /* bfx. */
1733 0, /* clz. */
1734 0, /* rev. */
1735 COSTS_N_INSNS (1), /* non_exec. */
1736 false /* non_exec_costs_exec. */
1737 },
1738 {
1739 /* MULT SImode */
1740 {
1741 COSTS_N_INSNS (1), /* simple. */
1742 COSTS_N_INSNS (1), /* flag_setting. */
1743 COSTS_N_INSNS (2), /* extend. */
1744 COSTS_N_INSNS (1), /* add. */
1745 COSTS_N_INSNS (3), /* extend_add. */
1746 COSTS_N_INSNS (8) /* idiv. */
1747 },
1748 /* MULT DImode */
1749 {
1750 0, /* simple (N/A). */
1751 0, /* flag_setting (N/A). */
1752 COSTS_N_INSNS (2), /* extend. */
1753 0, /* add (N/A). */
1754 COSTS_N_INSNS (3), /* extend_add. */
1755 0 /* idiv (N/A). */
1756 }
1757 },
1758 /* LD/ST */
1759 {
1760 COSTS_N_INSNS (2), /* load. */
1761 0, /* load_sign_extend. */
1762 COSTS_N_INSNS (3), /* ldrd. */
1763 COSTS_N_INSNS (2), /* ldm_1st. */
1764 1, /* ldm_regs_per_insn_1st. */
1765 1, /* ldm_regs_per_insn_subsequent. */
1766 COSTS_N_INSNS (2), /* loadf. */
1767 COSTS_N_INSNS (3), /* loadd. */
1768 COSTS_N_INSNS (1), /* load_unaligned. */
1769 COSTS_N_INSNS (2), /* store. */
1770 COSTS_N_INSNS (3), /* strd. */
1771 COSTS_N_INSNS (2), /* stm_1st. */
1772 1, /* stm_regs_per_insn_1st. */
1773 1, /* stm_regs_per_insn_subsequent. */
1774 COSTS_N_INSNS (2), /* storef. */
1775 COSTS_N_INSNS (3), /* stored. */
1776 COSTS_N_INSNS (1), /* store_unaligned. */
1777 COSTS_N_INSNS (1), /* loadv. */
1778 COSTS_N_INSNS (1) /* storev. */
1779 },
1780 {
1781 /* FP SFmode */
1782 {
1783 COSTS_N_INSNS (7), /* div. */
1784 COSTS_N_INSNS (2), /* mult. */
1785 COSTS_N_INSNS (5), /* mult_addsub. */
1786 COSTS_N_INSNS (3), /* fma. */
1787 COSTS_N_INSNS (1), /* addsub. */
1788 0, /* fpconst. */
1789 0, /* neg. */
1790 0, /* compare. */
1791 0, /* widen. */
1792 0, /* narrow. */
1793 0, /* toint. */
1794 0, /* fromint. */
1795 0 /* roundint. */
1796 },
1797 /* FP DFmode */
1798 {
1799 COSTS_N_INSNS (15), /* div. */
1800 COSTS_N_INSNS (5), /* mult. */
1801 COSTS_N_INSNS (7), /* mult_addsub. */
1802 COSTS_N_INSNS (7), /* fma. */
1803 COSTS_N_INSNS (3), /* addsub. */
1804 0, /* fpconst. */
1805 0, /* neg. */
1806 0, /* compare. */
1807 0, /* widen. */
1808 0, /* narrow. */
1809 0, /* toint. */
1810 0, /* fromint. */
1811 0 /* roundint. */
1812 }
1813 },
1814 /* Vector */
1815 {
1816 COSTS_N_INSNS (1) /* alu. */
1817 }
1818 };
1819
1820 const struct addr_mode_cost_table generic_addr_mode_costs =
1821 {
1822 /* int. */
1823 {
1824 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1825 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1826 COSTS_N_INSNS (0) /* AMO_WB. */
1827 },
1828 /* float. */
1829 {
1830 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1831 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1832 COSTS_N_INSNS (0) /* AMO_WB. */
1833 },
1834 /* vector. */
1835 {
1836 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1837 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1838 COSTS_N_INSNS (0) /* AMO_WB. */
1839 }
1840 };
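/* How these tables are consumed, as an illustrative sketch only: each entry
   is an extra cost added on top of a one-instruction baseline, which is why
   cheap operations are listed as 0.  Schematically, with
   arith_uses_register_shift standing in for a real predicate:

     int cost = COSTS_N_INSNS (1);
     if (arith_uses_register_shift)
       cost += extra_cost->alu.arith_shift_reg;

   COSTS_N_INSNS (N) simply scales N by the cost GCC assigns to one fast
   instruction.  */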
1841
1842 const struct tune_params arm_slowmul_tune =
1843 {
1844 &generic_extra_costs, /* Insn extra costs. */
1845 &generic_addr_mode_costs, /* Addressing mode costs. */
1846 NULL, /* Sched adj cost. */
1847 arm_default_branch_cost,
1848 &arm_default_vec_cost,
1849 3, /* Constant limit. */
1850 5, /* Max cond insns. */
1851 8, /* Memset max inline. */
1852 1, /* Issue rate. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 tune_params::PREF_CONST_POOL_TRUE,
1855 tune_params::PREF_LDRD_FALSE,
1856 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1857 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1858 tune_params::DISPARAGE_FLAGS_NEITHER,
1859 tune_params::PREF_NEON_STRINGOPS_FALSE,
1860 tune_params::FUSE_NOTHING,
1861 tune_params::SCHED_AUTOPREF_OFF
1862 };
1863
1864 const struct tune_params arm_fastmul_tune =
1865 {
1866 &generic_extra_costs, /* Insn extra costs. */
1867 &generic_addr_mode_costs, /* Addressing mode costs. */
1868 NULL, /* Sched adj cost. */
1869 arm_default_branch_cost,
1870 &arm_default_vec_cost,
1871 1, /* Constant limit. */
1872 5, /* Max cond insns. */
1873 8, /* Memset max inline. */
1874 1, /* Issue rate. */
1875 ARM_PREFETCH_NOT_BENEFICIAL,
1876 tune_params::PREF_CONST_POOL_TRUE,
1877 tune_params::PREF_LDRD_FALSE,
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1879 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1880 tune_params::DISPARAGE_FLAGS_NEITHER,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 /* StrongARM has early execution of branches, so a sequence that is worth
1887 skipping is shorter. Set max_insns_skipped to a lower value. */
1888
1889 const struct tune_params arm_strongarm_tune =
1890 {
1891 &generic_extra_costs, /* Insn extra costs. */
1892 &generic_addr_mode_costs, /* Addressing mode costs. */
1893 NULL, /* Sched adj cost. */
1894 arm_default_branch_cost,
1895 &arm_default_vec_cost,
1896 1, /* Constant limit. */
1897 3, /* Max cond insns. */
1898 8, /* Memset max inline. */
1899 1, /* Issue rate. */
1900 ARM_PREFETCH_NOT_BENEFICIAL,
1901 tune_params::PREF_CONST_POOL_TRUE,
1902 tune_params::PREF_LDRD_FALSE,
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1905 tune_params::DISPARAGE_FLAGS_NEITHER,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1909 };
1910
1911 const struct tune_params arm_xscale_tune =
1912 {
1913 &generic_extra_costs, /* Insn extra costs. */
1914 &generic_addr_mode_costs, /* Addressing mode costs. */
1915 xscale_sched_adjust_cost,
1916 arm_default_branch_cost,
1917 &arm_default_vec_cost,
1918 2, /* Constant limit. */
1919 3, /* Max cond insns. */
1920 8, /* Memset max inline. */
1921 1, /* Issue rate. */
1922 ARM_PREFETCH_NOT_BENEFICIAL,
1923 tune_params::PREF_CONST_POOL_TRUE,
1924 tune_params::PREF_LDRD_FALSE,
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1927 tune_params::DISPARAGE_FLAGS_NEITHER,
1928 tune_params::PREF_NEON_STRINGOPS_FALSE,
1929 tune_params::FUSE_NOTHING,
1930 tune_params::SCHED_AUTOPREF_OFF
1931 };
1932
1933 const struct tune_params arm_9e_tune =
1934 {
1935 &generic_extra_costs, /* Insn extra costs. */
1936 &generic_addr_mode_costs, /* Addressing mode costs. */
1937 NULL, /* Sched adj cost. */
1938 arm_default_branch_cost,
1939 &arm_default_vec_cost,
1940 1, /* Constant limit. */
1941 5, /* Max cond insns. */
1942 8, /* Memset max inline. */
1943 1, /* Issue rate. */
1944 ARM_PREFETCH_NOT_BENEFICIAL,
1945 tune_params::PREF_CONST_POOL_TRUE,
1946 tune_params::PREF_LDRD_FALSE,
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1949 tune_params::DISPARAGE_FLAGS_NEITHER,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955 const struct tune_params arm_marvell_pj4_tune =
1956 {
1957 &generic_extra_costs, /* Insn extra costs. */
1958 &generic_addr_mode_costs, /* Addressing mode costs. */
1959 NULL, /* Sched adj cost. */
1960 arm_default_branch_cost,
1961 &arm_default_vec_cost,
1962 1, /* Constant limit. */
1963 5, /* Max cond insns. */
1964 8, /* Memset max inline. */
1965 2, /* Issue rate. */
1966 ARM_PREFETCH_NOT_BENEFICIAL,
1967 tune_params::PREF_CONST_POOL_TRUE,
1968 tune_params::PREF_LDRD_FALSE,
1969 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1970 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1971 tune_params::DISPARAGE_FLAGS_NEITHER,
1972 tune_params::PREF_NEON_STRINGOPS_FALSE,
1973 tune_params::FUSE_NOTHING,
1974 tune_params::SCHED_AUTOPREF_OFF
1975 };
1976
1977 const struct tune_params arm_v6t2_tune =
1978 {
1979 &generic_extra_costs, /* Insn extra costs. */
1980 &generic_addr_mode_costs, /* Addressing mode costs. */
1981 NULL, /* Sched adj cost. */
1982 arm_default_branch_cost,
1983 &arm_default_vec_cost,
1984 1, /* Constant limit. */
1985 5, /* Max cond insns. */
1986 8, /* Memset max inline. */
1987 1, /* Issue rate. */
1988 ARM_PREFETCH_NOT_BENEFICIAL,
1989 tune_params::PREF_CONST_POOL_FALSE,
1990 tune_params::PREF_LDRD_FALSE,
1991 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1992 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1993 tune_params::DISPARAGE_FLAGS_NEITHER,
1994 tune_params::PREF_NEON_STRINGOPS_FALSE,
1995 tune_params::FUSE_NOTHING,
1996 tune_params::SCHED_AUTOPREF_OFF
1997 };
1998
1999
2000 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2001 const struct tune_params arm_cortex_tune =
2002 {
2003 &generic_extra_costs,
2004 &generic_addr_mode_costs, /* Addressing mode costs. */
2005 NULL, /* Sched adj cost. */
2006 arm_default_branch_cost,
2007 &arm_default_vec_cost,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 tune_params::PREF_CONST_POOL_FALSE,
2014 tune_params::PREF_LDRD_FALSE,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE,
2019 tune_params::FUSE_NOTHING,
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_cortex_a8_tune =
2024 {
2025 &cortexa8_extra_costs,
2026 &generic_addr_mode_costs, /* Addressing mode costs. */
2027 NULL, /* Sched adj cost. */
2028 arm_default_branch_cost,
2029 &arm_default_vec_cost,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 2, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL,
2035 tune_params::PREF_CONST_POOL_FALSE,
2036 tune_params::PREF_LDRD_FALSE,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045 const struct tune_params arm_cortex_a7_tune =
2046 {
2047 &cortexa7_extra_costs,
2048 &generic_addr_mode_costs, /* Addressing mode costs. */
2049 NULL, /* Sched adj cost. */
2050 arm_default_branch_cost,
2051 &arm_default_vec_cost,
2052 1, /* Constant limit. */
2053 5, /* Max cond insns. */
2054 8, /* Memset max inline. */
2055 2, /* Issue rate. */
2056 ARM_PREFETCH_NOT_BENEFICIAL,
2057 tune_params::PREF_CONST_POOL_FALSE,
2058 tune_params::PREF_LDRD_FALSE,
2059 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2060 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2061 tune_params::DISPARAGE_FLAGS_NEITHER,
2062 tune_params::PREF_NEON_STRINGOPS_TRUE,
2063 tune_params::FUSE_NOTHING,
2064 tune_params::SCHED_AUTOPREF_OFF
2065 };
2066
2067 const struct tune_params arm_cortex_a15_tune =
2068 {
2069 &cortexa15_extra_costs,
2070 &generic_addr_mode_costs, /* Addressing mode costs. */
2071 NULL, /* Sched adj cost. */
2072 arm_default_branch_cost,
2073 &arm_default_vec_cost,
2074 1, /* Constant limit. */
2075 2, /* Max cond insns. */
2076 8, /* Memset max inline. */
2077 3, /* Issue rate. */
2078 ARM_PREFETCH_NOT_BENEFICIAL,
2079 tune_params::PREF_CONST_POOL_FALSE,
2080 tune_params::PREF_LDRD_TRUE,
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2082 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2083 tune_params::DISPARAGE_FLAGS_ALL,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 tune_params::FUSE_NOTHING,
2086 tune_params::SCHED_AUTOPREF_FULL
2087 };
2088
2089 const struct tune_params arm_cortex_a35_tune =
2090 {
2091 &cortexa53_extra_costs,
2092 &generic_addr_mode_costs, /* Addressing mode costs. */
2093 NULL, /* Sched adj cost. */
2094 arm_default_branch_cost,
2095 &arm_default_vec_cost,
2096 1, /* Constant limit. */
2097 5, /* Max cond insns. */
2098 8, /* Memset max inline. */
2099 1, /* Issue rate. */
2100 ARM_PREFETCH_NOT_BENEFICIAL,
2101 tune_params::PREF_CONST_POOL_FALSE,
2102 tune_params::PREF_LDRD_FALSE,
2103 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2104 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2105 tune_params::DISPARAGE_FLAGS_NEITHER,
2106 tune_params::PREF_NEON_STRINGOPS_TRUE,
2107 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2108 tune_params::SCHED_AUTOPREF_OFF
2109 };
2110
2111 const struct tune_params arm_cortex_a53_tune =
2112 {
2113 &cortexa53_extra_costs,
2114 &generic_addr_mode_costs, /* Addressing mode costs. */
2115 NULL, /* Sched adj cost. */
2116 arm_default_branch_cost,
2117 &arm_default_vec_cost,
2118 1, /* Constant limit. */
2119 5, /* Max cond insns. */
2120 8, /* Memset max inline. */
2121 2, /* Issue rate. */
2122 ARM_PREFETCH_NOT_BENEFICIAL,
2123 tune_params::PREF_CONST_POOL_FALSE,
2124 tune_params::PREF_LDRD_FALSE,
2125 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2126 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2127 tune_params::DISPARAGE_FLAGS_NEITHER,
2128 tune_params::PREF_NEON_STRINGOPS_TRUE,
2129 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2130 tune_params::SCHED_AUTOPREF_OFF
2131 };
2132
2133 const struct tune_params arm_cortex_a57_tune =
2134 {
2135 &cortexa57_extra_costs,
2136 &generic_addr_mode_costs, /* addressing mode costs */
2137 NULL, /* Sched adj cost. */
2138 arm_default_branch_cost,
2139 &arm_default_vec_cost,
2140 1, /* Constant limit. */
2141 2, /* Max cond insns. */
2142 8, /* Memset max inline. */
2143 3, /* Issue rate. */
2144 ARM_PREFETCH_NOT_BENEFICIAL,
2145 tune_params::PREF_CONST_POOL_FALSE,
2146 tune_params::PREF_LDRD_TRUE,
2147 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2148 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2149 tune_params::DISPARAGE_FLAGS_ALL,
2150 tune_params::PREF_NEON_STRINGOPS_TRUE,
2151 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2152 tune_params::SCHED_AUTOPREF_FULL
2153 };
2154
2155 const struct tune_params arm_exynosm1_tune =
2156 {
2157 &exynosm1_extra_costs,
2158 &generic_addr_mode_costs, /* Addressing mode costs. */
2159 NULL, /* Sched adj cost. */
2160 arm_default_branch_cost,
2161 &arm_default_vec_cost,
2162 1, /* Constant limit. */
2163 2, /* Max cond insns. */
2164 8, /* Memset max inline. */
2165 3, /* Issue rate. */
2166 ARM_PREFETCH_NOT_BENEFICIAL,
2167 tune_params::PREF_CONST_POOL_FALSE,
2168 tune_params::PREF_LDRD_TRUE,
2169 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2170 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2171 tune_params::DISPARAGE_FLAGS_ALL,
2172 tune_params::PREF_NEON_STRINGOPS_TRUE,
2173 tune_params::FUSE_NOTHING,
2174 tune_params::SCHED_AUTOPREF_OFF
2175 };
2176
2177 const struct tune_params arm_xgene1_tune =
2178 {
2179 &xgene1_extra_costs,
2180 &generic_addr_mode_costs, /* Addressing mode costs. */
2181 NULL, /* Sched adj cost. */
2182 arm_default_branch_cost,
2183 &arm_default_vec_cost,
2184 1, /* Constant limit. */
2185 2, /* Max cond insns. */
2186 32, /* Memset max inline. */
2187 4, /* Issue rate. */
2188 ARM_PREFETCH_NOT_BENEFICIAL,
2189 tune_params::PREF_CONST_POOL_FALSE,
2190 tune_params::PREF_LDRD_TRUE,
2191 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2192 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2193 tune_params::DISPARAGE_FLAGS_ALL,
2194 tune_params::PREF_NEON_STRINGOPS_FALSE,
2195 tune_params::FUSE_NOTHING,
2196 tune_params::SCHED_AUTOPREF_OFF
2197 };
2198
2199 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2200 less appealing. Set max_insns_skipped to a low value. */
2201
2202 const struct tune_params arm_cortex_a5_tune =
2203 {
2204 &cortexa5_extra_costs,
2205 &generic_addr_mode_costs, /* Addressing mode costs. */
2206 NULL, /* Sched adj cost. */
2207 arm_cortex_a5_branch_cost,
2208 &arm_default_vec_cost,
2209 1, /* Constant limit. */
2210 1, /* Max cond insns. */
2211 8, /* Memset max inline. */
2212 2, /* Issue rate. */
2213 ARM_PREFETCH_NOT_BENEFICIAL,
2214 tune_params::PREF_CONST_POOL_FALSE,
2215 tune_params::PREF_LDRD_FALSE,
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2218 tune_params::DISPARAGE_FLAGS_NEITHER,
2219 tune_params::PREF_NEON_STRINGOPS_TRUE,
2220 tune_params::FUSE_NOTHING,
2221 tune_params::SCHED_AUTOPREF_OFF
2222 };
2223
2224 const struct tune_params arm_cortex_a9_tune =
2225 {
2226 &cortexa9_extra_costs,
2227 &generic_addr_mode_costs, /* Addressing mode costs. */
2228 cortex_a9_sched_adjust_cost,
2229 arm_default_branch_cost,
2230 &arm_default_vec_cost,
2231 1, /* Constant limit. */
2232 5, /* Max cond insns. */
2233 8, /* Memset max inline. */
2234 2, /* Issue rate. */
2235 ARM_PREFETCH_BENEFICIAL(4,32,32),
2236 tune_params::PREF_CONST_POOL_FALSE,
2237 tune_params::PREF_LDRD_FALSE,
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2239 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2240 tune_params::DISPARAGE_FLAGS_NEITHER,
2241 tune_params::PREF_NEON_STRINGOPS_FALSE,
2242 tune_params::FUSE_NOTHING,
2243 tune_params::SCHED_AUTOPREF_OFF
2244 };
2245
2246 const struct tune_params arm_cortex_a12_tune =
2247 {
2248 &cortexa12_extra_costs,
2249 &generic_addr_mode_costs, /* Addressing mode costs. */
2250 NULL, /* Sched adj cost. */
2251 arm_default_branch_cost,
2252 &arm_default_vec_cost, /* Vectorizer costs. */
2253 1, /* Constant limit. */
2254 2, /* Max cond insns. */
2255 8, /* Memset max inline. */
2256 2, /* Issue rate. */
2257 ARM_PREFETCH_NOT_BENEFICIAL,
2258 tune_params::PREF_CONST_POOL_FALSE,
2259 tune_params::PREF_LDRD_TRUE,
2260 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2261 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2262 tune_params::DISPARAGE_FLAGS_ALL,
2263 tune_params::PREF_NEON_STRINGOPS_TRUE,
2264 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2265 tune_params::SCHED_AUTOPREF_OFF
2266 };
2267
2268 const struct tune_params arm_cortex_a73_tune =
2269 {
2270 &cortexa57_extra_costs,
2271 &generic_addr_mode_costs, /* Addressing mode costs. */
2272 NULL, /* Sched adj cost. */
2273 arm_default_branch_cost,
2274 &arm_default_vec_cost, /* Vectorizer costs. */
2275 1, /* Constant limit. */
2276 2, /* Max cond insns. */
2277 8, /* Memset max inline. */
2278 2, /* Issue rate. */
2279 ARM_PREFETCH_NOT_BENEFICIAL,
2280 tune_params::PREF_CONST_POOL_FALSE,
2281 tune_params::PREF_LDRD_TRUE,
2282 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2284 tune_params::DISPARAGE_FLAGS_ALL,
2285 tune_params::PREF_NEON_STRINGOPS_TRUE,
2286 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2287 tune_params::SCHED_AUTOPREF_FULL
2288 };
2289
2290 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
2291    single cycle to execute, so a MOVW/MOVT pair materialises a 32-bit constant
2292    in two cycles.  An LDR from the constant pool also takes two cycles, but
2293    mildly increases pipelining opportunity (consecutive loads/stores can be
2294    pipelined together, saving one cycle), and may also improve icache
2295    utilisation.  Hence we prefer the constant pool for such processors.  */
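/* For illustration, using the cycle counts noted above, loading the 32-bit
   constant 0x12345678 compares roughly as

     movw  r0, #0x5678   @ 1 cycle
     movt  r0, #0x1234   @ 1 cycle

   against

     ldr   r0, .Lconst   @ 2 cycles, PC-relative load from the literal pool

   Both forms cost two cycles in isolation; the LDR form is preferred here for
   the pipelining and icache reasons given above.  */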
2296
2297 const struct tune_params arm_v7m_tune =
2298 {
2299 &v7m_extra_costs,
2300 &generic_addr_mode_costs, /* Addressing mode costs. */
2301 NULL, /* Sched adj cost. */
2302 arm_cortex_m_branch_cost,
2303 &arm_default_vec_cost,
2304 1, /* Constant limit. */
2305 2, /* Max cond insns. */
2306 8, /* Memset max inline. */
2307 1, /* Issue rate. */
2308 ARM_PREFETCH_NOT_BENEFICIAL,
2309 tune_params::PREF_CONST_POOL_TRUE,
2310 tune_params::PREF_LDRD_FALSE,
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2313 tune_params::DISPARAGE_FLAGS_NEITHER,
2314 tune_params::PREF_NEON_STRINGOPS_FALSE,
2315 tune_params::FUSE_NOTHING,
2316 tune_params::SCHED_AUTOPREF_OFF
2317 };
2318
2319 /* Cortex-M7 tuning. */
2320
2321 const struct tune_params arm_cortex_m7_tune =
2322 {
2323 &v7m_extra_costs,
2324 &generic_addr_mode_costs, /* Addressing mode costs. */
2325 NULL, /* Sched adj cost. */
2326 arm_cortex_m7_branch_cost,
2327 &arm_default_vec_cost,
2328 0, /* Constant limit. */
2329 1, /* Max cond insns. */
2330 8, /* Memset max inline. */
2331 2, /* Issue rate. */
2332 ARM_PREFETCH_NOT_BENEFICIAL,
2333 tune_params::PREF_CONST_POOL_TRUE,
2334 tune_params::PREF_LDRD_FALSE,
2335 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2336 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2337 tune_params::DISPARAGE_FLAGS_NEITHER,
2338 tune_params::PREF_NEON_STRINGOPS_FALSE,
2339 tune_params::FUSE_NOTHING,
2340 tune_params::SCHED_AUTOPREF_OFF
2341 };
2342
2343 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2344 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2345 cortex-m23. */
2346 const struct tune_params arm_v6m_tune =
2347 {
2348 &generic_extra_costs, /* Insn extra costs. */
2349 &generic_addr_mode_costs, /* Addressing mode costs. */
2350 NULL, /* Sched adj cost. */
2351 arm_default_branch_cost,
2352 &arm_default_vec_cost, /* Vectorizer costs. */
2353 1, /* Constant limit. */
2354 5, /* Max cond insns. */
2355 8, /* Memset max inline. */
2356 1, /* Issue rate. */
2357 ARM_PREFETCH_NOT_BENEFICIAL,
2358 tune_params::PREF_CONST_POOL_FALSE,
2359 tune_params::PREF_LDRD_FALSE,
2360 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2362 tune_params::DISPARAGE_FLAGS_NEITHER,
2363 tune_params::PREF_NEON_STRINGOPS_FALSE,
2364 tune_params::FUSE_NOTHING,
2365 tune_params::SCHED_AUTOPREF_OFF
2366 };
2367
2368 const struct tune_params arm_fa726te_tune =
2369 {
2370 &generic_extra_costs, /* Insn extra costs. */
2371 &generic_addr_mode_costs, /* Addressing mode costs. */
2372 fa726te_sched_adjust_cost,
2373 arm_default_branch_cost,
2374 &arm_default_vec_cost,
2375 1, /* Constant limit. */
2376 5, /* Max cond insns. */
2377 8, /* Memset max inline. */
2378 2, /* Issue rate. */
2379 ARM_PREFETCH_NOT_BENEFICIAL,
2380 tune_params::PREF_CONST_POOL_TRUE,
2381 tune_params::PREF_LDRD_FALSE,
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2383 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2384 tune_params::DISPARAGE_FLAGS_NEITHER,
2385 tune_params::PREF_NEON_STRINGOPS_FALSE,
2386 tune_params::FUSE_NOTHING,
2387 tune_params::SCHED_AUTOPREF_OFF
2388 };
2389
2390 /* Auto-generated CPU, FPU and architecture tables. */
2391 #include "arm-cpu-data.h"
2392
2393 /* The name of the preprocessor macro to define for this architecture.  PROFILE
2394    is replaced by the architecture name (e.g. 8A) in arm_option_override (), and
2395    is therefore chosen to be long enough to hold the longest architecture name.  */
2396
2397 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2398
2399 /* Supported TLS relocations. */
2400
2401 enum tls_reloc {
2402 TLS_GD32,
2403 TLS_GD32_FDPIC,
2404 TLS_LDM32,
2405 TLS_LDM32_FDPIC,
2406 TLS_LDO32,
2407 TLS_IE32,
2408 TLS_IE32_FDPIC,
2409 TLS_LE32,
2410 TLS_DESCSEQ /* GNU scheme */
2411 };
2412
2413 /* The maximum number of insns to be used when loading a constant. */
2414 inline static int
2415 arm_constant_limit (bool size_p)
2416 {
2417 return size_p ? 1 : current_tune->constant_limit;
2418 }
2419
2420 /* Emit an insn that's a simple single-set. Both the operands must be known
2421 to be valid. */
2422 inline static rtx_insn *
2423 emit_set_insn (rtx x, rtx y)
2424 {
2425 return emit_insn (gen_rtx_SET (x, y));
2426 }
2427
2428 /* Return the number of bits set in VALUE. */
2429 static unsigned
2430 bit_count (unsigned long value)
2431 {
2432 unsigned long count = 0;
2433
2434 while (value)
2435 {
2436 count++;
2437 value &= value - 1; /* Clear the least-significant set bit. */
2438 }
2439
2440 return count;
2441 }
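/* Worked example: bit_count (0x2d) is 4.  Each "value &= value - 1" step
   clears the lowest set bit, so the value goes 0x2d -> 0x2c -> 0x28 -> 0x20
   -> 0 in four iterations.  */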
2442
2443 /* Return the number of bits set in BMAP. */
2444 static unsigned
2445 bitmap_popcount (const sbitmap bmap)
2446 {
2447 unsigned int count = 0;
2448 unsigned int n = 0;
2449 sbitmap_iterator sbi;
2450
2451 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2452 count++;
2453 return count;
2454 }
2455
2456 typedef struct
2457 {
2458 machine_mode mode;
2459 const char *name;
2460 } arm_fixed_mode_set;
2461
2462 /* A small helper for setting fixed-point library libfuncs. */
2463
2464 /* A small helper for setting fixed-point libfuncs.  */
2465 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2466 const char *funcname, const char *modename,
2467 int num_suffix)
2468 {
2469 char buffer[50];
2470
2471 if (num_suffix == 0)
2472 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2473 else
2474 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2475
2476 set_optab_libfunc (optable, mode, buffer);
2477 }
2478
2479 static void
2480 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2481 machine_mode from, const char *funcname,
2482 const char *toname, const char *fromname)
2483 {
2484 char buffer[50];
2485 const char *maybe_suffix_2 = "";
2486
2487 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2488 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2489 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2490 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2491 maybe_suffix_2 = "2";
2492
2493 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2494 maybe_suffix_2);
2495
2496 set_conv_libfunc (optable, to, from, buffer);
2497 }
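/* Illustrative examples of the names the two helpers above construct, derived
   from their sprintf formats: arm_set_fixed_optab_libfunc (ssadd_optab,
   E_SAmode, "ssadd", "sa", 3) registers "__gnu_ssaddsa3", and a signed
   SQ -> DQ "fract" conversion via arm_set_fixed_conv_libfunc gets
   "__gnu_fractsqdq2", the trailing "2" coming from the fixed-bit.h rule
   checked above (both operands fixed-point, same signedness, both fracts).  */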
2498
2499 static GTY(()) rtx speculation_barrier_libfunc;
2500
2501 /* Record that we have no arithmetic or comparison libfuncs for
2502 machine mode MODE. */
2503
2504 static void
2505 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2506 {
2507 /* Arithmetic. */
2508 set_optab_libfunc (add_optab, mode, NULL);
2509 set_optab_libfunc (sdiv_optab, mode, NULL);
2510 set_optab_libfunc (smul_optab, mode, NULL);
2511 set_optab_libfunc (neg_optab, mode, NULL);
2512 set_optab_libfunc (sub_optab, mode, NULL);
2513
2514 /* Comparisons. */
2515 set_optab_libfunc (eq_optab, mode, NULL);
2516 set_optab_libfunc (ne_optab, mode, NULL);
2517 set_optab_libfunc (lt_optab, mode, NULL);
2518 set_optab_libfunc (le_optab, mode, NULL);
2519 set_optab_libfunc (ge_optab, mode, NULL);
2520 set_optab_libfunc (gt_optab, mode, NULL);
2521 set_optab_libfunc (unord_optab, mode, NULL);
2522 }
2523
2524 /* Set up library functions unique to ARM. */
2525 static void
2526 arm_init_libfuncs (void)
2527 {
2528 machine_mode mode_iter;
2529
2530 /* For Linux, we have access to kernel support for atomic operations. */
2531 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2532 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2533
2534 /* There are no special library functions unless we are using the
2535 ARM BPABI. */
2536 if (!TARGET_BPABI)
2537 return;
2538
2539 /* The functions below are described in Section 4 of the "Run-Time
2540 ABI for the ARM architecture", Version 1.0. */
2541
2542 /* Double-precision floating-point arithmetic. Table 2. */
2543 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2544 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2545 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2546 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2547 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2548
2549 /* Double-precision comparisons. Table 3. */
2550 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2551 set_optab_libfunc (ne_optab, DFmode, NULL);
2552 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2553 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2554 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2555 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2556 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2557
2558 /* Single-precision floating-point arithmetic. Table 4. */
2559 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2560 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2561 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2562 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2563 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2564
2565 /* Single-precision comparisons. Table 5. */
2566 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2567 set_optab_libfunc (ne_optab, SFmode, NULL);
2568 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2569 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2570 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2571 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2572 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2573
2574 /* Floating-point to integer conversions. Table 6. */
2575 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2576 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2577 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2578 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2579 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2580 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2581 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2582 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2583
2584 /* Conversions between floating types. Table 7. */
2585 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2586 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2587
2588 /* Integer to floating-point conversions. Table 8. */
2589 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2590 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2591 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2592 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2593 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2594 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2595 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2596 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2597
2598 /* Long long. Table 9. */
2599 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2600 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2601 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2602 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2603 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2604 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2605 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2606 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2607
2608 /* Integer (32/32->32) division. \S 4.3.1. */
2609 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2610 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2611
2612 /* The divmod functions are designed so that they can be used for
2613 plain division, even though they return both the quotient and the
2614 remainder. The quotient is returned in the usual location (i.e.,
2615 r0 for SImode, {r0, r1} for DImode), just as would be expected
2616 for an ordinary division routine. Because the AAPCS calling
2617 conventions specify that all of { r0, r1, r2, r3 } are
2618    call-clobbered registers, there is no need to tell the compiler
2619 explicitly that those registers are clobbered by these
2620 routines. */
2621 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2622 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2623
2624 /* For SImode division the ABI provides div-without-mod routines,
2625 which are faster. */
2626 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2627 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2628
2629 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2630 divmod libcalls instead. */
2631 set_optab_libfunc (smod_optab, DImode, NULL);
2632 set_optab_libfunc (umod_optab, DImode, NULL);
2633 set_optab_libfunc (smod_optab, SImode, NULL);
2634 set_optab_libfunc (umod_optab, SImode, NULL);
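  /* To make the above concrete, an illustrative sketch of the resulting
     lowering (not a statement of the exact RTL emitted): for SImode,
     "a / b" becomes a call to __aeabi_idiv with the quotient in r0, while
     "a % b" uses the __aeabi_idivmod libcall, which returns the quotient in
     r0 and the remainder in r1, the compiler keeping only the remainder.
     This is why the mod optabs can safely be cleared above.  */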
2635
2636 /* Half-precision float operations. The compiler handles all operations
2637    with NULL libfuncs by converting to SFmode.  */
2638 switch (arm_fp16_format)
2639 {
2640 case ARM_FP16_FORMAT_IEEE:
2641 case ARM_FP16_FORMAT_ALTERNATIVE:
2642
2643 /* Conversions. */
2644 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2645 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2646 ? "__gnu_f2h_ieee"
2647 : "__gnu_f2h_alternative"));
2648 set_conv_libfunc (sext_optab, SFmode, HFmode,
2649 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2650 ? "__gnu_h2f_ieee"
2651 : "__gnu_h2f_alternative"));
2652
2653 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2654 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2655 ? "__gnu_d2h_ieee"
2656 : "__gnu_d2h_alternative"));
2657
2658 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2659 break;
2660
2661 default:
2662 break;
2663 }
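  /* A sketch of what the NULL arithmetic libfuncs above imply (illustrative
     only): with the IEEE format selected and no half-precision hardware, an
     expression such as "__fp16 c = a + b;" is evaluated by widening both
     operands with __gnu_h2f_ieee, adding in SFmode, and narrowing the result
     back with __gnu_f2h_ieee.  */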
2664
2665 /* For all possible libcalls in BFmode, record NULL. */
2666 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2667 {
2668 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2669 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2670 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2671 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2672 }
2673 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2674
2675 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2676 {
2677 const arm_fixed_mode_set fixed_arith_modes[] =
2678 {
2679 { E_QQmode, "qq" },
2680 { E_UQQmode, "uqq" },
2681 { E_HQmode, "hq" },
2682 { E_UHQmode, "uhq" },
2683 { E_SQmode, "sq" },
2684 { E_USQmode, "usq" },
2685 { E_DQmode, "dq" },
2686 { E_UDQmode, "udq" },
2687 { E_TQmode, "tq" },
2688 { E_UTQmode, "utq" },
2689 { E_HAmode, "ha" },
2690 { E_UHAmode, "uha" },
2691 { E_SAmode, "sa" },
2692 { E_USAmode, "usa" },
2693 { E_DAmode, "da" },
2694 { E_UDAmode, "uda" },
2695 { E_TAmode, "ta" },
2696 { E_UTAmode, "uta" }
2697 };
2698 const arm_fixed_mode_set fixed_conv_modes[] =
2699 {
2700 { E_QQmode, "qq" },
2701 { E_UQQmode, "uqq" },
2702 { E_HQmode, "hq" },
2703 { E_UHQmode, "uhq" },
2704 { E_SQmode, "sq" },
2705 { E_USQmode, "usq" },
2706 { E_DQmode, "dq" },
2707 { E_UDQmode, "udq" },
2708 { E_TQmode, "tq" },
2709 { E_UTQmode, "utq" },
2710 { E_HAmode, "ha" },
2711 { E_UHAmode, "uha" },
2712 { E_SAmode, "sa" },
2713 { E_USAmode, "usa" },
2714 { E_DAmode, "da" },
2715 { E_UDAmode, "uda" },
2716 { E_TAmode, "ta" },
2717 { E_UTAmode, "uta" },
2718 { E_QImode, "qi" },
2719 { E_HImode, "hi" },
2720 { E_SImode, "si" },
2721 { E_DImode, "di" },
2722 { E_TImode, "ti" },
2723 { E_SFmode, "sf" },
2724 { E_DFmode, "df" }
2725 };
2726 unsigned int i, j;
2727
2728 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2729 {
2730 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2731 "add", fixed_arith_modes[i].name, 3);
2732 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2733 "ssadd", fixed_arith_modes[i].name, 3);
2734 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2735 "usadd", fixed_arith_modes[i].name, 3);
2736 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2737 "sub", fixed_arith_modes[i].name, 3);
2738 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2739 "sssub", fixed_arith_modes[i].name, 3);
2740 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2741 "ussub", fixed_arith_modes[i].name, 3);
2742 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2743 "mul", fixed_arith_modes[i].name, 3);
2744 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2745 "ssmul", fixed_arith_modes[i].name, 3);
2746 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2747 "usmul", fixed_arith_modes[i].name, 3);
2748 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2749 "div", fixed_arith_modes[i].name, 3);
2750 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2751 "udiv", fixed_arith_modes[i].name, 3);
2752 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2753 "ssdiv", fixed_arith_modes[i].name, 3);
2754 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2755 "usdiv", fixed_arith_modes[i].name, 3);
2756 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2757 "neg", fixed_arith_modes[i].name, 2);
2758 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2759 "ssneg", fixed_arith_modes[i].name, 2);
2760 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2761 "usneg", fixed_arith_modes[i].name, 2);
2762 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2763 "ashl", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2765 "ashr", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2767 "lshr", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2769 "ssashl", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2771 "usashl", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2773 "cmp", fixed_arith_modes[i].name, 2);
2774 }
2775
2776 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2777 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2778 {
2779 if (i == j
2780 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2781 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2782 continue;
2783
2784 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2785 fixed_conv_modes[j].mode, "fract",
2786 fixed_conv_modes[i].name,
2787 fixed_conv_modes[j].name);
2788 arm_set_fixed_conv_libfunc (satfract_optab,
2789 fixed_conv_modes[i].mode,
2790 fixed_conv_modes[j].mode, "satfract",
2791 fixed_conv_modes[i].name,
2792 fixed_conv_modes[j].name);
2793 arm_set_fixed_conv_libfunc (fractuns_optab,
2794 fixed_conv_modes[i].mode,
2795 fixed_conv_modes[j].mode, "fractuns",
2796 fixed_conv_modes[i].name,
2797 fixed_conv_modes[j].name);
2798 arm_set_fixed_conv_libfunc (satfractuns_optab,
2799 fixed_conv_modes[i].mode,
2800 fixed_conv_modes[j].mode, "satfractuns",
2801 fixed_conv_modes[i].name,
2802 fixed_conv_modes[j].name);
2803 }
2804 }
2805
2806 if (TARGET_AAPCS_BASED)
2807 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2808
2809 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2810 }
2811
2812 /* On AAPCS systems, this is the "struct __va_list". */
2813 static GTY(()) tree va_list_type;
2814
2815 /* Return the type to use as __builtin_va_list. */
2816 static tree
2817 arm_build_builtin_va_list (void)
2818 {
2819 tree va_list_name;
2820 tree ap_field;
2821
2822 if (!TARGET_AAPCS_BASED)
2823 return std_build_builtin_va_list ();
2824
2825 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2826 defined as:
2827
2828 struct __va_list
2829 {
2830 void *__ap;
2831 };
2832
2833 The C Library ABI further reinforces this definition in \S
2834 4.1.
2835
2836 We must follow this definition exactly. The structure tag
2837 name is visible in C++ mangled names, and thus forms a part
2838 of the ABI. The field name may be used by people who
2839 #include <stdarg.h>. */
2840 /* Create the type. */
2841 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2842 /* Give it the required name. */
2843 va_list_name = build_decl (BUILTINS_LOCATION,
2844 TYPE_DECL,
2845 get_identifier ("__va_list"),
2846 va_list_type);
2847 DECL_ARTIFICIAL (va_list_name) = 1;
2848 TYPE_NAME (va_list_type) = va_list_name;
2849 TYPE_STUB_DECL (va_list_type) = va_list_name;
2850 /* Create the __ap field. */
2851 ap_field = build_decl (BUILTINS_LOCATION,
2852 FIELD_DECL,
2853 get_identifier ("__ap"),
2854 ptr_type_node);
2855 DECL_ARTIFICIAL (ap_field) = 1;
2856 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2857 TYPE_FIELDS (va_list_type) = ap_field;
2858 /* Compute its layout. */
2859 layout_type (va_list_type);
2860
2861 return va_list_type;
2862 }
2863
2864 /* Return an expression of type "void *" pointing to the next
2865 available argument in a variable-argument list. VALIST is the
2866 user-level va_list object, of type __builtin_va_list. */
2867 static tree
2868 arm_extract_valist_ptr (tree valist)
2869 {
2870 if (TREE_TYPE (valist) == error_mark_node)
2871 return error_mark_node;
2872
2873 /* On an AAPCS target, the pointer is stored within "struct
2874 va_list". */
2875 if (TARGET_AAPCS_BASED)
2876 {
2877 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2878 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2879 valist, ap_field, NULL_TREE);
2880 }
2881
2882 return valist;
2883 }
2884
2885 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2886 static void
2887 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2888 {
2889 valist = arm_extract_valist_ptr (valist);
2890 std_expand_builtin_va_start (valist, nextarg);
2891 }
2892
2893 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2894 static tree
2895 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2896 gimple_seq *post_p)
2897 {
2898 valist = arm_extract_valist_ptr (valist);
2899 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2900 }
2901
2902 /* Check any incompatible options that the user has specified. */
2903 static void
2904 arm_option_check_internal (struct gcc_options *opts)
2905 {
2906 int flags = opts->x_target_flags;
2907
2908 /* iWMMXt and NEON are incompatible. */
2909 if (TARGET_IWMMXT
2910 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2911 error ("iWMMXt and NEON are incompatible");
2912
2913 /* Make sure that the processor choice does not conflict with any of the
2914 other command line choices. */
2915 if (TARGET_ARM_P (flags)
2916 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2917 error ("target CPU does not support ARM mode");
2918
2919 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2920 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2921 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2922
2923 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2924 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2925
2926 /* If this target is normally configured to use APCS frames, warn if they
2927 are turned off and debugging is turned on. */
2928 if (TARGET_ARM_P (flags)
2929 && write_symbols != NO_DEBUG
2930 && !TARGET_APCS_FRAME
2931 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2932 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2933 "debugging");
2934
2935 /* iWMMXt unsupported under Thumb mode. */
2936 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2937 error ("iWMMXt unsupported under Thumb mode");
2938
2939 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2940 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2941
2942 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2943 {
2944 error ("RTP PIC is incompatible with Thumb");
2945 flag_pic = 0;
2946 }
2947
2948 if (target_pure_code || target_slow_flash_data)
2949 {
2950 const char *flag = (target_pure_code ? "-mpure-code" :
2951 "-mslow-flash-data");
2952 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2953
2954 /* We only support -mslow-flash-data on M-profile targets with
2955 MOVT. */
2956 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2957 error ("%s only supports non-pic code on M-profile targets with the "
2958 "MOVT instruction", flag);
2959
2960 /* We only support -mpure-code on M-profile targets. */
2961 if (target_pure_code && common_unsupported_modes)
2962 error ("%s only supports non-pic code on M-profile targets", flag);
2963
2964 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2965 -mword-relocations forbids relocation of MOVT/MOVW. */
2966 if (target_word_relocations)
2967 error ("%s incompatible with %<-mword-relocations%>", flag);
2968 }
2969 }
2970
2971 /* Recompute the global settings depending on target attribute options. */
2972
2973 static void
2974 arm_option_params_internal (void)
2975 {
2976 /* If we are not using the default (ARM mode) section anchor offset
2977 ranges, then set the correct ranges now. */
2978 if (TARGET_THUMB1)
2979 {
2980 /* Thumb-1 LDR instructions cannot have negative offsets.
2981 Permissible positive offset ranges are 5-bit (for byte loads),
2982 6-bit (for halfword loads), or 7-bit (for word loads).
2983 Empirical results suggest a 7-bit anchor range gives the best
2984 overall code size. */
2985 targetm.min_anchor_offset = 0;
2986 targetm.max_anchor_offset = 127;
2987 }
2988 else if (TARGET_THUMB2)
2989 {
2990 /* The minimum is set such that the total size of the block
2991 for a particular anchor is 248 + 1 + 4095 bytes, which is
2992 divisible by eight, ensuring natural spacing of anchors. */
2993 targetm.min_anchor_offset = -248;
2994 targetm.max_anchor_offset = 4095;
2995 }
2996 else
2997 {
2998 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2999 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3000 }
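  /* For reference, the Thumb-1 ranges above follow from the load encodings:
     LDRB uses its 5-bit immediate directly (byte offsets 0..31), LDRH scales
     it by 2 (0..62), and word LDR scales it by 4 (0..124), which is the 7-bit
     byte-offset range referred to above.  */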
3001
3002 /* Increase the number of conditional instructions with -Os. */
3003 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3004
3005 /* For THUMB2, we limit the conditional sequence to one IT block. */
3006 if (TARGET_THUMB2)
3007 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3008
3009 if (TARGET_THUMB1)
3010 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3011 else
3012 targetm.md_asm_adjust = arm_md_asm_adjust;
3013 }
3014
3015 /* True if -mflip-thumb should next add an attribute for the default
3016 mode, false if it should next add an attribute for the opposite mode. */
3017 static GTY(()) bool thumb_flipper;
3018
3019 /* Options after initial target override. */
3020 static GTY(()) tree init_optimize;
3021
3022 static void
3023 arm_override_options_after_change_1 (struct gcc_options *opts,
3024 struct gcc_options *opts_set)
3025 {
3026 /* -falign-functions without argument: supply one. */
3027 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3028 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3029 && opts->x_optimize_size ? "2" : "4";
3030 }
3031
3032 /* Implement targetm.override_options_after_change. */
3033
3034 static void
3035 arm_override_options_after_change (void)
3036 {
3037 arm_override_options_after_change_1 (&global_options, &global_options_set);
3038 }
3039
3040 /* Implement TARGET_OPTION_RESTORE. */
3041 static void
3042 arm_option_restore (struct gcc_options */* opts */,
3043 struct gcc_options *opts_set, struct cl_target_option *ptr)
3044 {
3045 arm_configure_build_target (&arm_active_target, ptr, opts_set, false);
3046 }
3047
3048 /* Reset options between modes that the user has specified. */
3049 static void
3050 arm_option_override_internal (struct gcc_options *opts,
3051 struct gcc_options *opts_set)
3052 {
3053 arm_override_options_after_change_1 (opts, opts_set);
3054
3055 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3056 {
3057 /* The default is to enable interworking, so this warning message would
3058 be confusing to users who have just compiled with
3059	 e.g. -march=armv4.  */
3060 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3061 opts->x_target_flags &= ~MASK_INTERWORK;
3062 }
3063
3064 if (TARGET_THUMB_P (opts->x_target_flags)
3065 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3066 {
3067 warning (0, "target CPU does not support THUMB instructions");
3068 opts->x_target_flags &= ~MASK_THUMB;
3069 }
3070
3071 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3072 {
3073 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3074 opts->x_target_flags &= ~MASK_APCS_FRAME;
3075 }
3076
3077 /* Callee super interworking implies thumb interworking. Adding
3078 this to the flags here simplifies the logic elsewhere. */
3079 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3080 opts->x_target_flags |= MASK_INTERWORK;
3081
3082   /* Need to remember the initial values so that combinations of options like
3083      -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3084 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3085
3086 if (! opts_set->x_arm_restrict_it)
3087 opts->x_arm_restrict_it = arm_arch8;
3088
3089 /* ARM execution state and M profile don't have [restrict] IT. */
3090 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3091 opts->x_arm_restrict_it = 0;
3092
3093 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3094 if (!opts_set->x_arm_restrict_it
3095 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3096 opts->x_arm_restrict_it = 0;
3097
3098 /* Enable -munaligned-access by default for
3099 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3100 i.e. Thumb2 and ARM state only.
3101 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3102      - ARMv8 architecture-based processors.
3103
3104 Disable -munaligned-access by default for
3105 - all pre-ARMv6 architecture-based processors
3106 - ARMv6-M architecture-based processors
3107 - ARMv8-M Baseline processors. */
3108
3109 if (! opts_set->x_unaligned_access)
3110 {
3111 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3112 && arm_arch6 && (arm_arch_notm || arm_arch7));
3113 }
3114 else if (opts->x_unaligned_access == 1
3115 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3116 {
3117 warning (0, "target CPU does not support unaligned accesses");
3118 opts->x_unaligned_access = 0;
3119 }
3120
3121 /* Don't warn since it's on by default in -O2. */
3122 if (TARGET_THUMB1_P (opts->x_target_flags))
3123 opts->x_flag_schedule_insns = 0;
3124 else
3125 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3126
3127 /* Disable shrink-wrap when optimizing function for size, since it tends to
3128 generate additional returns. */
3129 if (optimize_function_for_size_p (cfun)
3130 && TARGET_THUMB2_P (opts->x_target_flags))
3131 opts->x_flag_shrink_wrap = false;
3132 else
3133 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3134
3135 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3136 - epilogue_insns - does not accurately model the corresponding insns
3137 emitted in the asm file. In particular, see the comment in thumb_exit
3138 'Find out how many of the (return) argument registers we can corrupt'.
3139 As a consequence, the epilogue may clobber registers without fipa-ra
3140 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3141 TODO: Accurately model clobbers for epilogue_insns and reenable
3142 fipa-ra. */
3143 if (TARGET_THUMB1_P (opts->x_target_flags))
3144 opts->x_flag_ipa_ra = 0;
3145 else
3146 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3147
3148 /* Thumb2 inline assembly code should always use unified syntax.
3149 This will apply to ARM and Thumb1 eventually. */
3150 if (TARGET_THUMB2_P (opts->x_target_flags))
3151 opts->x_inline_asm_unified = true;
3152
3153 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3154 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3155 #endif
3156 }
3157
3158 static sbitmap isa_all_fpubits_internal;
3159 static sbitmap isa_all_fpbits;
3160 static sbitmap isa_quirkbits;
3161
3162 /* Configure a build target TARGET from the user-specified options OPTS and
3163 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3164 architecture have been specified, but the two are not identical. */
3165 void
3166 arm_configure_build_target (struct arm_build_target *target,
3167 struct cl_target_option *opts,
3168 struct gcc_options *opts_set,
3169 bool warn_compatible)
3170 {
3171 const cpu_option *arm_selected_tune = NULL;
3172 const arch_option *arm_selected_arch = NULL;
3173 const cpu_option *arm_selected_cpu = NULL;
3174 const arm_fpu_desc *arm_selected_fpu = NULL;
3175 const char *tune_opts = NULL;
3176 const char *arch_opts = NULL;
3177 const char *cpu_opts = NULL;
3178
3179 bitmap_clear (target->isa);
3180 target->core_name = NULL;
3181 target->arch_name = NULL;
3182
3183 if (opts_set->x_arm_arch_string)
3184 {
3185 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3186 "-march",
3187 opts->x_arm_arch_string);
3188 arch_opts = strchr (opts->x_arm_arch_string, '+');
3189 }
3190
3191 if (opts_set->x_arm_cpu_string)
3192 {
3193 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3194 opts->x_arm_cpu_string);
3195 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3196 arm_selected_tune = arm_selected_cpu;
3197 /* If taking the tuning from -mcpu, we don't need to rescan the
3198 options for tuning. */
3199 }
3200
3201 if (opts_set->x_arm_tune_string)
3202 {
3203 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3204 opts->x_arm_tune_string);
3205 tune_opts = strchr (opts->x_arm_tune_string, '+');
3206 }
3207
3208 if (arm_selected_arch)
3209 {
3210 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3211 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3212 arch_opts);
3213
3214 if (arm_selected_cpu)
3215 {
3216 auto_sbitmap cpu_isa (isa_num_bits);
3217 auto_sbitmap isa_delta (isa_num_bits);
3218
3219 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3220 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3221 cpu_opts);
3222 bitmap_xor (isa_delta, cpu_isa, target->isa);
3223 /* Ignore any bits that are quirk bits. */
3224 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3225 /* Ignore (for now) any bits that might be set by -mfpu. */
3226 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal);
3227
3228 /* And if the target ISA lacks floating point, ignore any
3229 extensions that depend on that. */
3230 if (!bitmap_bit_p (target->isa, isa_bit_vfpv2))
3231 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3232
3233 if (!bitmap_empty_p (isa_delta))
3234 {
3235 if (warn_compatible)
3236 warning (0, "switch %<-mcpu=%s%> conflicts "
3237 "with %<-march=%s%> switch",
3238 arm_selected_cpu->common.name,
3239 arm_selected_arch->common.name);
3240 /* -march wins for code generation.
3241 -mcpu wins for default tuning. */
3242 if (!arm_selected_tune)
3243 arm_selected_tune = arm_selected_cpu;
3244
3245 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3246 target->arch_name = arm_selected_arch->common.name;
3247 }
3248 else
3249 {
3250 /* Architecture and CPU are essentially the same.
3251 Prefer the CPU setting. */
3252 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3253 target->core_name = arm_selected_cpu->common.name;
3254 /* Copy the CPU's capabilities, so that we inherit the
3255 appropriate extensions and quirks. */
3256 bitmap_copy (target->isa, cpu_isa);
3257 }
3258 }
3259 else
3260 {
3261 /* Pick a CPU based on the architecture. */
3262 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3263 target->arch_name = arm_selected_arch->common.name;
3264 /* Note: target->core_name is left unset in this path. */
3265 }
3266 }
3267 else if (arm_selected_cpu)
3268 {
3269 target->core_name = arm_selected_cpu->common.name;
3270 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3271 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3272 cpu_opts);
3273 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3274 }
3275 /* If the user did not specify a processor or architecture, choose
3276 one for them. */
3277 else
3278 {
3279 const cpu_option *sel;
3280 auto_sbitmap sought_isa (isa_num_bits);
3281 bitmap_clear (sought_isa);
3282 auto_sbitmap default_isa (isa_num_bits);
3283
3284 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3285 TARGET_CPU_DEFAULT);
3286 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3287 gcc_assert (arm_selected_cpu->common.name);
3288
3289 /* RWE: All of the selection logic below (to the end of this
3290 'if' clause) looks somewhat suspect. It appears to be mostly
3291 there to support forcing thumb support when the default CPU
3292 does not have thumb (somewhat dubious in terms of what the
3293 user might be expecting). I think it should be removed once
3294 support for the pre-thumb era cores is removed. */
3295 sel = arm_selected_cpu;
3296 arm_initialize_isa (default_isa, sel->common.isa_bits);
3297 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3298 cpu_opts);
3299
3300 /* Now check to see if the user has specified any command line
3301 switches that require certain abilities from the cpu. */
3302
3303 if (TARGET_INTERWORK || TARGET_THUMB)
3304 bitmap_set_bit (sought_isa, isa_bit_thumb);
3305
3306 /* If there are such requirements and the default CPU does not
3307 satisfy them, we need to run over the complete list of
3308 cores looking for one that is satisfactory. */
3309 if (!bitmap_empty_p (sought_isa)
3310 && !bitmap_subset_p (sought_isa, default_isa))
3311 {
3312 auto_sbitmap candidate_isa (isa_num_bits);
3313 /* We're only interested in a CPU with at least the
3314 capabilities of the default CPU and the required
3315 additional features. */
3316 bitmap_ior (default_isa, default_isa, sought_isa);
3317
3318 /* Try to locate a CPU type that supports all of the abilities
3319 of the default CPU, plus the extra abilities requested by
3320 the user. */
3321 for (sel = all_cores; sel->common.name != NULL; sel++)
3322 {
3323 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3324 /* An exact match? */
3325 if (bitmap_equal_p (default_isa, candidate_isa))
3326 break;
3327 }
3328
3329 if (sel->common.name == NULL)
3330 {
3331 unsigned current_bit_count = isa_num_bits;
3332 const cpu_option *best_fit = NULL;
3333
3334 /* Ideally we would like to issue an error message here
3335 saying that it was not possible to find a CPU compatible
3336 with the default CPU, but which also supports the command
3337 line options specified by the programmer, and so they
3338 ought to use the -mcpu=<name> command line option to
3339 override the default CPU type.
3340
3341 If we cannot find a CPU that has exactly the
3342 characteristics of the default CPU and the given
3343 command line options we scan the array again looking
3344 for a best match. The best match must have at least
3345 the capabilities of the perfect match. */
3346 for (sel = all_cores; sel->common.name != NULL; sel++)
3347 {
3348 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3349
3350 if (bitmap_subset_p (default_isa, candidate_isa))
3351 {
3352 unsigned count;
3353
3354 bitmap_and_compl (candidate_isa, candidate_isa,
3355 default_isa);
3356 count = bitmap_popcount (candidate_isa);
3357
3358 if (count < current_bit_count)
3359 {
3360 best_fit = sel;
3361 current_bit_count = count;
3362 }
3363 }
3364
3365 gcc_assert (best_fit);
3366 sel = best_fit;
3367 }
3368 }
3369 arm_selected_cpu = sel;
3370 }
3371
3372 /* Now we know the CPU, we can finally initialize the target
3373 structure. */
3374 target->core_name = arm_selected_cpu->common.name;
3375 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3376 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3377 cpu_opts);
3378 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3379 }
3380
3381 gcc_assert (arm_selected_cpu);
3382 gcc_assert (arm_selected_arch);
3383
3384 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3385 {
3386 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3387 auto_sbitmap fpu_bits (isa_num_bits);
3388
3389 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3390 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3391 bitmap_ior (target->isa, target->isa, fpu_bits);
3392 }
3393
3394 /* There may be implied bits which we still need to enable. These are
3395 non-named features which are needed to complete other sets of features,
3396 but cannot be enabled from arm-cpus.in due to being shared between
3397 multiple fgroups. Each entry in all_implied_fbits is of the form
3398 ante -> cons, meaning that if the feature "ante" is enabled, we should
3399 implicitly enable "cons". */
3400 const struct fbit_implication *impl = all_implied_fbits;
3401 while (impl->ante)
3402 {
3403 if (bitmap_bit_p (target->isa, impl->ante))
3404 bitmap_set_bit (target->isa, impl->cons);
3405 impl++;
3406 }
3407
3408 if (!arm_selected_tune)
3409 arm_selected_tune = arm_selected_cpu;
3410 else /* Validate the features passed to -mtune. */
3411 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3412
3413 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3414
3415 /* Finish initializing the target structure. */
3416 target->arch_pp_name = arm_selected_arch->arch;
3417 target->base_arch = arm_selected_arch->base_arch;
3418 target->profile = arm_selected_arch->profile;
3419
3420 target->tune_flags = tune_data->tune_flags;
3421 target->tune = tune_data->tune;
3422 target->tune_core = tune_data->scheduler;
3423 arm_option_reconfigure_globals ();
3424 }
3425
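/* Editorial sketch, not part of the GCC sources: the -mcpu/-march
   compatibility check above, reduced to plain bitmasks instead of
   sbitmaps.  The feature names below are invented for illustration;
   the real bits come from arm-cpus.in.  Kept under "#if 0" so it is
   never compiled.  */
#if 0
#include <stdio.h>

#define FEAT_THUMB2  (1u << 0)
#define FEAT_NEON    (1u << 1)	/* Stands in for an FPU bit.  */
#define FEAT_QUIRK   (1u << 2)	/* Stands in for a quirk bit.  */
#define FEAT_CRC     (1u << 3)

/* Mirror of the delta computation: XOR the two ISA sets, discard the
   quirk and FPU bits, and report a conflict if anything remains.  */
static int
cpu_march_conflict_p (unsigned cpu_isa, unsigned arch_isa)
{
  unsigned delta = cpu_isa ^ arch_isa;	/* bitmap_xor  */
  delta &= ~FEAT_QUIRK;			/* bitmap_and_compl (quirk bits)  */
  delta &= ~FEAT_NEON;			/* bitmap_and_compl (FPU bits)  */
  return delta != 0;			/* !bitmap_empty_p  */
}

int
main (void)
{
  unsigned cpu = FEAT_THUMB2 | FEAT_NEON | FEAT_QUIRK;

  /* Differs only in FPU/quirk bits: treated as compatible.  */
  printf ("%s\n", cpu_march_conflict_p (cpu, FEAT_THUMB2)
	  ? "conflict: warn; -march wins for code generation"
	  : "compatible: prefer the CPU's capabilities");

  /* Differs in a real architecture bit: warned about.  */
  printf ("%s\n", cpu_march_conflict_p (cpu, FEAT_THUMB2 | FEAT_CRC)
	  ? "conflict: warn; -march wins for code generation"
	  : "compatible: prefer the CPU's capabilities");
  return 0;
}
#endif
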
3426 /* Fix up any incompatible options that the user has specified. */
3427 static void
3428 arm_option_override (void)
3429 {
3430 static const enum isa_feature fpu_bitlist_internal[]
3431 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3432 static const enum isa_feature fp_bitlist[]
3433 = { ISA_ALL_FP, isa_nobit };
3434 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3435 cl_target_option opts;
3436
3437 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3438 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3439
3440 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3441 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3442 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3443 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3444
3445 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3446
3447 if (!global_options_set.x_arm_fpu_index)
3448 {
3449 bool ok;
3450 int fpu_index;
3451
3452 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3453 CL_TARGET);
3454 gcc_assert (ok);
3455 arm_fpu_index = (enum fpu_type) fpu_index;
3456 }
3457
3458 cl_target_option_save (&opts, &global_options, &global_options_set);
3459 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3460 true);
3461
3462 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3463 SUBTARGET_OVERRIDE_OPTIONS;
3464 #endif
3465
3466 /* Initialize boolean versions of the architectural flags, for use
3467 in the arm.md file and for enabling feature flags. */
3468 arm_option_reconfigure_globals ();
3469
3470 arm_tune = arm_active_target.tune_core;
3471 tune_flags = arm_active_target.tune_flags;
3472 current_tune = arm_active_target.tune;
3473
3474 /* TBD: Dwarf info for apcs frame is not handled yet. */
3475 if (TARGET_APCS_FRAME)
3476 flag_shrink_wrap = false;
3477
3478 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3479 {
3480 warning (0, "%<-mapcs-stack-check%> incompatible with "
3481 "%<-mno-apcs-frame%>");
3482 target_flags |= MASK_APCS_FRAME;
3483 }
3484
3485 if (TARGET_POKE_FUNCTION_NAME)
3486 target_flags |= MASK_APCS_FRAME;
3487
3488 if (TARGET_APCS_REENT && flag_pic)
3489 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3490
3491 if (TARGET_APCS_REENT)
3492 warning (0, "APCS reentrant code not supported. Ignored");
3493
3494 /* Set up some tuning parameters. */
3495 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3496 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3497 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3498 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3499 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3500 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3501
3502 /* For arm2/3 there is no need to do any scheduling if we are doing
3503 software floating-point. */
3504 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3505 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3506
3507 /* Override the default structure alignment for AAPCS ABI. */
3508 if (!global_options_set.x_arm_structure_size_boundary)
3509 {
3510 if (TARGET_AAPCS_BASED)
3511 arm_structure_size_boundary = 8;
3512 }
3513 else
3514 {
3515 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3516
3517 if (arm_structure_size_boundary != 8
3518 && arm_structure_size_boundary != 32
3519 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3520 {
3521 if (ARM_DOUBLEWORD_ALIGN)
3522 warning (0,
3523 "structure size boundary can only be set to 8, 32 or 64");
3524 else
3525 warning (0, "structure size boundary can only be set to 8 or 32");
3526 arm_structure_size_boundary
3527 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3528 }
3529 }
3530
3531 if (TARGET_VXWORKS_RTP)
3532 {
3533 if (!global_options_set.x_arm_pic_data_is_text_relative)
3534 arm_pic_data_is_text_relative = 0;
3535 }
3536 else if (flag_pic
3537 && !arm_pic_data_is_text_relative
3538 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3539 /* When text & data segments don't have a fixed displacement, the
3540 intended use is with a single, read-only, PIC base register.
3541 Unless the user explicitly requested not to do that, set
3542 it. */
3543 target_flags |= MASK_SINGLE_PIC_BASE;
3544
3545 /* If stack checking is disabled, we can use r10 as the PIC register,
3546 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3547 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3548 {
3549 if (TARGET_VXWORKS_RTP)
3550 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3551 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3552 }
3553
3554 if (flag_pic && TARGET_VXWORKS_RTP)
3555 arm_pic_register = 9;
3556
3557 /* If in FDPIC mode then force arm_pic_register to be r9. */
3558 if (TARGET_FDPIC)
3559 {
3560 arm_pic_register = FDPIC_REGNUM;
3561 if (TARGET_THUMB1)
3562 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3563 }
3564
3565 if (arm_pic_register_string != NULL)
3566 {
3567 int pic_register = decode_reg_name (arm_pic_register_string);
3568
3569 if (!flag_pic)
3570 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3571
3572 /* Prevent the user from choosing an obviously stupid PIC register. */
3573 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3574 || pic_register == HARD_FRAME_POINTER_REGNUM
3575 || pic_register == STACK_POINTER_REGNUM
3576 || pic_register >= PC_REGNUM
3577 || (TARGET_VXWORKS_RTP
3578 && (unsigned int) pic_register != arm_pic_register))
3579 error ("unable to use %qs for PIC register", arm_pic_register_string);
3580 else
3581 arm_pic_register = pic_register;
3582 }
3583
3584 if (flag_pic)
3585 target_word_relocations = 1;
3586
3587 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3588 if (fix_cm3_ldrd == 2)
3589 {
3590 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3591 fix_cm3_ldrd = 1;
3592 else
3593 fix_cm3_ldrd = 0;
3594 }
3595
3596 /* Hot/Cold partitioning is not currently supported, since we can't
3597 handle literal pool placement in that case. */
3598 if (flag_reorder_blocks_and_partition)
3599 {
3600 inform (input_location,
3601 "%<-freorder-blocks-and-partition%> not supported "
3602 "on this architecture");
3603 flag_reorder_blocks_and_partition = 0;
3604 flag_reorder_blocks = 1;
3605 }
3606
3607 if (flag_pic)
3608 /* Hoisting PIC address calculations more aggressively provides a small,
3609 but measurable, size reduction for PIC code. Therefore, we decrease
3610 the bar for unrestricted expression hoisting to the cost of PIC address
3611 calculation, which is 2 instructions. */
3612 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3613 param_gcse_unrestricted_cost, 2);
3614
3615 /* ARM EABI defaults to strict volatile bitfields. */
3616 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3617 && abi_version_at_least(2))
3618 flag_strict_volatile_bitfields = 1;
3619
3620 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3621 have deemed it beneficial (signified by setting
3622 prefetch.num_slots to 1 or more). */
3623 if (flag_prefetch_loop_arrays < 0
3624 && HAVE_prefetch
3625 && optimize >= 3
3626 && current_tune->prefetch.num_slots > 0)
3627 flag_prefetch_loop_arrays = 1;
3628
3629 /* Set up parameters to be used in prefetching algorithm. Do not
3630 override the defaults unless we are tuning for a core we have
3631 researched values for. */
3632 if (current_tune->prefetch.num_slots > 0)
3633 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3634 param_simultaneous_prefetches,
3635 current_tune->prefetch.num_slots);
3636 if (current_tune->prefetch.l1_cache_line_size >= 0)
3637 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3638 param_l1_cache_line_size,
3639 current_tune->prefetch.l1_cache_line_size);
3640 if (current_tune->prefetch.l1_cache_size >= 0)
3641 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3642 param_l1_cache_size,
3643 current_tune->prefetch.l1_cache_size);
3644
3645 /* Look through ready list and all of queue for instructions
3646 relevant for L2 auto-prefetcher. */
3647 int sched_autopref_queue_depth;
3648
3649 switch (current_tune->sched_autopref)
3650 {
3651 case tune_params::SCHED_AUTOPREF_OFF:
3652 sched_autopref_queue_depth = -1;
3653 break;
3654
3655 case tune_params::SCHED_AUTOPREF_RANK:
3656 sched_autopref_queue_depth = 0;
3657 break;
3658
3659 case tune_params::SCHED_AUTOPREF_FULL:
3660 sched_autopref_queue_depth = max_insn_queue_index + 1;
3661 break;
3662
3663 default:
3664 gcc_unreachable ();
3665 }
3666
3667 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3668 param_sched_autopref_queue_depth,
3669 sched_autopref_queue_depth);
3670
3671 /* Currently, for slow flash data, we just disable literal pools. We also
3672 disable it for pure-code. */
3673 if (target_slow_flash_data || target_pure_code)
3674 arm_disable_literal_pool = true;
3675
3676 /* Disable scheduling fusion by default if the processor is not armv7 or
3677 does not prefer ldrd/strd. */
3678 if (flag_schedule_fusion == 2
3679 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3680 flag_schedule_fusion = 0;
3681
3682 /* Need to remember initial options before they are overridden. */
3683 init_optimize = build_optimization_node (&global_options,
3684 &global_options_set);
3685
3686 arm_options_perform_arch_sanity_checks ();
3687 arm_option_override_internal (&global_options, &global_options_set);
3688 arm_option_check_internal (&global_options);
3689 arm_option_params_internal ();
3690
3691 /* Create the default target_options structure. */
3692 target_option_default_node = target_option_current_node
3693 = build_target_option_node (&global_options, &global_options_set);
3694
3695 /* Register global variables with the garbage collector. */
3696 arm_add_gc_roots ();
3697
3698 /* Record the initial mode for testing. */
3699 thumb_flipper = TARGET_THUMB;
3700 }
3701
3702
3703 /* Reconfigure global status flags from the active_target.isa. */
3704 void
3705 arm_option_reconfigure_globals (void)
3706 {
3707 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3708 arm_base_arch = arm_active_target.base_arch;
3709
3710 /* Initialize boolean versions of the architectural flags, for use
3711 in the arm.md file. */
3712 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3713 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3714 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3715 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3716 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3717 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3718 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3719 arm_arch6m = arm_arch6 && !arm_arch_notm;
3720 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3721 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3722 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3723 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3724 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3725 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3726 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3727 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3728 isa_bit_armv8_1m_main);
3729 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3730 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3731 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3732 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3733 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3734 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3735 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3736 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3737 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3738 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3739 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3740 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3741
3742 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3743 if (arm_fp16_inst)
3744 {
3745 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3746 error ("selected fp16 options are incompatible");
3747 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3748 }
3749
3750 arm_arch_cde = 0;
3751 arm_arch_cde_coproc = 0;
3752 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3753 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3754 isa_bit_cdecp6, isa_bit_cdecp7};
3755 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3756 {
3757 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3758 if (cde_bit)
3759 {
3760 arm_arch_cde |= cde_bit;
3761 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3762 }
3763 }
3764
3765 /* And finally, set up some quirks. */
3766 arm_arch_no_volatile_ce
3767 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3768 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3769 isa_bit_quirk_armv6kz);
3770
3771 /* Use the cp15 method if it is available. */
3772 if (target_thread_pointer == TP_AUTO)
3773 {
3774 if (arm_arch6k && !TARGET_THUMB1)
3775 target_thread_pointer = TP_CP15;
3776 else
3777 target_thread_pointer = TP_SOFT;
3778 }
3779 }
3780
3781 /* Perform some validation between the desired architecture and the rest of the
3782 options. */
3783 void
3784 arm_options_perform_arch_sanity_checks (void)
3785 {
3786 /* V5T code we generate is completely interworking capable, so we turn off
3787 TARGET_INTERWORK here to avoid many tests later on. */
3788
3789 /* XXX However, we must pass the right pre-processor defines to CPP
3790 or GLD can get confused. This is a hack. */
3791 if (TARGET_INTERWORK)
3792 arm_cpp_interwork = 1;
3793
3794 if (arm_arch5t)
3795 target_flags &= ~MASK_INTERWORK;
3796
3797 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3798 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3799
3800 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3801 error ("iwmmxt abi requires an iwmmxt capable cpu");
3802
3803 /* BPABI targets use linker tricks to allow interworking on cores
3804 without thumb support. */
3805 if (TARGET_INTERWORK
3806 && !TARGET_BPABI
3807 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3808 {
3809 warning (0, "target CPU does not support interworking" );
3810 target_flags &= ~MASK_INTERWORK;
3811 }
3812
3813 /* If soft-float is specified then don't use FPU. */
3814 if (TARGET_SOFT_FLOAT)
3815 arm_fpu_attr = FPU_NONE;
3816 else
3817 arm_fpu_attr = FPU_VFP;
3818
3819 if (TARGET_AAPCS_BASED)
3820 {
3821 if (TARGET_CALLER_INTERWORKING)
3822 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3823 else
3824 if (TARGET_CALLEE_INTERWORKING)
3825 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3826 }
3827
3828 /* __fp16 support currently assumes the core has ldrh. */
3829 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3830 sorry ("__fp16 and no ldrh");
3831
3832 if (use_cmse && !arm_arch_cmse)
3833 error ("target CPU does not support ARMv8-M Security Extensions");
3834
3835 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3836 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3837 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3838 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3839
3840
3841 if (TARGET_AAPCS_BASED)
3842 {
3843 if (arm_abi == ARM_ABI_IWMMXT)
3844 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3845 else if (TARGET_HARD_FLOAT_ABI)
3846 {
3847 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3848 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3849 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3850 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3851 }
3852 else
3853 arm_pcs_default = ARM_PCS_AAPCS;
3854 }
3855 else
3856 {
3857 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3858 sorry ("%<-mfloat-abi=hard%> and VFP");
3859
3860 if (arm_abi == ARM_ABI_APCS)
3861 arm_pcs_default = ARM_PCS_APCS;
3862 else
3863 arm_pcs_default = ARM_PCS_ATPCS;
3864 }
3865 }
3866
3867 /* Test whether a local function descriptor is canonical, i.e.,
3868 whether we can use GOTOFFFUNCDESC to compute the address of the
3869 function. */
3870 static bool
3871 arm_fdpic_local_funcdesc_p (rtx fnx)
3872 {
3873 tree fn;
3874 enum symbol_visibility vis;
3875 bool ret;
3876
3877 if (!TARGET_FDPIC)
3878 return true;
3879
3880 if (! SYMBOL_REF_LOCAL_P (fnx))
3881 return false;
3882
3883 fn = SYMBOL_REF_DECL (fnx);
3884
3885 if (! fn)
3886 return false;
3887
3888 vis = DECL_VISIBILITY (fn);
3889
3890 if (vis == VISIBILITY_PROTECTED)
3891 /* Private function descriptors for protected functions are not
3892 canonical. Temporarily change the visibility to global so that
3893 we can ensure uniqueness of funcdesc pointers. */
3894 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3895
3896 ret = default_binds_local_p_1 (fn, flag_pic);
3897
3898 DECL_VISIBILITY (fn) = vis;
3899
3900 return ret;
3901 }
3902
3903 static void
3904 arm_add_gc_roots (void)
3905 {
3906 gcc_obstack_init(&minipool_obstack);
3907 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3908 }
3909 \f
3910 /* A table of known ARM exception types.
3911 For use with the interrupt function attribute. */
3912
3913 typedef struct
3914 {
3915 const char *const arg;
3916 const unsigned long return_value;
3917 }
3918 isr_attribute_arg;
3919
3920 static const isr_attribute_arg isr_attribute_args [] =
3921 {
3922 { "IRQ", ARM_FT_ISR },
3923 { "irq", ARM_FT_ISR },
3924 { "FIQ", ARM_FT_FIQ },
3925 { "fiq", ARM_FT_FIQ },
3926 { "ABORT", ARM_FT_ISR },
3927 { "abort", ARM_FT_ISR },
3928 { "UNDEF", ARM_FT_EXCEPTION },
3929 { "undef", ARM_FT_EXCEPTION },
3930 { "SWI", ARM_FT_EXCEPTION },
3931 { "swi", ARM_FT_EXCEPTION },
3932 { NULL, ARM_FT_NORMAL }
3933 };
3934
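/* Editorial example, not part of the GCC sources: how the strings in
   the table above reach arm_isr_value below from user code.  The
   handler name is invented; "IRQ" selects ARM_FT_ISR.  Kept under
   "#if 0" so it is never compiled.  */
#if 0
void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

void
uart_handler (void)
{
  /* The prologue/epilogue emitted for this function follow the
     ARM_FT_ISR rules, e.g. saving call-clobbered registers and
     using an exception-return sequence.  */
}
#endif
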
3935 /* Returns the (interrupt) function type of the current
3936 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3937
3938 static unsigned long
3939 arm_isr_value (tree argument)
3940 {
3941 const isr_attribute_arg * ptr;
3942 const char * arg;
3943
3944 if (!arm_arch_notm)
3945 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3946
3947 /* No argument - default to IRQ. */
3948 if (argument == NULL_TREE)
3949 return ARM_FT_ISR;
3950
3951 /* Get the value of the argument. */
3952 if (TREE_VALUE (argument) == NULL_TREE
3953 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3954 return ARM_FT_UNKNOWN;
3955
3956 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3957
3958 /* Check it against the list of known arguments. */
3959 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3960 if (streq (arg, ptr->arg))
3961 return ptr->return_value;
3962
3963 /* An unrecognized interrupt type. */
3964 return ARM_FT_UNKNOWN;
3965 }
3966
3967 /* Computes the type of the current function. */
3968
3969 static unsigned long
3970 arm_compute_func_type (void)
3971 {
3972 unsigned long type = ARM_FT_UNKNOWN;
3973 tree a;
3974 tree attr;
3975
3976 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3977
3978 /* Decide if the current function is volatile. Such functions
3979 never return, and many memory cycles can be saved by not storing
3980 register values that will never be needed again. This optimization
3981 was added to speed up context switching in a kernel application. */
3982 if (optimize > 0
3983 && (TREE_NOTHROW (current_function_decl)
3984 || !(flag_unwind_tables
3985 || (flag_exceptions
3986 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3987 && TREE_THIS_VOLATILE (current_function_decl))
3988 type |= ARM_FT_VOLATILE;
3989
3990 if (cfun->static_chain_decl != NULL)
3991 type |= ARM_FT_NESTED;
3992
3993 attr = DECL_ATTRIBUTES (current_function_decl);
3994
3995 a = lookup_attribute ("naked", attr);
3996 if (a != NULL_TREE)
3997 type |= ARM_FT_NAKED;
3998
3999 a = lookup_attribute ("isr", attr);
4000 if (a == NULL_TREE)
4001 a = lookup_attribute ("interrupt", attr);
4002
4003 if (a == NULL_TREE)
4004 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4005 else
4006 type |= arm_isr_value (TREE_VALUE (a));
4007
4008 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4009 type |= ARM_FT_CMSE_ENTRY;
4010
4011 return type;
4012 }
4013
4014 /* Returns the type of the current function. */
4015
4016 unsigned long
4017 arm_current_func_type (void)
4018 {
4019 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4020 cfun->machine->func_type = arm_compute_func_type ();
4021
4022 return cfun->machine->func_type;
4023 }
4024
4025 bool
4026 arm_allocate_stack_slots_for_args (void)
4027 {
4028 /* Naked functions should not allocate stack slots for arguments. */
4029 return !IS_NAKED (arm_current_func_type ());
4030 }
4031
4032 static bool
4033 arm_warn_func_return (tree decl)
4034 {
4035 /* Naked functions are implemented entirely in assembly, including the
4036 return sequence, so suppress warnings about this. */
4037 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4038 }
4039
4040 \f
4041 /* Output assembler code for a block containing the constant parts
4042 of a trampoline, leaving space for the variable parts.
4043
4044 On the ARM (if r8 is the static chain regnum, and remembering that
4045 referencing pc adds an offset of 8), the trampoline looks like:
4046 ldr r8, [pc, #0]
4047 ldr pc, [pc]
4048 .word static chain value
4049 .word function's address
4050 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4051
4052 In FDPIC mode, the trampoline looks like:
4053 .word trampoline address
4054 .word trampoline GOT address
4055 ldr r12, [pc, #8] ; #4 for Arm mode
4056 ldr r9, [pc, #8] ; #4 for Arm mode
4057 ldr pc, [pc, #8] ; #4 for Arm mode
4058 .word static chain value
4059 .word GOT address
4060 .word function's address
4061 */
4062
4063 static void
4064 arm_asm_trampoline_template (FILE *f)
4065 {
4066 fprintf (f, "\t.syntax unified\n");
4067
4068 if (TARGET_FDPIC)
4069 {
4070 /* The first two words are a function descriptor pointing to the
4071 trampoline code just below. */
4072 if (TARGET_ARM)
4073 fprintf (f, "\t.arm\n");
4074 else if (TARGET_THUMB2)
4075 fprintf (f, "\t.thumb\n");
4076 else
4077 /* Only ARM and Thumb-2 are supported. */
4078 gcc_unreachable ();
4079
4080 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4081 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4082 /* Trampoline code which sets the static chain register but also
4083 the PIC register before jumping into the real code. */
4084 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4085 STATIC_CHAIN_REGNUM, PC_REGNUM,
4086 TARGET_THUMB2 ? 8 : 4);
4087 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4088 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4089 TARGET_THUMB2 ? 8 : 4);
4090 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4091 PC_REGNUM, PC_REGNUM,
4092 TARGET_THUMB2 ? 8 : 4);
4093 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4094 }
4095 else if (TARGET_ARM)
4096 {
4097 fprintf (f, "\t.arm\n");
4098 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4099 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4100 }
4101 else if (TARGET_THUMB2)
4102 {
4103 fprintf (f, "\t.thumb\n");
4104 /* The Thumb-2 trampoline is similar to the arm implementation.
4105 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4106 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4107 STATIC_CHAIN_REGNUM, PC_REGNUM);
4108 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4109 }
4110 else
4111 {
4112 ASM_OUTPUT_ALIGN (f, 2);
4113 fprintf (f, "\t.code\t16\n");
4114 fprintf (f, ".Ltrampoline_start:\n");
4115 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4116 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4117 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4118 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4119 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4120 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4121 }
4122 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4123 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4124 }
4125
4126 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4127
4128 static void
4129 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4130 {
4131 rtx fnaddr, mem, a_tramp;
4132
4133 emit_block_move (m_tramp, assemble_trampoline_template (),
4134 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4135
4136 if (TARGET_FDPIC)
4137 {
4138 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4139 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4140 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4141 /* The function start address is at offset 8, but in Thumb mode
4142 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4143 below. */
4144 rtx trampoline_code_start
4145 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4146
4147 /* Write initial funcdesc which points to the trampoline. */
4148 mem = adjust_address (m_tramp, SImode, 0);
4149 emit_move_insn (mem, trampoline_code_start);
4150 mem = adjust_address (m_tramp, SImode, 4);
4151 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4152 /* Set up the static chain. */
4153 mem = adjust_address (m_tramp, SImode, 20);
4154 emit_move_insn (mem, chain_value);
4155 /* GOT + real function entry point. */
4156 mem = adjust_address (m_tramp, SImode, 24);
4157 emit_move_insn (mem, gotaddr);
4158 mem = adjust_address (m_tramp, SImode, 28);
4159 emit_move_insn (mem, fnaddr);
4160 }
4161 else
4162 {
4163 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4164 emit_move_insn (mem, chain_value);
4165
4166 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4167 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4168 emit_move_insn (mem, fnaddr);
4169 }
4170
4171 a_tramp = XEXP (m_tramp, 0);
4172 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4173 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
4174 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4175 }
4176
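/* Editorial sketch, not part of the GCC sources: the Arm/Thumb-2
   (TARGET_32BIT) non-FDPIC trampoline built above, viewed as a struct
   so that the offsets 8 and 12 written by arm_trampoline_init are easy
   to see.  The struct and field names are invented for illustration;
   r8 is the static chain register, as in the layout comment further
   up.  Kept under "#if 0" so it is never compiled.  */
#if 0
struct arm_trampoline_image
{
  unsigned int load_static_chain;   /* offset 0:  ldr r8, [pc, #0]  */
  unsigned int load_pc;             /* offset 4:  ldr pc, [pc]      */
  unsigned int static_chain_value;  /* offset 8:  filled in at init */
  unsigned int function_address;    /* offset 12: filled in at init */
};
#endif
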
4177 /* Thumb trampolines should be entered in thumb mode, so set
4178 the bottom bit of the address. */
4179
4180 static rtx
4181 arm_trampoline_adjust_address (rtx addr)
4182 {
4183 /* For FDPIC don't fix trampoline address since it's a function
4184 descriptor and not a function address. */
4185 if (TARGET_THUMB && !TARGET_FDPIC)
4186 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4187 NULL, 0, OPTAB_LIB_WIDEN);
4188 return addr;
4189 }
4190 \f
4191 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4192 includes call-clobbered registers too. If this is a leaf function
4193 we can just examine the registers used by the RTL, but otherwise we
4194 have to assume that whatever function is called might clobber
4195 anything, and so we have to save all the call-clobbered registers
4196 as well. */
4197 static inline bool reg_needs_saving_p (unsigned reg)
4198 {
4199 unsigned long func_type = arm_current_func_type ();
4200
4201 if (IS_INTERRUPT (func_type))
4202 if (df_regs_ever_live_p (reg)
4203 /* Save call-clobbered core registers. */
4204 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4205 return true;
4206 else
4207 return false;
4208 else
4209 if (!df_regs_ever_live_p (reg)
4210 || call_used_or_fixed_reg_p (reg))
4211 return false;
4212 else
4213 return true;
4214 }
4215
4216 /* Return 1 if it is possible to return using a single instruction.
4217 If SIBLING is non-null, this is a test for a return before a sibling
4218 call. SIBLING is the call insn, so we can examine its register usage. */
4219
4220 int
4221 use_return_insn (int iscond, rtx sibling)
4222 {
4223 int regno;
4224 unsigned int func_type;
4225 unsigned long saved_int_regs;
4226 unsigned HOST_WIDE_INT stack_adjust;
4227 arm_stack_offsets *offsets;
4228
4229 /* Never use a return instruction before reload has run. */
4230 if (!reload_completed)
4231 return 0;
4232
4233 func_type = arm_current_func_type ();
4234
4235 /* Naked, volatile and stack alignment functions need special
4236 consideration. */
4237 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4238 return 0;
4239
4240 /* So do interrupt functions that use the frame pointer and Thumb
4241 interrupt functions. */
4242 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4243 return 0;
4244
4245 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4246 && !optimize_function_for_size_p (cfun))
4247 return 0;
4248
4249 offsets = arm_get_frame_offsets ();
4250 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4251
4252 /* As do variadic functions. */
4253 if (crtl->args.pretend_args_size
4254 || cfun->machine->uses_anonymous_args
4255 /* Or if the function calls __builtin_eh_return () */
4256 || crtl->calls_eh_return
4257 /* Or if the function calls alloca */
4258 || cfun->calls_alloca
4259 /* Or if there is a stack adjustment. However, if the stack pointer
4260 is saved on the stack, we can use a pre-incrementing stack load. */
4261 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4262 && stack_adjust == 4))
4263 /* Or if the static chain register was saved above the frame, under the
4264 assumption that the stack pointer isn't saved on the stack. */
4265 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4266 && arm_compute_static_chain_stack_bytes() != 0))
4267 return 0;
4268
4269 saved_int_regs = offsets->saved_regs_mask;
4270
4271 /* Unfortunately, the insn
4272
4273 ldmib sp, {..., sp, ...}
4274
4275 triggers a bug on most SA-110 based devices, such that the stack
4276 pointer won't be correctly restored if the instruction takes a
4277 page fault. We work around this problem by popping r3 along with
4278 the other registers, since that is never slower than executing
4279 another instruction.
4280
4281 We test for !arm_arch5t here, because code for any architecture
4282 less than this could potentially be run on one of the buggy
4283 chips. */
4284 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4285 {
4286 /* Validate that r3 is a call-clobbered register (always true in
4287 the default abi) ... */
4288 if (!call_used_or_fixed_reg_p (3))
4289 return 0;
4290
4291 /* ... that it isn't being used for a return value ... */
4292 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4293 return 0;
4294
4295 /* ... or for a tail-call argument ... */
4296 if (sibling)
4297 {
4298 gcc_assert (CALL_P (sibling));
4299
4300 if (find_regno_fusage (sibling, USE, 3))
4301 return 0;
4302 }
4303
4304 /* ... and that there are no call-saved registers in r0-r2
4305 (always true in the default ABI). */
4306 if (saved_int_regs & 0x7)
4307 return 0;
4308 }
4309
4310 /* Can't be done if interworking with Thumb, and any registers have been
4311 stacked. */
4312 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4313 return 0;
4314
4315 /* On StrongARM, conditional returns are expensive if they aren't
4316 taken and multiple registers have been stacked. */
4317 if (iscond && arm_tune_strongarm)
4318 {
4319 /* Conditional return when just the LR is stored is a simple
4320 conditional-load instruction, that's not expensive. */
4321 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4322 return 0;
4323
4324 if (flag_pic
4325 && arm_pic_register != INVALID_REGNUM
4326 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4327 return 0;
4328 }
4329
4330 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4331 several instructions if anything needs to be popped. Armv8.1-M Mainline
4332 also needs several instructions to save and restore FP context. */
4333 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4334 return 0;
4335
4336 /* If there are saved registers but the LR isn't saved, then we need
4337 two instructions for the return. */
4338 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4339 return 0;
4340
4341 /* Can't be done if any of the VFP regs are pushed,
4342 since this also requires an insn. */
4343 if (TARGET_VFP_BASE)
4344 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4345 if (reg_needs_saving_p (regno))
4346 return 0;
4347
4348 if (TARGET_REALLY_IWMMXT)
4349 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4350 if (reg_needs_saving_p (regno))
4351 return 0;
4352
4353 return 1;
4354 }
4355
4356 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4357 shrink-wrapping if possible. This is the case if we need to emit a
4358 prologue, which we can test by looking at the offsets. */
4359 bool
4360 use_simple_return_p (void)
4361 {
4362 arm_stack_offsets *offsets;
4363
4364 /* Note this function can be called before or after reload. */
4365 if (!reload_completed)
4366 arm_compute_frame_layout ();
4367
4368 offsets = arm_get_frame_offsets ();
4369 return offsets->outgoing_args != 0;
4370 }
4371
4372 /* Return TRUE if int I is a valid immediate ARM constant. */
4373
4374 int
4375 const_ok_for_arm (HOST_WIDE_INT i)
4376 {
4377 int lowbit;
4378
4379 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4380 be all zero, or all one. */
4381 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4382 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4383 != ((~(unsigned HOST_WIDE_INT) 0)
4384 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4385 return FALSE;
4386
4387 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4388
4389 /* Fast return for 0 and small values. We must do this for zero, since
4390 the code below can't handle that one case. */
4391 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4392 return TRUE;
4393
4394 /* Get the number of trailing zeros. */
4395 lowbit = ffs((int) i) - 1;
4396
4397 /* Only even shifts are allowed in ARM mode so round down to the
4398 nearest even number. */
4399 if (TARGET_ARM)
4400 lowbit &= ~1;
4401
4402 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4403 return TRUE;
4404
4405 if (TARGET_ARM)
4406 {
4407 /* Allow rotated constants in ARM mode. */
4408 if (lowbit <= 4
4409 && ((i & ~0xc000003f) == 0
4410 || (i & ~0xf000000f) == 0
4411 || (i & ~0xfc000003) == 0))
4412 return TRUE;
4413 }
4414 else if (TARGET_THUMB2)
4415 {
4416 HOST_WIDE_INT v;
4417
4418 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4419 v = i & 0xff;
4420 v |= v << 16;
4421 if (i == v || i == (v | (v << 8)))
4422 return TRUE;
4423
4424 /* Allow repeated pattern 0xXY00XY00. */
4425 v = i & 0xff00;
4426 v |= v << 16;
4427 if (i == v)
4428 return TRUE;
4429 }
4430 else if (TARGET_HAVE_MOVT)
4431 {
4432 /* Thumb-1 Targets with MOVT. */
4433 if (i > 0xffff)
4434 return FALSE;
4435 else
4436 return TRUE;
4437 }
4438
4439 return FALSE;
4440 }
4441
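/* Editorial sketch, not part of the GCC sources: a brute-force check
   equivalent to the ARM-mode rule above - a constant is valid if some
   even left-rotation of it fits in 8 bits.  Worked examples:
   0xf000000f (rotating left by 4 gives 0xff) is valid, while
   0x00000101 never fits and needs more than one instruction in ARM
   mode.  The Thumb-2 replicated patterns and MOVT cases are not
   covered here.  Kept under "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

static unsigned int
rol32 (unsigned int x, int n)
{
  return n == 0 ? x : (x << n) | (x >> (32 - n));
}

static int
arm_rotated_imm8_p (unsigned int x)
{
  for (int r = 0; r < 32; r += 2)
    if (rol32 (x, r) <= 0xff)
      return 1;
  return 0;
}

int
main (void)
{
  printf ("%d %d %d\n",
	  arm_rotated_imm8_p (0x0000ab00),   /* 1: 0xab << 8  */
	  arm_rotated_imm8_p (0xf000000f),   /* 1: 0xff rotated right by 4  */
	  arm_rotated_imm8_p (0x00000101));  /* 0: the set bits span 9 bits  */
  return 0;
}
#endif
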
4442 /* Return true if I is a valid constant for the operation CODE. */
4443 int
4444 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4445 {
4446 if (const_ok_for_arm (i))
4447 return 1;
4448
4449 switch (code)
4450 {
4451 case SET:
4452 /* See if we can use movw. */
4453 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4454 return 1;
4455 else
4456 /* Otherwise, try mvn. */
4457 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4458
4459 case PLUS:
4460 /* See if we can use addw or subw. */
4461 if (TARGET_THUMB2
4462 && ((i & 0xfffff000) == 0
4463 || ((-i) & 0xfffff000) == 0))
4464 return 1;
4465 /* Fall through. */
4466 case COMPARE:
4467 case EQ:
4468 case NE:
4469 case GT:
4470 case LE:
4471 case LT:
4472 case GE:
4473 case GEU:
4474 case LTU:
4475 case GTU:
4476 case LEU:
4477 case UNORDERED:
4478 case ORDERED:
4479 case UNEQ:
4480 case UNGE:
4481 case UNLT:
4482 case UNGT:
4483 case UNLE:
4484 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4485
4486 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4487 case XOR:
4488 return 0;
4489
4490 case IOR:
4491 if (TARGET_THUMB2)
4492 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4493 return 0;
4494
4495 case AND:
4496 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4497
4498 default:
4499 gcc_unreachable ();
4500 }
4501 }
4502
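/* Editorial example, not part of the GCC sources: the point of the
   negated/inverted checks above.  -200 is not a valid ARM immediate,
   but const_ok_for_arm (200) holds, so a comparison against -200 can
   be emitted as "cmn r0, #200" instead of "cmp r0, #-200"; similarly,
   a SET of 0xffffff00 can use "mvn r0, #0xff".  */
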
4503 /* Return true if I is a valid DImode constant for the operation CODE. */
4504 int
4505 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4506 {
4507 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4508 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4509 rtx hi = GEN_INT (hi_val);
4510 rtx lo = GEN_INT (lo_val);
4511
4512 if (TARGET_THUMB1)
4513 return 0;
4514
4515 switch (code)
4516 {
4517 case AND:
4518 case IOR:
4519 case XOR:
4520 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4521 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4522 case PLUS:
4523 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4524
4525 default:
4526 return 0;
4527 }
4528 }
4529
4530 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
4531 Avoid generating useless code when one of the bytes is zero. */
4532 void
4533 thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
4534 {
4535 bool mov_done_p = false;
4536 int i;
4537
4538 /* Emit upper 3 bytes if needed. */
4539 for (i = 0; i < 3; i++)
4540 {
4541 int byte = (op1 >> (8 * (3 - i))) & 0xff;
4542
4543 if (byte)
4544 {
4545 emit_set_insn (op0, mov_done_p
4546 ? gen_rtx_PLUS (SImode, op0, GEN_INT (byte))
4547 : GEN_INT (byte));
4548 mov_done_p = true;
4549 }
4550
4551 if (mov_done_p)
4552 emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
4553 }
4554
4555 /* Emit lower byte if needed. */
4556 if (!mov_done_p)
4557 emit_set_insn (op0, GEN_INT (op1 & 0xff));
4558 else if (op1 & 0xff)
4559 emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
4560 }
4561
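/* Editorial sketch, not part of the GCC sources: a standalone
   simulation of the byte-at-a-time strategy above, printing the
   pseudo-assembly it would choose for a sample constant.  The register
   name and output format are invented for illustration.  Kept under
   "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

static void
simulate_thumb1_const (unsigned int val)
{
  int mov_done = 0;
  for (int i = 0; i < 3; i++)
    {
      unsigned int byte = (val >> (8 * (3 - i))) & 0xff;
      if (byte)
	{
	  printf (mov_done ? "adds r0, #%#x\n" : "movs r0, #%#x\n", byte);
	  mov_done = 1;
	}
      if (mov_done)
	printf ("lsls r0, #8\n");
    }
  if (!mov_done)
    printf ("movs r0, #%#x\n", val & 0xff);
  else if (val & 0xff)
    printf ("adds r0, #%#x\n", val & 0xff);
}

int
main (void)
{
  /* 0x12003400 comes out as: movs, lsls, lsls, adds, lsls.  */
  simulate_thumb1_const (0x12003400);
  return 0;
}
#endif
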
4562 /* Emit a sequence of insns to handle a large constant.
4563 CODE is the code of the operation required, it can be any of SET, PLUS,
4564 IOR, AND, XOR, MINUS;
4565 MODE is the mode in which the operation is being performed;
4566 VAL is the integer to operate on;
4567 SOURCE is the other operand (a register, or a null-pointer for SET);
4568 SUBTARGETS means it is safe to create scratch registers if that will
4569 either produce a simpler sequence, or we will want to cse the values.
4570 Return value is the number of insns emitted. */
4571
4572 /* ??? Tweak this for thumb2. */
4573 int
4574 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4575 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4576 {
4577 rtx cond;
4578
4579 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4580 cond = COND_EXEC_TEST (PATTERN (insn));
4581 else
4582 cond = NULL_RTX;
4583
4584 if (subtargets || code == SET
4585 || (REG_P (target) && REG_P (source)
4586 && REGNO (target) != REGNO (source)))
4587 {
4588 /* After arm_reorg has been called, we can't fix up expensive
4589 constants by pushing them into memory so we must synthesize
4590 them in-line, regardless of the cost. This is only likely to
4591 be more costly on chips that have load delay slots and we are
4592 compiling without running the scheduler (so no splitting
4593 occurred before the final instruction emission).
4594
4595 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4596 */
4597 if (!cfun->machine->after_arm_reorg
4598 && !cond
4599 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4600 1, 0)
4601 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4602 + (code != SET))))
4603 {
4604 if (code == SET)
4605 {
4606 /* Currently SET is the only monadic value for CODE; all
4607 the rest are dyadic. */
4608 if (TARGET_USE_MOVT)
4609 arm_emit_movpair (target, GEN_INT (val));
4610 else
4611 emit_set_insn (target, GEN_INT (val));
4612
4613 return 1;
4614 }
4615 else
4616 {
4617 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4618
4619 if (TARGET_USE_MOVT)
4620 arm_emit_movpair (temp, GEN_INT (val));
4621 else
4622 emit_set_insn (temp, GEN_INT (val));
4623
4624 /* For MINUS, the value is subtracted from, since we never
4625 have subtraction of a constant. */
4626 if (code == MINUS)
4627 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4628 else
4629 emit_set_insn (target,
4630 gen_rtx_fmt_ee (code, mode, source, temp));
4631 return 2;
4632 }
4633 }
4634 }
4635
4636 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4637 1);
4638 }
4639
4640 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4641 ARM/THUMB2 immediates and add up to VAL.
4642 The function return value gives the number of insns required. */
4643 static int
4644 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4645 struct four_ints *return_sequence)
4646 {
4647 int best_consecutive_zeros = 0;
4648 int i;
4649 int best_start = 0;
4650 int insns1, insns2;
4651 struct four_ints tmp_sequence;
4652
4653 /* If we aren't targeting ARM, the best place to start is always at
4654 the bottom, otherwise look more closely. */
4655 if (TARGET_ARM)
4656 {
4657 for (i = 0; i < 32; i += 2)
4658 {
4659 int consecutive_zeros = 0;
4660
4661 if (!(val & (3 << i)))
4662 {
4663 while ((i < 32) && !(val & (3 << i)))
4664 {
4665 consecutive_zeros += 2;
4666 i += 2;
4667 }
4668 if (consecutive_zeros > best_consecutive_zeros)
4669 {
4670 best_consecutive_zeros = consecutive_zeros;
4671 best_start = i - consecutive_zeros;
4672 }
4673 i -= 2;
4674 }
4675 }
4676 }
4677
4678 /* So long as it won't require any more insns to do so, it's
4679 desirable to emit a small constant (in bits 0...9) in the last
4680 insn. This way there is more chance that it can be combined with
4681 a later addressing insn to form a pre-indexed load or store
4682 operation. Consider:
4683
4684 *((volatile int *)0xe0000100) = 1;
4685 *((volatile int *)0xe0000110) = 2;
4686
4687 We want this to wind up as:
4688
4689 mov rA, #0xe0000000
4690 mov rB, #1
4691 str rB, [rA, #0x100]
4692 mov rB, #2
4693 str rB, [rA, #0x110]
4694
4695 rather than having to synthesize both large constants from scratch.
4696
4697 Therefore, we calculate how many insns would be required to emit
4698 the constant starting from `best_start', and also starting from
4699 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4700 yield a shorter sequence, we may as well use zero. */
4701 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4702 if (best_start != 0
4703 && ((HOST_WIDE_INT_1U << best_start) < val))
4704 {
4705 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4706 if (insns2 <= insns1)
4707 {
4708 *return_sequence = tmp_sequence;
4709 insns1 = insns2;
4710 }
4711 }
4712
4713 return insns1;
4714 }
4715
4716 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4717 static int
4718 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4719 struct four_ints *return_sequence, int i)
4720 {
4721 int remainder = val & 0xffffffff;
4722 int insns = 0;
4723
4724 /* Try and find a way of doing the job in either two or three
4725 instructions.
4726
4727 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4728 location. We start at position I. This may be the MSB, or
4729 optimal_immediate_sequence may have positioned it at the largest block
4730 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4731 wrapping around to the top of the word when we drop off the bottom.
4732 In the worst case this code should produce no more than four insns.
4733
4734 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4735 constants, shifted to any arbitrary location. We should always start
4736 at the MSB. */
4737 do
4738 {
4739 int end;
4740 unsigned int b1, b2, b3, b4;
4741 unsigned HOST_WIDE_INT result;
4742 int loc;
4743
4744 gcc_assert (insns < 4);
4745
4746 if (i <= 0)
4747 i += 32;
4748
4749 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4750 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4751 {
4752 loc = i;
4753 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4754 /* We can use addw/subw for the last 12 bits. */
4755 result = remainder;
4756 else
4757 {
4758 /* Use an 8-bit shifted/rotated immediate. */
4759 end = i - 8;
4760 if (end < 0)
4761 end += 32;
4762 result = remainder & ((0x0ff << end)
4763 | ((i < end) ? (0xff >> (32 - end))
4764 : 0));
4765 i -= 8;
4766 }
4767 }
4768 else
4769 {
4770 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4771 arbitrary shifts. */
4772 i -= TARGET_ARM ? 2 : 1;
4773 continue;
4774 }
4775
4776 /* Next, see if we can do a better job with a thumb2 replicated
4777 constant.
4778
4779 We do it this way around to catch the cases like 0x01F001E0 where
4780 two 8-bit immediates would work, but a replicated constant would
4781 make it worse.
4782
4783 TODO: 16-bit constants that don't clear all the bits, but still win.
4784 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4785 if (TARGET_THUMB2)
4786 {
4787 b1 = (remainder & 0xff000000) >> 24;
4788 b2 = (remainder & 0x00ff0000) >> 16;
4789 b3 = (remainder & 0x0000ff00) >> 8;
4790 b4 = remainder & 0xff;
4791
4792 if (loc > 24)
4793 {
4794 /* The 8-bit immediate already found clears b1 (and maybe b2),
4795 but must leave b3 and b4 alone. */
4796
4797 /* First try to find a 32-bit replicated constant that clears
4798 almost everything. We can assume that we can't do it in one,
4799 or else we wouldn't be here. */
4800 unsigned int tmp = b1 & b2 & b3 & b4;
4801 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4802 + (tmp << 24);
4803 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4804 + (tmp == b3) + (tmp == b4);
4805 if (tmp
4806 && (matching_bytes >= 3
4807 || (matching_bytes == 2
4808 && const_ok_for_op (remainder & ~tmp2, code))))
4809 {
4810 /* At least 3 of the bytes match, and the fourth has at
4811 least as many bits set, or two of the bytes match
4812 and it will only require one more insn to finish. */
4813 result = tmp2;
4814 i = tmp != b1 ? 32
4815 : tmp != b2 ? 24
4816 : tmp != b3 ? 16
4817 : 8;
4818 }
4819
4820 /* Second, try to find a 16-bit replicated constant that can
4821 leave three of the bytes clear. If b2 or b4 is already
4822 zero, then we can. If the 8-bit immediate from above would not
4823 clear b2 anyway, then we still win. */
4824 else if (b1 == b3 && (!b2 || !b4
4825 || (remainder & 0x00ff0000 & ~result)))
4826 {
4827 result = remainder & 0xff00ff00;
4828 i = 24;
4829 }
4830 }
4831 else if (loc > 16)
4832 {
4833 /* The 8-bit immediate already found clears b2 (and maybe b3)
4834 and we don't get here unless b1 is already clear, but it will
4835 leave b4 unchanged. */
4836
4837 /* If we can clear b2 and b4 at once, then we win, since the
4838 8-bits couldn't possibly reach that far. */
4839 if (b2 == b4)
4840 {
4841 result = remainder & 0x00ff00ff;
4842 i = 16;
4843 }
4844 }
4845 }
4846
4847 return_sequence->i[insns++] = result;
4848 remainder &= ~result;
4849
4850 if (code == SET || code == MINUS)
4851 code = PLUS;
4852 }
4853 while (remainder);
4854
4855 return insns;
4856 }
4857
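/* Editorial sketch, not part of the GCC sources: a much-simplified,
   bottom-up version of the chunking idea above - peel off 8-bit chunks
   aligned to even bit positions until nothing is left.  It ignores
   best_start, wrap-around and the Thumb-2 replicated-constant
   refinements; __builtin_ctz is a GCC/Clang builtin.  Kept under
   "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int remainder = 0xfff000ff;	/* Arbitrary sample value.  */
  int insns = 0;

  while (remainder)
    {
      /* Lowest set bit, rounded down to an even position.  */
      int low = __builtin_ctz (remainder) & ~1;
      /* Everything the 8-bit chunk at that position can cover.  */
      unsigned int chunk = remainder & (0xffu << low);
      printf ("chunk %d: %#x\n", insns, chunk);
      remainder &= ~chunk;
      insns++;
    }
  printf ("%d insns\n", insns);	/* 3 for this sample.  */
  return 0;
}
#endif
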
4858 /* Emit an instruction with the indicated PATTERN. If COND is
4859 non-NULL, conditionalize the execution of the instruction on COND
4860 being true. */
4861
4862 static void
4863 emit_constant_insn (rtx cond, rtx pattern)
4864 {
4865 if (cond)
4866 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4867 emit_insn (pattern);
4868 }
4869
4870 /* As above, but extra parameter GENERATE which, if clear, suppresses
4871 RTL generation. */
4872
4873 static int
4874 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4875 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4876 int subtargets, int generate)
4877 {
4878 int can_invert = 0;
4879 int can_negate = 0;
4880 int final_invert = 0;
4881 int i;
4882 int set_sign_bit_copies = 0;
4883 int clear_sign_bit_copies = 0;
4884 int clear_zero_bit_copies = 0;
4885 int set_zero_bit_copies = 0;
4886 int insns = 0, neg_insns, inv_insns;
4887 unsigned HOST_WIDE_INT temp1, temp2;
4888 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4889 struct four_ints *immediates;
4890 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4891
4892 /* Find out which operations are safe for a given CODE. Also do a quick
4893 check for degenerate cases; these can occur when DImode operations
4894 are split. */
4895 switch (code)
4896 {
4897 case SET:
4898 can_invert = 1;
4899 break;
4900
4901 case PLUS:
4902 can_negate = 1;
4903 break;
4904
4905 case IOR:
4906 if (remainder == 0xffffffff)
4907 {
4908 if (generate)
4909 emit_constant_insn (cond,
4910 gen_rtx_SET (target,
4911 GEN_INT (ARM_SIGN_EXTEND (val))));
4912 return 1;
4913 }
4914
4915 if (remainder == 0)
4916 {
4917 if (reload_completed && rtx_equal_p (target, source))
4918 return 0;
4919
4920 if (generate)
4921 emit_constant_insn (cond, gen_rtx_SET (target, source));
4922 return 1;
4923 }
4924 break;
4925
4926 case AND:
4927 if (remainder == 0)
4928 {
4929 if (generate)
4930 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4931 return 1;
4932 }
4933 if (remainder == 0xffffffff)
4934 {
4935 if (reload_completed && rtx_equal_p (target, source))
4936 return 0;
4937 if (generate)
4938 emit_constant_insn (cond, gen_rtx_SET (target, source));
4939 return 1;
4940 }
4941 can_invert = 1;
4942 break;
4943
4944 case XOR:
4945 if (remainder == 0)
4946 {
4947 if (reload_completed && rtx_equal_p (target, source))
4948 return 0;
4949 if (generate)
4950 emit_constant_insn (cond, gen_rtx_SET (target, source));
4951 return 1;
4952 }
4953
4954 if (remainder == 0xffffffff)
4955 {
4956 if (generate)
4957 emit_constant_insn (cond,
4958 gen_rtx_SET (target,
4959 gen_rtx_NOT (mode, source)));
4960 return 1;
4961 }
4962 final_invert = 1;
4963 break;
4964
4965 case MINUS:
4966 /* We treat MINUS as (val - source), since (source - val) is always
4967 passed as (source + (-val)). */
4968 if (remainder == 0)
4969 {
4970 if (generate)
4971 emit_constant_insn (cond,
4972 gen_rtx_SET (target,
4973 gen_rtx_NEG (mode, source)));
4974 return 1;
4975 }
4976 if (const_ok_for_arm (val))
4977 {
4978 if (generate)
4979 emit_constant_insn (cond,
4980 gen_rtx_SET (target,
4981 gen_rtx_MINUS (mode, GEN_INT (val),
4982 source)));
4983 return 1;
4984 }
4985
4986 break;
4987
4988 default:
4989 gcc_unreachable ();
4990 }
4991
4992 /* If we can do it in one insn get out quickly. */
4993 if (const_ok_for_op (val, code))
4994 {
4995 if (generate)
4996 emit_constant_insn (cond,
4997 gen_rtx_SET (target,
4998 (source
4999 ? gen_rtx_fmt_ee (code, mode, source,
5000 GEN_INT (val))
5001 : GEN_INT (val))));
5002 return 1;
5003 }
5004
5005 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5006 insn. */
5007 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5008 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5009 {
5010 if (generate)
5011 {
5012 if (mode == SImode && i == 16)
5013 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5014 smaller insn. */
5015 emit_constant_insn (cond,
5016 gen_zero_extendhisi2
5017 (target, gen_lowpart (HImode, source)));
5018 else
5019 /* Extz only supports SImode, but we can coerce the operands
5020 into that mode. */
5021 emit_constant_insn (cond,
5022 gen_extzv_t2 (gen_lowpart (SImode, target),
5023 gen_lowpart (SImode, source),
5024 GEN_INT (i), const0_rtx));
5025 }
5026
5027 return 1;
5028 }
5029
5030 /* Calculate a few attributes that may be useful for specific
5031 optimizations. */
5032 /* Count number of leading zeros. */
5033 for (i = 31; i >= 0; i--)
5034 {
5035 if ((remainder & (1 << i)) == 0)
5036 clear_sign_bit_copies++;
5037 else
5038 break;
5039 }
5040
5041 /* Count number of leading 1's. */
5042 for (i = 31; i >= 0; i--)
5043 {
5044 if ((remainder & (1 << i)) != 0)
5045 set_sign_bit_copies++;
5046 else
5047 break;
5048 }
5049
5050 /* Count number of trailing zeros. */
5051 for (i = 0; i <= 31; i++)
5052 {
5053 if ((remainder & (1 << i)) == 0)
5054 clear_zero_bit_copies++;
5055 else
5056 break;
5057 }
5058
5059 /* Count number of trailing 1's. */
5060 for (i = 0; i <= 31; i++)
5061 {
5062 if ((remainder & (1 << i)) != 0)
5063 set_zero_bit_copies++;
5064 else
5065 break;
5066 }
5067
5068 switch (code)
5069 {
5070 case SET:
5071 /* See if we can do this by sign_extending a constant that is known
5072 to be negative. This is a good way of doing it, since the shift
5073 may well merge into a subsequent insn. */
5074 if (set_sign_bit_copies > 1)
5075 {
5076 if (const_ok_for_arm
5077 (temp1 = ARM_SIGN_EXTEND (remainder
5078 << (set_sign_bit_copies - 1))))
5079 {
5080 if (generate)
5081 {
5082 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5083 emit_constant_insn (cond,
5084 gen_rtx_SET (new_src, GEN_INT (temp1)));
5085 emit_constant_insn (cond,
5086 gen_ashrsi3 (target, new_src,
5087 GEN_INT (set_sign_bit_copies - 1)));
5088 }
5089 return 2;
5090 }
5091 /* For an inverted constant, we will need to set the low bits;
5092 these will be shifted out of harm's way. */
5093 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5094 if (const_ok_for_arm (~temp1))
5095 {
5096 if (generate)
5097 {
5098 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5099 emit_constant_insn (cond,
5100 gen_rtx_SET (new_src, GEN_INT (temp1)));
5101 emit_constant_insn (cond,
5102 gen_ashrsi3 (target, new_src,
5103 GEN_INT (set_sign_bit_copies - 1)));
5104 }
5105 return 2;
5106 }
5107 }
5108
5109 /* See if we can calculate the value as the difference between two
5110 valid immediates. */
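/* For instance (illustrative), 0x00fffff0 is not itself a valid
   immediate, but it is the difference 0x01000000 - 0x10 of two values
   that are, so it can be built with a MOV followed by a SUB.  */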
5111 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5112 {
5113 int topshift = clear_sign_bit_copies & ~1;
5114
5115 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5116 & (0xff000000 >> topshift));
5117
5118 /* If temp1 is zero, then that means the 9 most significant
5119 bits of remainder were 1 and we've caused it to overflow.
5120 When topshift is 0 we don't need to do anything since we
5121 can borrow from 'bit 32'. */
5122 if (temp1 == 0 && topshift != 0)
5123 temp1 = 0x80000000 >> (topshift - 1);
5124
5125 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5126
5127 if (const_ok_for_arm (temp2))
5128 {
5129 if (generate)
5130 {
5131 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5132 emit_constant_insn (cond,
5133 gen_rtx_SET (new_src, GEN_INT (temp1)));
5134 emit_constant_insn (cond,
5135 gen_addsi3 (target, new_src,
5136 GEN_INT (-temp2)));
5137 }
5138
5139 return 2;
5140 }
5141 }
5142
5143 /* See if we can generate this by setting the bottom (or the top)
5144 16 bits, and then shifting these into the other half of the
5145 word. We only look for the simplest cases; to do more would cost
5146 too much. Be careful, however, not to generate this when the
5147 alternative would take fewer insns. */
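/* E.g. (illustrative) 0x01230123 can be built by first constructing
   0x123 and then ORing that value with itself shifted left by 16.  */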
5148 if (val & 0xffff0000)
5149 {
5150 temp1 = remainder & 0xffff0000;
5151 temp2 = remainder & 0x0000ffff;
5152
5153 /* Overlaps outside this range are best done using other methods. */
5154 for (i = 9; i < 24; i++)
5155 {
5156 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5157 && !const_ok_for_arm (temp2))
5158 {
5159 rtx new_src = (subtargets
5160 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5161 : target);
5162 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5163 source, subtargets, generate);
5164 source = new_src;
5165 if (generate)
5166 emit_constant_insn
5167 (cond,
5168 gen_rtx_SET
5169 (target,
5170 gen_rtx_IOR (mode,
5171 gen_rtx_ASHIFT (mode, source,
5172 GEN_INT (i)),
5173 source)));
5174 return insns + 1;
5175 }
5176 }
5177
5178 /* Don't duplicate cases already considered. */
5179 for (i = 17; i < 24; i++)
5180 {
5181 if (((temp1 | (temp1 >> i)) == remainder)
5182 && !const_ok_for_arm (temp1))
5183 {
5184 rtx new_src = (subtargets
5185 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5186 : target);
5187 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5188 source, subtargets, generate);
5189 source = new_src;
5190 if (generate)
5191 emit_constant_insn
5192 (cond,
5193 gen_rtx_SET (target,
5194 gen_rtx_IOR
5195 (mode,
5196 gen_rtx_LSHIFTRT (mode, source,
5197 GEN_INT (i)),
5198 source)));
5199 return insns + 1;
5200 }
5201 }
5202 }
5203 break;
5204
5205 case IOR:
5206 case XOR:
5207 /* If we have IOR or XOR, and the constant can be loaded in a
5208 single instruction, and we can find a temporary to put it in,
5209 then this can be done in two instructions instead of 3-4. */
5210 if (subtargets
5211 /* TARGET can't be NULL if SUBTARGETS is 0. */
5212 || (reload_completed && !reg_mentioned_p (target, source)))
5213 {
5214 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5215 {
5216 if (generate)
5217 {
5218 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5219
5220 emit_constant_insn (cond,
5221 gen_rtx_SET (sub, GEN_INT (val)));
5222 emit_constant_insn (cond,
5223 gen_rtx_SET (target,
5224 gen_rtx_fmt_ee (code, mode,
5225 source, sub)));
5226 }
5227 return 2;
5228 }
5229 }
5230
5231 if (code == XOR)
5232 break;
5233
5234 /* Convert
5235 x = y | constant (where the constant consists of set_sign_bit_copies
5236 leading 1s followed by 0s, e.g. 0xfff00000) to
5237 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5238
5239 This can be done in 2 instructions by using shifts with mov or mvn.
5240 e.g. for
5241 x = x | 0xfff00000;
5242 we generate:
5243 mvn r0, r0, asl #12
5244 mvn r0, r0, lsr #12 */
5245 if (set_sign_bit_copies > 8
5246 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5247 {
5248 if (generate)
5249 {
5250 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5251 rtx shift = GEN_INT (set_sign_bit_copies);
5252
5253 emit_constant_insn
5254 (cond,
5255 gen_rtx_SET (sub,
5256 gen_rtx_NOT (mode,
5257 gen_rtx_ASHIFT (mode,
5258 source,
5259 shift))));
5260 emit_constant_insn
5261 (cond,
5262 gen_rtx_SET (target,
5263 gen_rtx_NOT (mode,
5264 gen_rtx_LSHIFTRT (mode, sub,
5265 shift))));
5266 }
5267 return 2;
5268 }
5269
5270 /* Convert
5271 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5272 to
5273 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5274
5275 For example, for
5276 r0 = r0 | 0xfff
5277 we generate:
5278 mvn r0, r0, lsr #12
5279 mvn r0, r0, asl #12 */
5280 if (set_zero_bit_copies > 8
5281 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5282 {
5283 if (generate)
5284 {
5285 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5286 rtx shift = GEN_INT (set_zero_bit_copies);
5287
5288 emit_constant_insn
5289 (cond,
5290 gen_rtx_SET (sub,
5291 gen_rtx_NOT (mode,
5292 gen_rtx_LSHIFTRT (mode,
5293 source,
5294 shift))));
5295 emit_constant_insn
5296 (cond,
5297 gen_rtx_SET (target,
5298 gen_rtx_NOT (mode,
5299 gen_rtx_ASHIFT (mode, sub,
5300 shift))));
5301 }
5302 return 2;
5303 }
5304
5305 /* This will never be reached for Thumb2 because orn is a valid
5306 instruction. This is for Thumb1 and the 32-bit ARM case.
5307
5308 x = y | constant (such that ~constant is a valid constant)
5309 Transform this to
5310 x = ~(~y & ~constant).
5311 */
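/* E.g. (illustrative) for x = y | 0xffff00ff, whose complement
   0x0000ff00 is a valid immediate, we can emit
   mvn x, y
   and x, x, #0xff00
   mvn x, x
   which computes ~(~y & 0xff00), i.e. y | 0xffff00ff.  */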
5312 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5313 {
5314 if (generate)
5315 {
5316 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5317 emit_constant_insn (cond,
5318 gen_rtx_SET (sub,
5319 gen_rtx_NOT (mode, source)));
5320 source = sub;
5321 if (subtargets)
5322 sub = gen_reg_rtx (mode);
5323 emit_constant_insn (cond,
5324 gen_rtx_SET (sub,
5325 gen_rtx_AND (mode, source,
5326 GEN_INT (temp1))));
5327 emit_constant_insn (cond,
5328 gen_rtx_SET (target,
5329 gen_rtx_NOT (mode, sub)));
5330 }
5331 return 3;
5332 }
5333 break;
5334
5335 case AND:
5336 /* See if two shifts will do 2 or more insns' worth of work. */
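/* E.g. (illustrative) on a core without UBFX, x &= 0x1ff cannot use a
   single immediate, but shifting left by 23 and then logically right
   by 23 clears the top 23 bits in two instructions.  */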
5337 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5338 {
5339 HOST_WIDE_INT shift_mask = ((0xffffffff
5340 << (32 - clear_sign_bit_copies))
5341 & 0xffffffff);
5342
5343 if ((remainder | shift_mask) != 0xffffffff)
5344 {
5345 HOST_WIDE_INT new_val
5346 = ARM_SIGN_EXTEND (remainder | shift_mask);
5347
5348 if (generate)
5349 {
5350 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5351 insns = arm_gen_constant (AND, SImode, cond, new_val,
5352 new_src, source, subtargets, 1);
5353 source = new_src;
5354 }
5355 else
5356 {
5357 rtx targ = subtargets ? NULL_RTX : target;
5358 insns = arm_gen_constant (AND, mode, cond, new_val,
5359 targ, source, subtargets, 0);
5360 }
5361 }
5362
5363 if (generate)
5364 {
5365 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5366 rtx shift = GEN_INT (clear_sign_bit_copies);
5367
5368 emit_insn (gen_ashlsi3 (new_src, source, shift));
5369 emit_insn (gen_lshrsi3 (target, new_src, shift));
5370 }
5371
5372 return insns + 2;
5373 }
5374
5375 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5376 {
5377 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5378
5379 if ((remainder | shift_mask) != 0xffffffff)
5380 {
5381 HOST_WIDE_INT new_val
5382 = ARM_SIGN_EXTEND (remainder | shift_mask);
5383 if (generate)
5384 {
5385 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5386
5387 insns = arm_gen_constant (AND, mode, cond, new_val,
5388 new_src, source, subtargets, 1);
5389 source = new_src;
5390 }
5391 else
5392 {
5393 rtx targ = subtargets ? NULL_RTX : target;
5394
5395 insns = arm_gen_constant (AND, mode, cond, new_val,
5396 targ, source, subtargets, 0);
5397 }
5398 }
5399
5400 if (generate)
5401 {
5402 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5403 rtx shift = GEN_INT (clear_zero_bit_copies);
5404
5405 emit_insn (gen_lshrsi3 (new_src, source, shift));
5406 emit_insn (gen_ashlsi3 (target, new_src, shift));
5407 }
5408
5409 return insns + 2;
5410 }
5411
5412 break;
5413
5414 default:
5415 break;
5416 }
5417
5418 /* Calculate what the instruction sequences would be if we generated it
5419 normally, negated, or inverted. */
5420 if (code == AND)
5421 /* AND cannot be split into multiple insns, so invert and use BIC. */
5422 insns = 99;
5423 else
5424 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5425
5426 if (can_negate)
5427 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5428 &neg_immediates);
5429 else
5430 neg_insns = 99;
5431
5432 if (can_invert || final_invert)
5433 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5434 &inv_immediates);
5435 else
5436 inv_insns = 99;
5437
5438 immediates = &pos_immediates;
5439
5440 /* Is the negated immediate sequence more efficient? */
5441 if (neg_insns < insns && neg_insns <= inv_insns)
5442 {
5443 insns = neg_insns;
5444 immediates = &neg_immediates;
5445 }
5446 else
5447 can_negate = 0;
5448
5449 /* Is the inverted immediate sequence more efficient?
5450 We must allow for an extra NOT instruction for XOR operations, although
5451 there is some chance that the final 'mvn' will get optimized later. */
5452 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5453 {
5454 insns = inv_insns;
5455 immediates = &inv_immediates;
5456 }
5457 else
5458 {
5459 can_invert = 0;
5460 final_invert = 0;
5461 }
5462
5463 /* Now output the chosen sequence as instructions. */
5464 if (generate)
5465 {
5466 for (i = 0; i < insns; i++)
5467 {
5468 rtx new_src, temp1_rtx;
5469
5470 temp1 = immediates->i[i];
5471
5472 if (code == SET || code == MINUS)
5473 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5474 else if ((final_invert || i < (insns - 1)) && subtargets)
5475 new_src = gen_reg_rtx (mode);
5476 else
5477 new_src = target;
5478
5479 if (can_invert)
5480 temp1 = ~temp1;
5481 else if (can_negate)
5482 temp1 = -temp1;
5483
5484 temp1 = trunc_int_for_mode (temp1, mode);
5485 temp1_rtx = GEN_INT (temp1);
5486
5487 if (code == SET)
5488 ;
5489 else if (code == MINUS)
5490 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5491 else
5492 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5493
5494 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5495 source = new_src;
5496
5497 if (code == SET)
5498 {
5499 can_negate = can_invert;
5500 can_invert = 0;
5501 code = PLUS;
5502 }
5503 else if (code == MINUS)
5504 code = PLUS;
5505 }
5506 }
5507
5508 if (final_invert)
5509 {
5510 if (generate)
5511 emit_constant_insn (cond, gen_rtx_SET (target,
5512 gen_rtx_NOT (mode, source)));
5513 insns++;
5514 }
5515
5516 return insns;
5517 }
5518
5519 /* Return TRUE if op is a constant where both the low and high words are
5520 suitable for RSB/RSC instructions. This is never true for Thumb, since
5521 we do not have RSC in that case. */
5522 static bool
5523 arm_const_double_prefer_rsbs_rsc (rtx op)
5524 {
5525 /* Thumb lacks RSC, so we never prefer that sequence. */
5526 if (TARGET_THUMB || !CONST_INT_P (op))
5527 return false;
5528 HOST_WIDE_INT hi, lo;
5529 lo = UINTVAL (op) & 0xffffffffULL;
5530 hi = UINTVAL (op) >> 32;
5531 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5532 }
5533
5534 /* Canonicalize a comparison so that we are more likely to recognize it.
5535 This can be done for a few constant compares, where we can make the
5536 immediate value easier to load. */
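/* For example (illustrative), (x <= 0x1fff) cannot load 0x1fff as an
   immediate, but it is equivalent to (x < 0x2000), and 0x2000 is a
   valid immediate.  */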
5537
5538 static void
5539 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5540 bool op0_preserve_value)
5541 {
5542 machine_mode mode;
5543 unsigned HOST_WIDE_INT i, maxval;
5544
5545 mode = GET_MODE (*op0);
5546 if (mode == VOIDmode)
5547 mode = GET_MODE (*op1);
5548
5549 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5550
5551 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5552 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5553 either reversed or (for constant OP1) adjusted to GE/LT.
5554 Similarly for GTU/LEU in Thumb mode. */
5555 if (mode == DImode)
5556 {
5557
5558 if (*code == GT || *code == LE
5559 || *code == GTU || *code == LEU)
5560 {
5561 /* Missing comparison. First try to use an available
5562 comparison. */
5563 if (CONST_INT_P (*op1))
5564 {
5565 i = INTVAL (*op1);
5566 switch (*code)
5567 {
5568 case GT:
5569 case LE:
5570 if (i != maxval)
5571 {
5572 /* Try to convert to GE/LT, unless that would be more
5573 expensive. */
5574 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5575 && arm_const_double_prefer_rsbs_rsc (*op1))
5576 return;
5577 *op1 = GEN_INT (i + 1);
5578 *code = *code == GT ? GE : LT;
5579 return;
5580 }
5581 break;
5582
5583 case GTU:
5584 case LEU:
5585 if (i != ~((unsigned HOST_WIDE_INT) 0))
5586 {
5587 /* Try to convert to GEU/LTU, unless that would
5588 be more expensive. */
5589 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5590 && arm_const_double_prefer_rsbs_rsc (*op1))
5591 return;
5592 *op1 = GEN_INT (i + 1);
5593 *code = *code == GTU ? GEU : LTU;
5594 return;
5595 }
5596 break;
5597
5598 default:
5599 gcc_unreachable ();
5600 }
5601 }
5602
5603 if (!op0_preserve_value)
5604 {
5605 std::swap (*op0, *op1);
5606 *code = (int)swap_condition ((enum rtx_code)*code);
5607 }
5608 }
5609 return;
5610 }
5611
5612 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5613 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5614 to facilitate possible combining with a cmp into 'ands'. */
5615 if (mode == SImode
5616 && GET_CODE (*op0) == ZERO_EXTEND
5617 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5618 && GET_MODE (XEXP (*op0, 0)) == QImode
5619 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5620 && subreg_lowpart_p (XEXP (*op0, 0))
5621 && *op1 == const0_rtx)
5622 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5623 GEN_INT (255));
5624
5625 /* Comparisons smaller than DImode. Only adjust comparisons against
5626 an out-of-range constant. */
5627 if (!CONST_INT_P (*op1)
5628 || const_ok_for_arm (INTVAL (*op1))
5629 || const_ok_for_arm (- INTVAL (*op1)))
5630 return;
5631
5632 i = INTVAL (*op1);
5633
5634 switch (*code)
5635 {
5636 case EQ:
5637 case NE:
5638 return;
5639
5640 case GT:
5641 case LE:
5642 if (i != maxval
5643 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5644 {
5645 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5646 *code = *code == GT ? GE : LT;
5647 return;
5648 }
5649 break;
5650
5651 case GE:
5652 case LT:
5653 if (i != ~maxval
5654 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5655 {
5656 *op1 = GEN_INT (i - 1);
5657 *code = *code == GE ? GT : LE;
5658 return;
5659 }
5660 break;
5661
5662 case GTU:
5663 case LEU:
5664 if (i != ~((unsigned HOST_WIDE_INT) 0)
5665 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5666 {
5667 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5668 *code = *code == GTU ? GEU : LTU;
5669 return;
5670 }
5671 break;
5672
5673 case GEU:
5674 case LTU:
5675 if (i != 0
5676 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5677 {
5678 *op1 = GEN_INT (i - 1);
5679 *code = *code == GEU ? GTU : LEU;
5680 return;
5681 }
5682 break;
5683
5684 default:
5685 gcc_unreachable ();
5686 }
5687 }
5688
5689
5690 /* Define how to find the value returned by a function. */
5691
5692 static rtx
5693 arm_function_value(const_tree type, const_tree func,
5694 bool outgoing ATTRIBUTE_UNUSED)
5695 {
5696 machine_mode mode;
5697 int unsignedp ATTRIBUTE_UNUSED;
5698 rtx r ATTRIBUTE_UNUSED;
5699
5700 mode = TYPE_MODE (type);
5701
5702 if (TARGET_AAPCS_BASED)
5703 return aapcs_allocate_return_reg (mode, type, func);
5704
5705 /* Promote integer types. */
5706 if (INTEGRAL_TYPE_P (type))
5707 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5708
5709 /* Promote small structs returned in a register to full-word size
5710 for big-endian AAPCS. */
5711 if (arm_return_in_msb (type))
5712 {
5713 HOST_WIDE_INT size = int_size_in_bytes (type);
5714 if (size % UNITS_PER_WORD != 0)
5715 {
5716 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5717 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5718 }
5719 }
5720
5721 return arm_libcall_value_1 (mode);
5722 }
5723
5724 /* libcall hashtable helpers. */
5725
5726 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5727 {
5728 static inline hashval_t hash (const rtx_def *);
5729 static inline bool equal (const rtx_def *, const rtx_def *);
5730 static inline void remove (rtx_def *);
5731 };
5732
5733 inline bool
5734 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5735 {
5736 return rtx_equal_p (p1, p2);
5737 }
5738
5739 inline hashval_t
5740 libcall_hasher::hash (const rtx_def *p1)
5741 {
5742 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5743 }
5744
5745 typedef hash_table<libcall_hasher> libcall_table_type;
5746
5747 static void
5748 add_libcall (libcall_table_type *htab, rtx libcall)
5749 {
5750 *htab->find_slot (libcall, INSERT) = libcall;
5751 }
5752
5753 static bool
5754 arm_libcall_uses_aapcs_base (const_rtx libcall)
5755 {
5756 static bool init_done = false;
5757 static libcall_table_type *libcall_htab = NULL;
5758
5759 if (!init_done)
5760 {
5761 init_done = true;
5762
5763 libcall_htab = new libcall_table_type (31);
5764 add_libcall (libcall_htab,
5765 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5766 add_libcall (libcall_htab,
5767 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5768 add_libcall (libcall_htab,
5769 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5770 add_libcall (libcall_htab,
5771 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5772
5773 add_libcall (libcall_htab,
5774 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5775 add_libcall (libcall_htab,
5776 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5777 add_libcall (libcall_htab,
5778 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5779 add_libcall (libcall_htab,
5780 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5781
5782 add_libcall (libcall_htab,
5783 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5784 add_libcall (libcall_htab,
5785 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5786 add_libcall (libcall_htab,
5787 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5788 add_libcall (libcall_htab,
5789 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5790 add_libcall (libcall_htab,
5791 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5792 add_libcall (libcall_htab,
5793 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5794 add_libcall (libcall_htab,
5795 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5796 add_libcall (libcall_htab,
5797 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5798
5799 /* Values from double-precision helper functions are returned in core
5800 registers if the selected core only supports single-precision
5801 arithmetic, even if we are using the hard-float ABI. The same is
5802 true for single-precision helpers except in the case of MVE, because in
5803 MVE we will be using the hard-float ABI on a CPU which doesn't support
5804 single-precision operations in hardware. In MVE the following check
5805 enables use of emulation for the single-precision arithmetic
5806 operations. */
5807 if (TARGET_HAVE_MVE)
5808 {
5809 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5810 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5811 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5812 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5813 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5814 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5815 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5816 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5817 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5818 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5819 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5820 }
5821 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5822 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5823 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5824 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5825 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5826 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5827 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5828 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5829 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5830 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5831 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5832 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5833 SFmode));
5834 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5835 DFmode));
5836 add_libcall (libcall_htab,
5837 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5838 }
5839
5840 return libcall && libcall_htab->find (libcall) != NULL;
5841 }
5842
5843 static rtx
5844 arm_libcall_value_1 (machine_mode mode)
5845 {
5846 if (TARGET_AAPCS_BASED)
5847 return aapcs_libcall_value (mode);
5848 else if (TARGET_IWMMXT_ABI
5849 && arm_vector_mode_supported_p (mode))
5850 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5851 else
5852 return gen_rtx_REG (mode, ARG_REGISTER (1));
5853 }
5854
5855 /* Define how to find the value returned by a library function
5856 assuming the value has mode MODE. */
5857
5858 static rtx
5859 arm_libcall_value (machine_mode mode, const_rtx libcall)
5860 {
5861 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5862 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5863 {
5864 /* The following libcalls return their result in integer registers,
5865 even though they return a floating point value. */
5866 if (arm_libcall_uses_aapcs_base (libcall))
5867 return gen_rtx_REG (mode, ARG_REGISTER(1));
5868
5869 }
5870
5871 return arm_libcall_value_1 (mode);
5872 }
5873
5874 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5875
5876 static bool
5877 arm_function_value_regno_p (const unsigned int regno)
5878 {
5879 if (regno == ARG_REGISTER (1)
5880 || (TARGET_32BIT
5881 && TARGET_AAPCS_BASED
5882 && TARGET_HARD_FLOAT
5883 && regno == FIRST_VFP_REGNUM)
5884 || (TARGET_IWMMXT_ABI
5885 && regno == FIRST_IWMMXT_REGNUM))
5886 return true;
5887
5888 return false;
5889 }
5890
5891 /* Determine the amount of memory needed to store the possible return
5892 registers of an untyped call. */
5893 int
5894 arm_apply_result_size (void)
5895 {
5896 int size = 16;
5897
5898 if (TARGET_32BIT)
5899 {
5900 if (TARGET_HARD_FLOAT_ABI)
5901 size += 32;
5902 if (TARGET_IWMMXT_ABI)
5903 size += 8;
5904 }
5905
5906 return size;
5907 }
5908
5909 /* Decide whether TYPE should be returned in memory (true)
5910 or in a register (false). FNTYPE is the type of the function making
5911 the call. */
5912 static bool
5913 arm_return_in_memory (const_tree type, const_tree fntype)
5914 {
5915 HOST_WIDE_INT size;
5916
5917 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5918
5919 if (TARGET_AAPCS_BASED)
5920 {
5921 /* Simple, non-aggregate types (i.e. not including vectors and
5922 complex) are always returned in a register (or registers).
5923 We don't care about which register here, so we can short-cut
5924 some of the detail. */
5925 if (!AGGREGATE_TYPE_P (type)
5926 && TREE_CODE (type) != VECTOR_TYPE
5927 && TREE_CODE (type) != COMPLEX_TYPE)
5928 return false;
5929
5930 /* Any return value that is no larger than one word can be
5931 returned in r0. */
5932 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5933 return false;
5934
5935 /* Check any available co-processors to see if they accept the
5936 type as a register candidate (VFP, for example, can return
5937 some aggregates in consecutive registers). These aren't
5938 available if the call is variadic. */
5939 if (aapcs_select_return_coproc (type, fntype) >= 0)
5940 return false;
5941
5942 /* Vector values should be returned using ARM registers, not
5943 memory (unless they're over 16 bytes, which will break since
5944 we only have four call-clobbered registers to play with). */
5945 if (TREE_CODE (type) == VECTOR_TYPE)
5946 return (size < 0 || size > (4 * UNITS_PER_WORD));
5947
5948 /* The rest go in memory. */
5949 return true;
5950 }
5951
5952 if (TREE_CODE (type) == VECTOR_TYPE)
5953 return (size < 0 || size > (4 * UNITS_PER_WORD));
5954
5955 if (!AGGREGATE_TYPE_P (type)
5956 && (TREE_CODE (type) != VECTOR_TYPE))
5957 /* All simple types are returned in registers. */
5958 return false;
5959
5960 if (arm_abi != ARM_ABI_APCS)
5961 {
5962 /* ATPCS and later return aggregate types in memory only if they are
5963 larger than a word (or are variable size). */
5964 return (size < 0 || size > UNITS_PER_WORD);
5965 }
5966
5967 /* For the arm-wince targets we choose to be compatible with Microsoft's
5968 ARM and Thumb compilers, which always return aggregates in memory. */
5969 #ifndef ARM_WINCE
5970 /* All structures/unions bigger than one word are returned in memory.
5971 Also catch the case where int_size_in_bytes returns -1. In this case
5972 the aggregate is either huge or of variable size, and in either case
5973 we will want to return it via memory and not in a register. */
5974 if (size < 0 || size > UNITS_PER_WORD)
5975 return true;
5976
5977 if (TREE_CODE (type) == RECORD_TYPE)
5978 {
5979 tree field;
5980
5981 /* For a struct the APCS says that we only return in a register
5982 if the type is 'integer like' and every addressable element
5983 has an offset of zero. For practical purposes this means
5984 that the structure can have at most one non bit-field element
5985 and that this element must be the first one in the structure. */
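/* As an illustration (hypothetical types): under these APCS rules
   struct a { int x; };             is returned in a register, while
   struct b { short x; short y; };  is returned in memory, because it
   has a second addressable (non-bit-field) member, even though it
   fits in one word.  */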
5986
5987 /* Find the first field, ignoring non FIELD_DECL things which will
5988 have been created by C++. */
5989 /* NOTE: This code is deprecated and has not been updated to handle
5990 DECL_FIELD_ABI_IGNORED. */
5991 for (field = TYPE_FIELDS (type);
5992 field && TREE_CODE (field) != FIELD_DECL;
5993 field = DECL_CHAIN (field))
5994 continue;
5995
5996 if (field == NULL)
5997 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5998
5999 /* Check that the first field is valid for returning in a register. */
6000
6001 /* ... Floats are not allowed. */
6002 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6003 return true;
6004
6005 /* ... Aggregates that are not themselves valid for returning in
6006 a register are not allowed. */
6007 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6008 return true;
6009
6010 /* Now check the remaining fields, if any. Only bitfields are allowed,
6011 since they are not addressable. */
6012 for (field = DECL_CHAIN (field);
6013 field;
6014 field = DECL_CHAIN (field))
6015 {
6016 if (TREE_CODE (field) != FIELD_DECL)
6017 continue;
6018
6019 if (!DECL_BIT_FIELD_TYPE (field))
6020 return true;
6021 }
6022
6023 return false;
6024 }
6025
6026 if (TREE_CODE (type) == UNION_TYPE)
6027 {
6028 tree field;
6029
6030 /* Unions can be returned in registers if every element is
6031 integral, or can be returned in an integer register. */
6032 for (field = TYPE_FIELDS (type);
6033 field;
6034 field = DECL_CHAIN (field))
6035 {
6036 if (TREE_CODE (field) != FIELD_DECL)
6037 continue;
6038
6039 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6040 return true;
6041
6042 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6043 return true;
6044 }
6045
6046 return false;
6047 }
6048 #endif /* not ARM_WINCE */
6049
6050 /* Return all other types in memory. */
6051 return true;
6052 }
6053
6054 const struct pcs_attribute_arg
6055 {
6056 const char *arg;
6057 enum arm_pcs value;
6058 } pcs_attribute_args[] =
6059 {
6060 {"aapcs", ARM_PCS_AAPCS},
6061 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6062 #if 0
6063 /* We could recognize these, but changes would be needed elsewhere
6064 * to implement them. */
6065 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6066 {"atpcs", ARM_PCS_ATPCS},
6067 {"apcs", ARM_PCS_APCS},
6068 #endif
6069 {NULL, ARM_PCS_UNKNOWN}
6070 };
6071
6072 static enum arm_pcs
6073 arm_pcs_from_attribute (tree attr)
6074 {
6075 const struct pcs_attribute_arg *ptr;
6076 const char *arg;
6077
6078 /* Get the value of the argument. */
6079 if (TREE_VALUE (attr) == NULL_TREE
6080 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6081 return ARM_PCS_UNKNOWN;
6082
6083 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6084
6085 /* Check it against the list of known arguments. */
6086 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6087 if (streq (arg, ptr->arg))
6088 return ptr->value;
6089
6090 /* An unrecognized PCS variant. */
6091 return ARM_PCS_UNKNOWN;
6092 }
6093
6094 /* Get the PCS variant to use for this call. TYPE is the function's type
6095 specification, DECL is the specific declaration. DECL may be null if
6096 the call could be indirect or if this is a library call. */
6097 static enum arm_pcs
6098 arm_get_pcs_model (const_tree type, const_tree decl)
6099 {
6100 bool user_convention = false;
6101 enum arm_pcs user_pcs = arm_pcs_default;
6102 tree attr;
6103
6104 gcc_assert (type);
6105
6106 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6107 if (attr)
6108 {
6109 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6110 user_convention = true;
6111 }
6112
6113 if (TARGET_AAPCS_BASED)
6114 {
6115 /* Detect varargs functions. These always use the base rules
6116 (no argument is ever a candidate for a co-processor
6117 register). */
6118 bool base_rules = stdarg_p (type);
6119
6120 if (user_convention)
6121 {
6122 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6123 sorry ("non-AAPCS derived PCS variant");
6124 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6125 error ("variadic functions must use the base AAPCS variant");
6126 }
6127
6128 if (base_rules)
6129 return ARM_PCS_AAPCS;
6130 else if (user_convention)
6131 return user_pcs;
6132 else if (decl && flag_unit_at_a_time)
6133 {
6134 /* Local functions never leak outside this compilation unit,
6135 so we are free to use whatever conventions are
6136 appropriate. */
6137 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6138 cgraph_node *local_info_node
6139 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6140 if (local_info_node && local_info_node->local)
6141 return ARM_PCS_AAPCS_LOCAL;
6142 }
6143 }
6144 else if (user_convention && user_pcs != arm_pcs_default)
6145 sorry ("PCS variant");
6146
6147 /* For everything else we use the target's default. */
6148 return arm_pcs_default;
6149 }
6150
6151
6152 static void
6153 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6154 const_tree fntype ATTRIBUTE_UNUSED,
6155 rtx libcall ATTRIBUTE_UNUSED,
6156 const_tree fndecl ATTRIBUTE_UNUSED)
6157 {
6158 /* Record the unallocated VFP registers. */
6159 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6160 pcum->aapcs_vfp_reg_alloc = 0;
6161 }
6162
6163 /* Bitmasks that indicate whether earlier versions of GCC would have
6164 taken a different path through the ABI logic. This should result in
6165 a -Wpsabi warning if the earlier path led to a different ABI decision.
6166
6167 WARN_PSABI_EMPTY_CXX17_BASE
6168 Indicates that the type includes an artificial empty C++17 base field
6169 that, prior to GCC 10.1, would prevent the type from being treated as
6170 a HFA or HVA. See PR94711 for details.
6171
6172 WARN_PSABI_NO_UNIQUE_ADDRESS
6173 Indicates that the type includes an empty [[no_unique_address]] field
6174 that, prior to GCC 10.1, would prevent the type from being treated as
6175 a HFA or HVA. */
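/* For instance (hypothetical C++ type): with
   struct S { [[no_unique_address]] Empty e; double d[4]; };
   the empty member is ignored and S is a homogeneous aggregate of four
   doubles, whereas releases before GCC 10.1 did not treat it as one;
   aapcs_vfp_sub_candidate records this via the flags below.  */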
6176 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6177 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6178
6179 /* Walk down the type tree of TYPE counting consecutive base elements.
6180 If *MODEP is VOIDmode, then set it to the first valid floating point
6181 type. If a non-floating point type is found, or if a floating point
6182 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6183 otherwise return the count in the sub-tree.
6184
6185 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6186 function has changed its behavior relative to earlier versions of GCC.
6187 Normally the argument should be nonnull and point to a zero-initialized
6188 variable. The function then records whether the ABI decision might
6189 be affected by a known fix to the ABI logic, setting the associated
6190 WARN_PSABI_* bits if so.
6191
6192 When the argument is instead a null pointer, the function tries to
6193 simulate the behavior of GCC before all such ABI fixes were made.
6194 This is useful to check whether the function returns something
6195 different after the ABI fixes. */
6196 static int
6197 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6198 unsigned int *warn_psabi_flags)
6199 {
6200 machine_mode mode;
6201 HOST_WIDE_INT size;
6202
6203 switch (TREE_CODE (type))
6204 {
6205 case REAL_TYPE:
6206 mode = TYPE_MODE (type);
6207 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6208 return -1;
6209
6210 if (*modep == VOIDmode)
6211 *modep = mode;
6212
6213 if (*modep == mode)
6214 return 1;
6215
6216 break;
6217
6218 case COMPLEX_TYPE:
6219 mode = TYPE_MODE (TREE_TYPE (type));
6220 if (mode != DFmode && mode != SFmode)
6221 return -1;
6222
6223 if (*modep == VOIDmode)
6224 *modep = mode;
6225
6226 if (*modep == mode)
6227 return 2;
6228
6229 break;
6230
6231 case VECTOR_TYPE:
6232 /* Use V2SImode and V4SImode as representatives of all 64-bit
6233 and 128-bit vector types, whether or not those modes are
6234 supported with the present options. */
6235 size = int_size_in_bytes (type);
6236 switch (size)
6237 {
6238 case 8:
6239 mode = V2SImode;
6240 break;
6241 case 16:
6242 mode = V4SImode;
6243 break;
6244 default:
6245 return -1;
6246 }
6247
6248 if (*modep == VOIDmode)
6249 *modep = mode;
6250
6251 /* Vector modes are considered to be opaque: two vectors are
6252 equivalent for the purposes of being homogeneous aggregates
6253 if they are the same size. */
6254 if (*modep == mode)
6255 return 1;
6256
6257 break;
6258
6259 case ARRAY_TYPE:
6260 {
6261 int count;
6262 tree index = TYPE_DOMAIN (type);
6263
6264 /* Can't handle incomplete types nor sizes that are not
6265 fixed. */
6266 if (!COMPLETE_TYPE_P (type)
6267 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6268 return -1;
6269
6270 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6271 warn_psabi_flags);
6272 if (count == -1
6273 || !index
6274 || !TYPE_MAX_VALUE (index)
6275 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6276 || !TYPE_MIN_VALUE (index)
6277 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6278 || count < 0)
6279 return -1;
6280
6281 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6282 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6283
6284 /* There must be no padding. */
6285 if (wi::to_wide (TYPE_SIZE (type))
6286 != count * GET_MODE_BITSIZE (*modep))
6287 return -1;
6288
6289 return count;
6290 }
6291
6292 case RECORD_TYPE:
6293 {
6294 int count = 0;
6295 int sub_count;
6296 tree field;
6297
6298 /* Can't handle incomplete types nor sizes that are not
6299 fixed. */
6300 if (!COMPLETE_TYPE_P (type)
6301 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6302 return -1;
6303
6304 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6305 {
6306 if (TREE_CODE (field) != FIELD_DECL)
6307 continue;
6308
6309 if (DECL_FIELD_ABI_IGNORED (field))
6310 {
6311 /* See whether this is something that earlier versions of
6312 GCC failed to ignore. */
6313 unsigned int flag;
6314 if (lookup_attribute ("no_unique_address",
6315 DECL_ATTRIBUTES (field)))
6316 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6317 else if (cxx17_empty_base_field_p (field))
6318 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6319 else
6320 /* No compatibility problem. */
6321 continue;
6322
6323 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6324 if (warn_psabi_flags)
6325 {
6326 *warn_psabi_flags |= flag;
6327 continue;
6328 }
6329 }
6330
6331 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6332 warn_psabi_flags);
6333 if (sub_count < 0)
6334 return -1;
6335 count += sub_count;
6336 }
6337
6338 /* There must be no padding. */
6339 if (wi::to_wide (TYPE_SIZE (type))
6340 != count * GET_MODE_BITSIZE (*modep))
6341 return -1;
6342
6343 return count;
6344 }
6345
6346 case UNION_TYPE:
6347 case QUAL_UNION_TYPE:
6348 {
6349 /* These aren't very interesting except in a degenerate case. */
6350 int count = 0;
6351 int sub_count;
6352 tree field;
6353
6354 /* Can't handle incomplete types nor sizes that are not
6355 fixed. */
6356 if (!COMPLETE_TYPE_P (type)
6357 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6358 return -1;
6359
6360 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6361 {
6362 if (TREE_CODE (field) != FIELD_DECL)
6363 continue;
6364
6365 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6366 warn_psabi_flags);
6367 if (sub_count < 0)
6368 return -1;
6369 count = count > sub_count ? count : sub_count;
6370 }
6371
6372 /* There must be no padding. */
6373 if (wi::to_wide (TYPE_SIZE (type))
6374 != count * GET_MODE_BITSIZE (*modep))
6375 return -1;
6376
6377 return count;
6378 }
6379
6380 default:
6381 break;
6382 }
6383
6384 return -1;
6385 }
6386
6387 /* Return true if PCS_VARIANT should use VFP registers. */
6388 static bool
6389 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6390 {
6391 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6392 {
6393 static bool seen_thumb1_vfp = false;
6394
6395 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6396 {
6397 sorry ("Thumb-1 hard-float VFP ABI");
6398 /* sorry() is not immediately fatal, so only display this once. */
6399 seen_thumb1_vfp = true;
6400 }
6401
6402 return true;
6403 }
6404
6405 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6406 return false;
6407
6408 return (TARGET_32BIT && TARGET_HARD_FLOAT
6409 && (TARGET_VFP_DOUBLE || !is_double));
6410 }
6411
6412 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6413 suitable for passing or returning in VFP registers for the PCS
6414 variant selected. If it is, then *BASE_MODE is updated to contain
6415 a machine mode describing each element of the argument's type and
6416 *COUNT to hold the number of such elements. */
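/* For example (illustrative), when the VFP PCS variant applies, a C
   struct { float a, b, c; } is a homogeneous aggregate of three SFmode
   elements, so *BASE_MODE is set to SFmode and *COUNT to 3.  */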
6417 static bool
6418 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6419 machine_mode mode, const_tree type,
6420 machine_mode *base_mode, int *count)
6421 {
6422 machine_mode new_mode = VOIDmode;
6423
6424 /* If we have the type information, prefer that to working things
6425 out from the mode. */
6426 if (type)
6427 {
6428 unsigned int warn_psabi_flags = 0;
6429 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6430 &warn_psabi_flags);
6431 if (ag_count > 0 && ag_count <= 4)
6432 {
6433 static unsigned last_reported_type_uid;
6434 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6435 int alt;
6436 if (warn_psabi
6437 && warn_psabi_flags
6438 && uid != last_reported_type_uid
6439 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6440 != ag_count))
6441 {
6442 const char *url
6443 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6444 gcc_assert (alt == -1);
6445 last_reported_type_uid = uid;
6446 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6447 qualification. */
6448 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6449 inform (input_location, "parameter passing for argument of "
6450 "type %qT with %<[[no_unique_address]]%> members "
6451 "changed %{in GCC 10.1%}",
6452 TYPE_MAIN_VARIANT (type), url);
6453 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6454 inform (input_location, "parameter passing for argument of "
6455 "type %qT when C++17 is enabled changed to match "
6456 "C++14 %{in GCC 10.1%}",
6457 TYPE_MAIN_VARIANT (type), url);
6458 }
6459 *count = ag_count;
6460 }
6461 else
6462 return false;
6463 }
6464 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6465 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6466 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6467 {
6468 *count = 1;
6469 new_mode = mode;
6470 }
6471 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6472 {
6473 *count = 2;
6474 new_mode = (mode == DCmode ? DFmode : SFmode);
6475 }
6476 else
6477 return false;
6478
6479
6480 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6481 return false;
6482
6483 *base_mode = new_mode;
6484
6485 if (TARGET_GENERAL_REGS_ONLY)
6486 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6487 type);
6488
6489 return true;
6490 }
6491
6492 static bool
6493 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6494 machine_mode mode, const_tree type)
6495 {
6496 int count ATTRIBUTE_UNUSED;
6497 machine_mode ag_mode ATTRIBUTE_UNUSED;
6498
6499 if (!use_vfp_abi (pcs_variant, false))
6500 return false;
6501 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6502 &ag_mode, &count);
6503 }
6504
6505 static bool
6506 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6507 const_tree type)
6508 {
6509 if (!use_vfp_abi (pcum->pcs_variant, false))
6510 return false;
6511
6512 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6513 &pcum->aapcs_vfp_rmode,
6514 &pcum->aapcs_vfp_rcount);
6515 }
6516
6517 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6518 for the behaviour of this function. */
6519
6520 static bool
6521 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6522 const_tree type ATTRIBUTE_UNUSED)
6523 {
6524 int rmode_size
6525 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6526 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6527 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6528 int regno;
6529
6530 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6531 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6532 {
6533 pcum->aapcs_vfp_reg_alloc = mask << regno;
6534 if (mode == BLKmode
6535 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6536 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6537 {
6538 int i;
6539 int rcount = pcum->aapcs_vfp_rcount;
6540 int rshift = shift;
6541 machine_mode rmode = pcum->aapcs_vfp_rmode;
6542 rtx par;
6543 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6544 {
6545 /* Avoid using unsupported vector modes. */
6546 if (rmode == V2SImode)
6547 rmode = DImode;
6548 else if (rmode == V4SImode)
6549 {
6550 rmode = DImode;
6551 rcount *= 2;
6552 rshift /= 2;
6553 }
6554 }
6555 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6556 for (i = 0; i < rcount; i++)
6557 {
6558 rtx tmp = gen_rtx_REG (rmode,
6559 FIRST_VFP_REGNUM + regno + i * rshift);
6560 tmp = gen_rtx_EXPR_LIST
6561 (VOIDmode, tmp,
6562 GEN_INT (i * GET_MODE_SIZE (rmode)));
6563 XVECEXP (par, 0, i) = tmp;
6564 }
6565
6566 pcum->aapcs_reg = par;
6567 }
6568 else
6569 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6570 return true;
6571 }
6572 return false;
6573 }
6574
6575 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6576 comment there for the behaviour of this function. */
6577
6578 static rtx
6579 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6580 machine_mode mode,
6581 const_tree type ATTRIBUTE_UNUSED)
6582 {
6583 if (!use_vfp_abi (pcs_variant, false))
6584 return NULL;
6585
6586 if (mode == BLKmode
6587 || (GET_MODE_CLASS (mode) == MODE_INT
6588 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6589 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6590 {
6591 int count;
6592 machine_mode ag_mode;
6593 int i;
6594 rtx par;
6595 int shift;
6596
6597 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6598 &ag_mode, &count);
6599
6600 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6601 {
6602 if (ag_mode == V2SImode)
6603 ag_mode = DImode;
6604 else if (ag_mode == V4SImode)
6605 {
6606 ag_mode = DImode;
6607 count *= 2;
6608 }
6609 }
6610 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6611 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6612 for (i = 0; i < count; i++)
6613 {
6614 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6615 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6616 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6617 XVECEXP (par, 0, i) = tmp;
6618 }
6619
6620 return par;
6621 }
6622
6623 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6624 }
6625
6626 static void
6627 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6628 machine_mode mode ATTRIBUTE_UNUSED,
6629 const_tree type ATTRIBUTE_UNUSED)
6630 {
6631 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6632 pcum->aapcs_vfp_reg_alloc = 0;
6633 return;
6634 }
6635
6636 #define AAPCS_CP(X) \
6637 { \
6638 aapcs_ ## X ## _cum_init, \
6639 aapcs_ ## X ## _is_call_candidate, \
6640 aapcs_ ## X ## _allocate, \
6641 aapcs_ ## X ## _is_return_candidate, \
6642 aapcs_ ## X ## _allocate_return_reg, \
6643 aapcs_ ## X ## _advance \
6644 }
6645
6646 /* Table of co-processors that can be used to pass arguments in
6647 registers. Ideally no argument should be a candidate for more than
6648 one co-processor table entry, but the table is processed in order
6649 and stops after the first match. If that entry then fails to put
6650 the argument into a co-processor register, the argument will go on
6651 the stack. */
6652 static struct
6653 {
6654 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6655 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6656
6657 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6658 BLKmode) is a candidate for this co-processor's registers; this
6659 function should ignore any position-dependent state in
6660 CUMULATIVE_ARGS and only use call-type dependent information. */
6661 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6662
6663 /* Return true if the argument does get a co-processor register; it
6664 should set aapcs_reg to an RTX of the register allocated as is
6665 required for a return from FUNCTION_ARG. */
6666 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6667
6668 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6669 be returned in this co-processor's registers. */
6670 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6671
6672 /* Allocate and return an RTX element to hold the return type of a call. This
6673 routine must not fail and will only be called if is_return_candidate
6674 returned true with the same parameters. */
6675 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6676
6677 /* Finish processing this argument and prepare to start processing
6678 the next one. */
6679 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6680 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6681 {
6682 AAPCS_CP(vfp)
6683 };
6684
6685 #undef AAPCS_CP
6686
6687 static int
6688 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6689 const_tree type)
6690 {
6691 int i;
6692
6693 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6694 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6695 return i;
6696
6697 return -1;
6698 }
6699
6700 static int
6701 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6702 {
6703 /* We aren't passed a decl, so we can't check that a call is local.
6704 However, it isn't clear that that would be a win anyway, since it
6705 might limit some tail-calling opportunities. */
6706 enum arm_pcs pcs_variant;
6707
6708 if (fntype)
6709 {
6710 const_tree fndecl = NULL_TREE;
6711
6712 if (TREE_CODE (fntype) == FUNCTION_DECL)
6713 {
6714 fndecl = fntype;
6715 fntype = TREE_TYPE (fntype);
6716 }
6717
6718 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6719 }
6720 else
6721 pcs_variant = arm_pcs_default;
6722
6723 if (pcs_variant != ARM_PCS_AAPCS)
6724 {
6725 int i;
6726
6727 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6728 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6729 TYPE_MODE (type),
6730 type))
6731 return i;
6732 }
6733 return -1;
6734 }
6735
6736 static rtx
6737 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6738 const_tree fntype)
6739 {
6740 /* We aren't passed a decl, so we can't check that a call is local.
6741 However, it isn't clear that that would be a win anyway, since it
6742 might limit some tail-calling opportunities. */
6743 enum arm_pcs pcs_variant;
6744 int unsignedp ATTRIBUTE_UNUSED;
6745
6746 if (fntype)
6747 {
6748 const_tree fndecl = NULL_TREE;
6749
6750 if (TREE_CODE (fntype) == FUNCTION_DECL)
6751 {
6752 fndecl = fntype;
6753 fntype = TREE_TYPE (fntype);
6754 }
6755
6756 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6757 }
6758 else
6759 pcs_variant = arm_pcs_default;
6760
6761 /* Promote integer types. */
6762 if (type && INTEGRAL_TYPE_P (type))
6763 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6764
6765 if (pcs_variant != ARM_PCS_AAPCS)
6766 {
6767 int i;
6768
6769 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6770 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6771 type))
6772 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6773 mode, type);
6774 }
6775
6776 /* Promote small structs returned in a register to full-word size
6777 for big-endian AAPCS. */
6778 if (type && arm_return_in_msb (type))
6779 {
6780 HOST_WIDE_INT size = int_size_in_bytes (type);
6781 if (size % UNITS_PER_WORD != 0)
6782 {
6783 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6784 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6785 }
6786 }
6787
6788 return gen_rtx_REG (mode, R0_REGNUM);
6789 }
6790
6791 static rtx
6792 aapcs_libcall_value (machine_mode mode)
6793 {
6794 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6795 && GET_MODE_SIZE (mode) <= 4)
6796 mode = SImode;
6797
6798 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6799 }
6800
6801 /* Lay out a function argument using the AAPCS rules. The rule
6802 numbers referred to here are those in the AAPCS. */
6803 static void
6804 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6805 const_tree type, bool named)
6806 {
6807 int nregs, nregs2;
6808 int ncrn;
6809
6810 /* We only need to do this once per argument. */
6811 if (pcum->aapcs_arg_processed)
6812 return;
6813
6814 pcum->aapcs_arg_processed = true;
6815
6816 /* Special case: if named is false then we are handling an incoming
6817 anonymous argument which is on the stack. */
6818 if (!named)
6819 return;
6820
6821 /* Is this a potential co-processor register candidate? */
6822 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6823 {
6824 int slot = aapcs_select_call_coproc (pcum, mode, type);
6825 pcum->aapcs_cprc_slot = slot;
6826
6827 /* We don't have to apply any of the rules from part B of the
6828 preparation phase, these are handled elsewhere in the
6829 compiler. */
6830
6831 if (slot >= 0)
6832 {
6833 /* A Co-processor register candidate goes either in its own
6834 class of registers or on the stack. */
6835 if (!pcum->aapcs_cprc_failed[slot])
6836 {
6837 /* C1.cp - Try to allocate the argument to co-processor
6838 registers. */
6839 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6840 return;
6841
6842 /* C2.cp - Put the argument on the stack and note that we
6843 can't assign any more candidates in this slot. We also
6844 need to note that we have allocated stack space, so that
6845 we won't later try to split a non-cprc candidate between
6846 core registers and the stack. */
6847 pcum->aapcs_cprc_failed[slot] = true;
6848 pcum->can_split = false;
6849 }
6850
6851 /* We didn't get a register, so this argument goes on the
6852 stack. */
6853 gcc_assert (pcum->can_split == false);
6854 return;
6855 }
6856 }
6857
6858 /* C3 - For double-word aligned arguments, round the NCRN up to the
6859 next even number. */
6860 ncrn = pcum->aapcs_ncrn;
6861 if (ncrn & 1)
6862 {
6863 int res = arm_needs_doubleword_align (mode, type);
6864 /* Only warn during RTL expansion of call stmts, otherwise we would
6865 warn e.g. during gimplification even on functions that will always
6866 be inlined, and we'd warn multiple times.  Don't warn when
6867 called in expand_function_start either, as we warn instead in
6868 arm_function_arg_boundary in that case. */
6869 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6870 inform (input_location, "parameter passing for argument of type "
6871 "%qT changed in GCC 7.1", type);
6872 else if (res > 0)
6873 ncrn++;
6874 }
6875
6876 nregs = ARM_NUM_REGS2(mode, type);
6877
6878 /* Sigh, this test should really assert that nregs > 0, but a GCC
6879 extension allows empty structs and then gives them zero size; it
6880 then allows such a structure to be passed by value. For some of
6881 the code below we have to pretend that such an argument has
6882 non-zero size so that we 'locate' it correctly either in
6883 registers or on the stack. */
6884 gcc_assert (nregs >= 0);
6885
6886 nregs2 = nregs ? nregs : 1;
6887
6888 /* C4 - Argument fits entirely in core registers. */
6889 if (ncrn + nregs2 <= NUM_ARG_REGS)
6890 {
6891 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6892 pcum->aapcs_next_ncrn = ncrn + nregs;
6893 return;
6894 }
6895
6896 /* C5 - Some core registers left and there are no arguments already
6897 on the stack: split this argument between the remaining core
6898 registers and the stack. */
6899 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6900 {
6901 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6902 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6903 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6904 return;
6905 }
6906
6907 /* C6 - NCRN is set to 4. */
6908 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6909
6910 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6911 return;
6912 }
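
/* Illustrative sketch (not part of the original sources): under the
   base (integer) variant of the AAPCS, a prototype such as

     void f (int a, double b, int c);

   is laid out by the rules above as follows: "a" goes in r0 (C4);
   "b" needs doubleword alignment, so the NCRN is rounded up from 1
   to 2 (C3) and "b" occupies r2/r3 (C4); for "c" no core registers
   remain, so the NCRN is set to 4 and "c" goes on the stack
   (C6, C7/C8).  With the VFP variant, "b" would instead be a
   co-processor register candidate.  */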
6913
6914 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6915 for a call to a function whose data type is FNTYPE.
6916 For a library call, FNTYPE is NULL. */
6917 void
6918 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6919 rtx libname,
6920 tree fndecl ATTRIBUTE_UNUSED)
6921 {
6922 /* Long call handling. */
6923 if (fntype)
6924 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6925 else
6926 pcum->pcs_variant = arm_pcs_default;
6927
6928 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6929 {
6930 if (arm_libcall_uses_aapcs_base (libname))
6931 pcum->pcs_variant = ARM_PCS_AAPCS;
6932
6933 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6934 pcum->aapcs_reg = NULL_RTX;
6935 pcum->aapcs_partial = 0;
6936 pcum->aapcs_arg_processed = false;
6937 pcum->aapcs_cprc_slot = -1;
6938 pcum->can_split = true;
6939
6940 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6941 {
6942 int i;
6943
6944 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6945 {
6946 pcum->aapcs_cprc_failed[i] = false;
6947 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6948 }
6949 }
6950 return;
6951 }
6952
6953 /* Legacy ABIs */
6954
6955 /* On the ARM, the offset starts at 0. */
6956 pcum->nregs = 0;
6957 pcum->iwmmxt_nregs = 0;
6958 pcum->can_split = true;
6959
6960 /* Varargs vectors are treated the same as long long.
6961 named_count avoids having to change the way arm handles 'named' */
6962 pcum->named_count = 0;
6963 pcum->nargs = 0;
6964
6965 if (TARGET_REALLY_IWMMXT && fntype)
6966 {
6967 tree fn_arg;
6968
6969 for (fn_arg = TYPE_ARG_TYPES (fntype);
6970 fn_arg;
6971 fn_arg = TREE_CHAIN (fn_arg))
6972 pcum->named_count += 1;
6973
6974 if (! pcum->named_count)
6975 pcum->named_count = INT_MAX;
6976 }
6977 }
6978
6979 /* Return 2 if double word alignment is required for argument passing,
6980 but wasn't required before the fix for PR88469.
6981 Return 1 if double word alignment is required for argument passing.
6982 Return -1 if double word alignment used to be required for argument
6983 passing before the PR77728 ABI fix, but is not required anymore.
6984 Return 0 if double word alignment is not required and wasn't required
6985 before either. */
6986 static int
6987 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6988 {
6989 if (!type)
6990 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6991
6992 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6993 if (!AGGREGATE_TYPE_P (type))
6994 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6995
6996 /* Array types: Use member alignment of element type. */
6997 if (TREE_CODE (type) == ARRAY_TYPE)
6998 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6999
7000 int ret = 0;
7001 int ret2 = 0;
7002 /* Record/aggregate types: Use greatest member alignment of any member.
7003
7004 Note that we explicitly consider zero-sized fields here, even though
7005 they don't map to AAPCS machine types. For example, in:
7006
7007 struct __attribute__((aligned(8))) empty {};
7008
7009 struct s {
7010 [[no_unique_address]] empty e;
7011 int x;
7012 };
7013
7014 "s" contains only one Fundamental Data Type (the int field)
7015 but gains 8-byte alignment and size thanks to "e". */
7016 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7017 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7018 {
7019 if (TREE_CODE (field) == FIELD_DECL)
7020 return 1;
7021 else
7022 /* Before PR77728 fix, we were incorrectly considering also
7023 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7024 Make sure we can warn about that with -Wpsabi. */
7025 ret = -1;
7026 }
7027 else if (TREE_CODE (field) == FIELD_DECL
7028 && DECL_BIT_FIELD_TYPE (field)
7029 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7030 ret2 = 1;
7031
7032 if (ret2)
7033 return 2;
7034
7035 return ret;
7036 }
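
/* Illustrative example (not part of the original sources): a struct
   with a doubleword-aligned member, e.g.

     struct pair { long long ll; int i; };

   has a FIELD_DECL whose alignment (64 bits) exceeds PARM_BOUNDARY
   (32 bits), so this function returns 1 and the argument is passed
   in an even-numbered register pair or at a doubleword-aligned stack
   slot.  */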
7037
7038
7039 /* Determine where to put an argument to a function.
7040 Value is zero to push the argument on the stack,
7041 or a hard register in which to store the argument.
7042
7043 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7044 the preceding args and about the function being called.
7045 ARG is a description of the argument.
7046
7047 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7048 other arguments are passed on the stack. If (NAMED == 0) (which happens
7049 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7050 defined), say it is passed on the stack (function_prologue will
7051 indeed make it be passed on the stack if necessary).  */
7052
7053 static rtx
7054 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7055 {
7056 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7057 int nregs;
7058
7059 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7060 a call insn (op3 of a call_value insn). */
7061 if (arg.end_marker_p ())
7062 return const0_rtx;
7063
7064 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7065 {
7066 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7067 return pcum->aapcs_reg;
7068 }
7069
7070 /* Varargs vectors are treated the same as long long.
7071 named_count avoids having to change the way arm handles 'named' */
7072 if (TARGET_IWMMXT_ABI
7073 && arm_vector_mode_supported_p (arg.mode)
7074 && pcum->named_count > pcum->nargs + 1)
7075 {
7076 if (pcum->iwmmxt_nregs <= 9)
7077 return gen_rtx_REG (arg.mode,
7078 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7079 else
7080 {
7081 pcum->can_split = false;
7082 return NULL_RTX;
7083 }
7084 }
7085
7086 /* Put doubleword aligned quantities in even register pairs. */
7087 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7088 {
7089 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7090 if (res < 0 && warn_psabi)
7091 inform (input_location, "parameter passing for argument of type "
7092 "%qT changed in GCC 7.1", arg.type);
7093 else if (res > 0)
7094 {
7095 pcum->nregs++;
7096 if (res > 1 && warn_psabi)
7097 inform (input_location, "parameter passing for argument of type "
7098 "%qT changed in GCC 9.1", arg.type);
7099 }
7100 }
7101
7102 /* Only allow splitting an arg between regs and memory if all preceding
7103 args were allocated to regs. For args passed by reference we only count
7104 the reference pointer. */
7105 if (pcum->can_split)
7106 nregs = 1;
7107 else
7108 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7109
7110 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7111 return NULL_RTX;
7112
7113 return gen_rtx_REG (arg.mode, pcum->nregs);
7114 }
7115
7116 static unsigned int
7117 arm_function_arg_boundary (machine_mode mode, const_tree type)
7118 {
7119 if (!ARM_DOUBLEWORD_ALIGN)
7120 return PARM_BOUNDARY;
7121
7122 int res = arm_needs_doubleword_align (mode, type);
7123 if (res < 0 && warn_psabi)
7124 inform (input_location, "parameter passing for argument of type %qT "
7125 "changed in GCC 7.1", type);
7126 if (res > 1 && warn_psabi)
7127 inform (input_location, "parameter passing for argument of type "
7128 "%qT changed in GCC 9.1", type);
7129
7130 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7131 }
7132
7133 static int
7134 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7135 {
7136 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7137 int nregs = pcum->nregs;
7138
7139 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7140 {
7141 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7142 return pcum->aapcs_partial;
7143 }
7144
7145 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7146 return 0;
7147
7148 if (NUM_ARG_REGS > nregs
7149 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7150 && pcum->can_split)
7151 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7152
7153 return 0;
7154 }
7155
7156 /* Update the data in PCUM to advance over argument ARG. */
7157
7158 static void
7159 arm_function_arg_advance (cumulative_args_t pcum_v,
7160 const function_arg_info &arg)
7161 {
7162 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7163
7164 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7165 {
7166 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7167
7168 if (pcum->aapcs_cprc_slot >= 0)
7169 {
7170 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7171 arg.type);
7172 pcum->aapcs_cprc_slot = -1;
7173 }
7174
7175 /* Generic stuff. */
7176 pcum->aapcs_arg_processed = false;
7177 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7178 pcum->aapcs_reg = NULL_RTX;
7179 pcum->aapcs_partial = 0;
7180 }
7181 else
7182 {
7183 pcum->nargs += 1;
7184 if (arm_vector_mode_supported_p (arg.mode)
7185 && pcum->named_count > pcum->nargs
7186 && TARGET_IWMMXT_ABI)
7187 pcum->iwmmxt_nregs += 1;
7188 else
7189 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7190 }
7191 }
7192
7193 /* Variable sized types are passed by reference. This is a GCC
7194 extension to the ARM ABI. */
7195
7196 static bool
7197 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7198 {
7199 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7200 }
7201 \f
7202 /* Encode the current state of the #pragma [no_]long_calls. */
7203 typedef enum
7204 {
7205 OFF, /* No #pragma [no_]long_calls is in effect. */
7206 LONG, /* #pragma long_calls is in effect. */
7207 SHORT /* #pragma no_long_calls is in effect. */
7208 } arm_pragma_enum;
7209
7210 static arm_pragma_enum arm_pragma_long_calls = OFF;
7211
7212 void
7213 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7214 {
7215 arm_pragma_long_calls = LONG;
7216 }
7217
7218 void
7219 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7220 {
7221 arm_pragma_long_calls = SHORT;
7222 }
7223
7224 void
7225 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7226 {
7227 arm_pragma_long_calls = OFF;
7228 }
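
/* Illustrative usage (not part of the original sources): the pragma
   state toggles as the source is parsed, e.g.

     #pragma long_calls
     void far_helper (void);     - declared while the state is LONG
     #pragma no_long_calls
     void near_helper (void);    - declared while the state is SHORT
     #pragma long_calls_off
     void plain_helper (void);   - declared while the state is OFF

   arm_set_default_type_attributes, later in this file, attaches
   "long_call" or "short_call" to the function types declared inside
   the corresponding regions.  */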
7229 \f
7230 /* Handle an attribute requiring a FUNCTION_DECL;
7231 arguments as in struct attribute_spec.handler. */
7232 static tree
7233 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7234 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7235 {
7236 if (TREE_CODE (*node) != FUNCTION_DECL)
7237 {
7238 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7239 name);
7240 *no_add_attrs = true;
7241 }
7242
7243 return NULL_TREE;
7244 }
7245
7246 /* Handle an "interrupt" or "isr" attribute;
7247 arguments as in struct attribute_spec.handler. */
7248 static tree
7249 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7250 bool *no_add_attrs)
7251 {
7252 if (DECL_P (*node))
7253 {
7254 if (TREE_CODE (*node) != FUNCTION_DECL)
7255 {
7256 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7257 name);
7258 *no_add_attrs = true;
7259 }
7260 else if (TARGET_VFP_BASE)
7261 {
7262 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7263 name);
7264 }
7265 /* FIXME: the argument, if any, is checked for type attributes;
7266 should it be checked for decl ones?  */
7267 }
7268 else
7269 {
7270 if (TREE_CODE (*node) == FUNCTION_TYPE
7271 || TREE_CODE (*node) == METHOD_TYPE)
7272 {
7273 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7274 {
7275 warning (OPT_Wattributes, "%qE attribute ignored",
7276 name);
7277 *no_add_attrs = true;
7278 }
7279 }
7280 else if (TREE_CODE (*node) == POINTER_TYPE
7281 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7282 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7283 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7284 {
7285 *node = build_variant_type_copy (*node);
7286 TREE_TYPE (*node) = build_type_attribute_variant
7287 (TREE_TYPE (*node),
7288 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7289 *no_add_attrs = true;
7290 }
7291 else
7292 {
7293 /* Possibly pass this attribute on from the type to a decl. */
7294 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7295 | (int) ATTR_FLAG_FUNCTION_NEXT
7296 | (int) ATTR_FLAG_ARRAY_NEXT))
7297 {
7298 *no_add_attrs = true;
7299 return tree_cons (name, args, NULL_TREE);
7300 }
7301 else
7302 {
7303 warning (OPT_Wattributes, "%qE attribute ignored",
7304 name);
7305 }
7306 }
7307 }
7308
7309 return NULL_TREE;
7310 }
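
/* Illustrative usage (not part of the original sources): the handler
   above accepts declarations such as

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   where the optional string argument is interpreted by
   arm_isr_value; an unrecognized argument yields ARM_FT_UNKNOWN and
   the attribute is ignored with a warning, as implemented above.  */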
7311
7312 /* Handle a "pcs" attribute; arguments as in struct
7313 attribute_spec.handler. */
7314 static tree
7315 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7316 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7317 {
7318 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7319 {
7320 warning (OPT_Wattributes, "%qE attribute ignored", name);
7321 *no_add_attrs = true;
7322 }
7323 return NULL_TREE;
7324 }
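
/* Illustrative usage (not part of the original sources):

     double madd (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   "aapcs" and "aapcs-vfp" are the values accepted by
   arm_pcs_from_attribute; any other argument is rejected above and
   the attribute is ignored.  */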
7325
7326 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7327 /* Handle the "notshared" attribute. This attribute is another way of
7328 requesting hidden visibility. ARM's compiler supports
7329 "__declspec(notshared)"; we support the same thing via an
7330 attribute. */
7331
7332 static tree
7333 arm_handle_notshared_attribute (tree *node,
7334 tree name ATTRIBUTE_UNUSED,
7335 tree args ATTRIBUTE_UNUSED,
7336 int flags ATTRIBUTE_UNUSED,
7337 bool *no_add_attrs)
7338 {
7339 tree decl = TYPE_NAME (*node);
7340
7341 if (decl)
7342 {
7343 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7344 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7345 *no_add_attrs = false;
7346 }
7347 return NULL_TREE;
7348 }
7349 #endif
7350
7351 /* This function returns true if a function with declaration FNDECL and type
7352 FNTYPE uses the stack to pass arguments or return variables and false
7353 otherwise. This is used for functions with the attributes
7354 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7355 diagnostic messages if the stack is used. NAME is the name of the attribute
7356 used. */
7357
7358 static bool
7359 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7360 {
7361 function_args_iterator args_iter;
7362 CUMULATIVE_ARGS args_so_far_v;
7363 cumulative_args_t args_so_far;
7364 bool first_param = true;
7365 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7366
7367 /* Error out if any argument is passed on the stack. */
7368 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7369 args_so_far = pack_cumulative_args (&args_so_far_v);
7370 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7371 {
7372 rtx arg_rtx;
7373
7374 prev_arg_type = arg_type;
7375 if (VOID_TYPE_P (arg_type))
7376 continue;
7377
7378 function_arg_info arg (arg_type, /*named=*/true);
7379 if (!first_param)
7380 /* ??? We should advance after processing the argument and pass
7381 the argument we're advancing past. */
7382 arm_function_arg_advance (args_so_far, arg);
7383 arg_rtx = arm_function_arg (args_so_far, arg);
7384 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7385 {
7386 error ("%qE attribute not available to functions with arguments "
7387 "passed on the stack", name);
7388 return true;
7389 }
7390 first_param = false;
7391 }
7392
7393 /* Error out for variadic functions since we cannot control how many
7394 arguments will be passed and thus the stack could be used.  stdarg_p () is
7395 not used for the check, to avoid walking the argument list twice.  */
7396 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7397 {
7398 error ("%qE attribute not available to functions with variable number "
7399 "of arguments", name);
7400 return true;
7401 }
7402
7403 /* Error out if return value is passed on the stack. */
7404 ret_type = TREE_TYPE (fntype);
7405 if (arm_return_in_memory (ret_type, fntype))
7406 {
7407 error ("%qE attribute not available to functions that return value on "
7408 "the stack", name);
7409 return true;
7410 }
7411 return false;
7412 }
7413
7414 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7415 function will check whether the attribute is allowed here and will add the
7416 attribute to the function declaration tree or otherwise issue a warning. */
7417
7418 static tree
7419 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7420 tree /* args */,
7421 int /* flags */,
7422 bool *no_add_attrs)
7423 {
7424 tree fndecl;
7425
7426 if (!use_cmse)
7427 {
7428 *no_add_attrs = true;
7429 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7430 "option.", name);
7431 return NULL_TREE;
7432 }
7433
7434 /* Ignore attribute for function types. */
7435 if (TREE_CODE (*node) != FUNCTION_DECL)
7436 {
7437 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7438 name);
7439 *no_add_attrs = true;
7440 return NULL_TREE;
7441 }
7442
7443 fndecl = *node;
7444
7445 /* Warn for static linkage functions. */
7446 if (!TREE_PUBLIC (fndecl))
7447 {
7448 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7449 "with static linkage", name);
7450 *no_add_attrs = true;
7451 return NULL_TREE;
7452 }
7453
7454 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7455 TREE_TYPE (fndecl));
7456 return NULL_TREE;
7457 }
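
/* Illustrative usage (not part of the original sources), assuming
   the translation unit is compiled with -mcmse on an ARMv8-M target:

     int __attribute__ ((cmse_nonsecure_entry)) secure_status (void)
     {
       return 0;
     }

   All arguments and the return value must fit in registers; a
   signature that needs the stack (for example one taking five int
   parameters) is rejected by cmse_func_args_or_return_in_stack
   above.  */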
7458
7459
7460 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7461 function will check whether the attribute is allowed here and will add the
7462 attribute to the function type tree or otherwise issue a diagnostic. The
7463 reason we check this at declaration time is to only allow the use of the
7464 attribute with declarations of function pointers and not function
7465 declarations. This function checks NODE is of the expected type and issues
7466 diagnostics otherwise using NAME. If it is not of the expected type
7467 *NO_ADD_ATTRS will be set to true. */
7468
7469 static tree
7470 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7471 tree /* args */,
7472 int /* flags */,
7473 bool *no_add_attrs)
7474 {
7475 tree decl = NULL_TREE, fntype = NULL_TREE;
7476 tree type;
7477
7478 if (!use_cmse)
7479 {
7480 *no_add_attrs = true;
7481 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7482 "option.", name);
7483 return NULL_TREE;
7484 }
7485
7486 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7487 {
7488 decl = *node;
7489 fntype = TREE_TYPE (decl);
7490 }
7491
7492 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7493 fntype = TREE_TYPE (fntype);
7494
7495 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7496 {
7497 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7498 "function pointer", name);
7499 *no_add_attrs = true;
7500 return NULL_TREE;
7501 }
7502
7503 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7504
7505 if (*no_add_attrs)
7506 return NULL_TREE;
7507
7508 /* Prevent trees from being shared among function types with and without
7509 the cmse_nonsecure_call attribute.  */
7510 type = TREE_TYPE (decl);
7511
7512 type = build_distinct_type_copy (type);
7513 TREE_TYPE (decl) = type;
7514 fntype = type;
7515
7516 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7517 {
7518 type = fntype;
7519 fntype = TREE_TYPE (fntype);
7520 fntype = build_distinct_type_copy (fntype);
7521 TREE_TYPE (type) = fntype;
7522 }
7523
7524 /* Construct a type attribute and add it to the function type. */
7525 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7526 TYPE_ATTRIBUTES (fntype));
7527 TYPE_ATTRIBUTES (fntype) = attrs;
7528 return NULL_TREE;
7529 }
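
/* Illustrative usage (not part of the original sources), again
   assuming -mcmse: the attribute applies to the base type of a
   function pointer, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (int);
     ns_cb_t *callback;

   whereas placing it directly on a function declaration is diagnosed
   by the check above.  */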
7530
7531 /* Return 0 if the attributes for two types are incompatible, 1 if they
7532 are compatible, and 2 if they are nearly compatible (which causes a
7533 warning to be generated). */
7534 static int
7535 arm_comp_type_attributes (const_tree type1, const_tree type2)
7536 {
7537 int l1, l2, s1, s2;
7538
7539 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7540 TYPE_ATTRIBUTES (type1));
7541 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7542 TYPE_ATTRIBUTES (type2));
7543 if (bool (attrs1) != bool (attrs2))
7544 return 0;
7545 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7546 return 0;
7547
7548 /* Check for mismatch of non-default calling convention. */
7549 if (TREE_CODE (type1) != FUNCTION_TYPE)
7550 return 1;
7551
7552 /* Check for mismatched call attributes. */
7553 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7554 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7555 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7556 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7557
7558 /* Only bother to check if an attribute is defined. */
7559 if (l1 | l2 | s1 | s2)
7560 {
7561 /* If one type has an attribute, the other must have the same attribute. */
7562 if ((l1 != l2) || (s1 != s2))
7563 return 0;
7564
7565 /* Disallow mixed attributes. */
7566 if ((l1 & s2) || (l2 & s1))
7567 return 0;
7568 }
7569
7570 /* Check for mismatched ISR attribute. */
7571 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7572 if (! l1)
7573 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7574 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7575 if (! l2)
7576 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7577 if (l1 != l2)
7578 return 0;
7579
7580 l1 = lookup_attribute ("cmse_nonsecure_call",
7581 TYPE_ATTRIBUTES (type1)) != NULL;
7582 l2 = lookup_attribute ("cmse_nonsecure_call",
7583 TYPE_ATTRIBUTES (type2)) != NULL;
7584
7585 if (l1 != l2)
7586 return 0;
7587
7588 return 1;
7589 }
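
/* Illustrative example (not part of the original sources): because
   mismatched call attributes make this function return 0, the two
   pointer types involved in

     void far_fn (void) __attribute__ ((long_call));
     void (*plain_ptr) (void) = far_fn;

   are treated as having incompatible attributes, while two pointers
   that both carry "long_call" remain compatible.  */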
7590
7591 /* Assigns default attributes to newly defined type. This is used to
7592 set short_call/long_call attributes for function types of
7593 functions defined inside corresponding #pragma scopes. */
7594 static void
7595 arm_set_default_type_attributes (tree type)
7596 {
7597 /* Add __attribute__ ((long_call)) to all functions, when
7598 inside #pragma long_calls or __attribute__ ((short_call)),
7599 when inside #pragma no_long_calls. */
7600 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7601 {
7602 tree type_attr_list, attr_name;
7603 type_attr_list = TYPE_ATTRIBUTES (type);
7604
7605 if (arm_pragma_long_calls == LONG)
7606 attr_name = get_identifier ("long_call");
7607 else if (arm_pragma_long_calls == SHORT)
7608 attr_name = get_identifier ("short_call");
7609 else
7610 return;
7611
7612 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7613 TYPE_ATTRIBUTES (type) = type_attr_list;
7614 }
7615 }
7616 \f
7617 /* Return true if DECL is known to be linked into section SECTION. */
7618
7619 static bool
7620 arm_function_in_section_p (tree decl, section *section)
7621 {
7622 /* We can only be certain about the prevailing symbol definition. */
7623 if (!decl_binds_to_current_def_p (decl))
7624 return false;
7625
7626 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7627 if (!DECL_SECTION_NAME (decl))
7628 {
7629 /* Make sure that we will not create a unique section for DECL. */
7630 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7631 return false;
7632 }
7633
7634 return function_section (decl) == section;
7635 }
7636
7637 /* Return nonzero if a 32-bit "long_call" should be generated for
7638 a call from the current function to DECL. We generate a long_call
7639 if the function:
7640
7641 a. has an __attribute__ ((long_call))
7642 or b. is within the scope of a #pragma long_calls
7643 or c. the -mlong-calls command line switch has been specified
7644
7645 However we do not generate a long call if the function:
7646
7647 d. has an __attribute__ ((short_call))
7648 or e. is inside the scope of a #pragma no_long_calls
7649 or f. is defined in the same section as the current function. */
7650
7651 bool
7652 arm_is_long_call_p (tree decl)
7653 {
7654 tree attrs;
7655
7656 if (!decl)
7657 return TARGET_LONG_CALLS;
7658
7659 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7660 if (lookup_attribute ("short_call", attrs))
7661 return false;
7662
7663 /* For "f", be conservative, and only cater for cases in which the
7664 whole of the current function is placed in the same section. */
7665 if (!flag_reorder_blocks_and_partition
7666 && TREE_CODE (decl) == FUNCTION_DECL
7667 && arm_function_in_section_p (decl, current_function_section ()))
7668 return false;
7669
7670 if (lookup_attribute ("long_call", attrs))
7671 return true;
7672
7673 return TARGET_LONG_CALLS;
7674 }
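
/* Illustrative example (not part of the original sources): with
   -mlong-calls in effect, a call to

     void near_fn (void) __attribute__ ((short_call));

   is still emitted as a normal BL (rule d above), while a call to a
   function without any attribute becomes a long call (rule c).
   Conversely, without -mlong-calls,

     void far_fn (void) __attribute__ ((long_call));

   forces a long call (rule a).  */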
7675
7676 /* Return nonzero if it is ok to make a tail-call to DECL. */
7677 static bool
7678 arm_function_ok_for_sibcall (tree decl, tree exp)
7679 {
7680 unsigned long func_type;
7681
7682 if (cfun->machine->sibcall_blocked)
7683 return false;
7684
7685 if (TARGET_FDPIC)
7686 {
7687 /* In FDPIC, never tailcall something for which we have no decl:
7688 the target function could be in a different module, requiring
7689 a different FDPIC register value. */
7690 if (decl == NULL)
7691 return false;
7692 }
7693
7694 /* Never tailcall something if we are generating code for Thumb-1. */
7695 if (TARGET_THUMB1)
7696 return false;
7697
7698 /* The PIC register is live on entry to VxWorks PLT entries, so we
7699 must make the call before restoring the PIC register. */
7700 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7701 return false;
7702
7703 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7704 may be used both as target of the call and base register for restoring
7705 the VFP registers */
7706 if (TARGET_APCS_FRAME && TARGET_ARM
7707 && TARGET_HARD_FLOAT
7708 && decl && arm_is_long_call_p (decl))
7709 return false;
7710
7711 /* If we are interworking and the function is not declared static
7712 then we can't tail-call it unless we know that it exists in this
7713 compilation unit (since it might be a Thumb routine). */
7714 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7715 && !TREE_ASM_WRITTEN (decl))
7716 return false;
7717
7718 func_type = arm_current_func_type ();
7719 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7720 if (IS_INTERRUPT (func_type))
7721 return false;
7722
7723 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7724 generated for entry functions themselves. */
7725 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7726 return false;
7727
7728 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7729 this would complicate matters for later code generation. */
7730 if (TREE_CODE (exp) == CALL_EXPR)
7731 {
7732 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7733 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7734 return false;
7735 }
7736
7737 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7738 {
7739 /* Check that the return value locations are the same. For
7740 example that we aren't returning a value from the sibling in
7741 a VFP register but then need to transfer it to a core
7742 register. */
7743 rtx a, b;
7744 tree decl_or_type = decl;
7745
7746 /* If it is an indirect function pointer, get the function type. */
7747 if (!decl)
7748 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7749
7750 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7751 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7752 cfun->decl, false);
7753 if (!rtx_equal_p (a, b))
7754 return false;
7755 }
7756
7757 /* Never tailcall if function may be called with a misaligned SP. */
7758 if (IS_STACKALIGN (func_type))
7759 return false;
7760
7761 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7762 references should become a NOP. Don't convert such calls into
7763 sibling calls. */
7764 if (TARGET_AAPCS_BASED
7765 && arm_abi == ARM_ABI_AAPCS
7766 && decl
7767 && DECL_WEAK (decl))
7768 return false;
7769
7770 /* We cannot do a tailcall for an indirect call by descriptor if all the
7771 argument registers are used because the only register left to load the
7772 address is IP and it will already contain the static chain. */
7773 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7774 {
7775 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7776 CUMULATIVE_ARGS cum;
7777 cumulative_args_t cum_v;
7778
7779 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7780 cum_v = pack_cumulative_args (&cum);
7781
7782 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7783 {
7784 tree type = TREE_VALUE (t);
7785 if (!VOID_TYPE_P (type))
7786 {
7787 function_arg_info arg (type, /*named=*/true);
7788 arm_function_arg_advance (cum_v, arg);
7789 }
7790 }
7791
7792 function_arg_info arg (integer_type_node, /*named=*/true);
7793 if (!arm_function_arg (cum_v, arg))
7794 return false;
7795 }
7796
7797 /* Everything else is ok. */
7798 return true;
7799 }
7800
7801 \f
7802 /* Addressing mode support functions. */
7803
7804 /* Return nonzero if X is a legitimate immediate operand when compiling
7805 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7806 int
7807 legitimate_pic_operand_p (rtx x)
7808 {
7809 if (GET_CODE (x) == SYMBOL_REF
7810 || (GET_CODE (x) == CONST
7811 && GET_CODE (XEXP (x, 0)) == PLUS
7812 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7813 return 0;
7814
7815 return 1;
7816 }
7817
7818 /* Record that the current function needs a PIC register. If PIC_REG is null,
7819 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7820 both cases cfun->machine->pic_reg is initialized if we have not already done
7821 so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
7822 the PIC register is reloaded at the current position in the instruction stream
7823 regardless of whether it was loaded before.  Otherwise, it is only loaded if
7824 that has not already been done (crtl->uses_pic_offset_table is null).  Note that
7825 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7826 is only supported iff COMPUTE_NOW is false. */
7827
7828 static void
7829 require_pic_register (rtx pic_reg, bool compute_now)
7830 {
7831 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7832
7833 /* A lot of the logic here is made obscure by the fact that this
7834 routine gets called as part of the rtx cost estimation process.
7835 We don't want those calls to affect any assumptions about the real
7836 function; and further, we can't call entry_of_function() until we
7837 start the real expansion process. */
7838 if (!crtl->uses_pic_offset_table || compute_now)
7839 {
7840 gcc_assert (can_create_pseudo_p ()
7841 || (pic_reg != NULL_RTX
7842 && REG_P (pic_reg)
7843 && GET_MODE (pic_reg) == Pmode));
7844 if (arm_pic_register != INVALID_REGNUM
7845 && !compute_now
7846 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7847 {
7848 if (!cfun->machine->pic_reg)
7849 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7850
7851 /* Play games to avoid marking the function as needing pic
7852 if we are being called as part of the cost-estimation
7853 process. */
7854 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7855 crtl->uses_pic_offset_table = 1;
7856 }
7857 else
7858 {
7859 rtx_insn *seq, *insn;
7860
7861 if (pic_reg == NULL_RTX)
7862 pic_reg = gen_reg_rtx (Pmode);
7863 if (!cfun->machine->pic_reg)
7864 cfun->machine->pic_reg = pic_reg;
7865
7866 /* Play games to avoid marking the function as needing pic
7867 if we are being called as part of the cost-estimation
7868 process. */
7869 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7870 {
7871 crtl->uses_pic_offset_table = 1;
7872 start_sequence ();
7873
7874 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7875 && arm_pic_register > LAST_LO_REGNUM
7876 && !compute_now)
7877 emit_move_insn (cfun->machine->pic_reg,
7878 gen_rtx_REG (Pmode, arm_pic_register));
7879 else
7880 arm_load_pic_register (0UL, pic_reg);
7881
7882 seq = get_insns ();
7883 end_sequence ();
7884
7885 for (insn = seq; insn; insn = NEXT_INSN (insn))
7886 if (INSN_P (insn))
7887 INSN_LOCATION (insn) = prologue_location;
7888
7889 /* We can be called during expansion of PHI nodes, where
7890 we can't yet emit instructions directly in the final
7891 insn stream. Queue the insns on the entry edge, they will
7892 be committed after everything else is expanded. */
7893 if (currently_expanding_to_rtl)
7894 insert_insn_on_edge (seq,
7895 single_succ_edge
7896 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7897 else
7898 emit_insn (seq);
7899 }
7900 }
7901 }
7902 }
7903
7904 /* Generate insns to calculate the address of ORIG in pic mode. */
7905 static rtx_insn *
7906 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7907 {
7908 rtx pat;
7909 rtx mem;
7910
7911 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7912
7913 /* Make the MEM as close to a constant as possible. */
7914 mem = SET_SRC (pat);
7915 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7916 MEM_READONLY_P (mem) = 1;
7917 MEM_NOTRAP_P (mem) = 1;
7918
7919 return emit_insn (pat);
7920 }
7921
7922 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7923 created to hold the result of the load. If not NULL, PIC_REG indicates
7924 which register to use as PIC register, otherwise it is decided by register
7925 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7926 location in the instruction stream, regardless of whether it was loaded
7927 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7928 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7929
7930 Returns the register REG into which the PIC load is performed. */
7931
7932 rtx
7933 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7934 bool compute_now)
7935 {
7936 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7937
7938 if (GET_CODE (orig) == SYMBOL_REF
7939 || GET_CODE (orig) == LABEL_REF)
7940 {
7941 if (reg == 0)
7942 {
7943 gcc_assert (can_create_pseudo_p ());
7944 reg = gen_reg_rtx (Pmode);
7945 }
7946
7947 /* VxWorks does not impose a fixed gap between segments; the run-time
7948 gap can be different from the object-file gap. We therefore can't
7949 use GOTOFF unless we are absolutely sure that the symbol is in the
7950 same segment as the GOT. Unfortunately, the flexibility of linker
7951 scripts means that we can't be sure of that in general, so assume
7952 that GOTOFF is never valid on VxWorks. */
7953 /* References to weak symbols cannot be resolved locally: they
7954 may be overridden by a non-weak definition at link time. */
7955 rtx_insn *insn;
7956 if ((GET_CODE (orig) == LABEL_REF
7957 || (GET_CODE (orig) == SYMBOL_REF
7958 && SYMBOL_REF_LOCAL_P (orig)
7959 && (SYMBOL_REF_DECL (orig)
7960 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7961 && (!SYMBOL_REF_FUNCTION_P (orig)
7962 || arm_fdpic_local_funcdesc_p (orig))))
7963 && NEED_GOT_RELOC
7964 && arm_pic_data_is_text_relative)
7965 insn = arm_pic_static_addr (orig, reg);
7966 else
7967 {
7968 /* If this function doesn't have a pic register, create one now. */
7969 require_pic_register (pic_reg, compute_now);
7970
7971 if (pic_reg == NULL_RTX)
7972 pic_reg = cfun->machine->pic_reg;
7973
7974 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7975 }
7976
7977 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7978 by loop. */
7979 set_unique_reg_note (insn, REG_EQUAL, orig);
7980
7981 return reg;
7982 }
7983 else if (GET_CODE (orig) == CONST)
7984 {
7985 rtx base, offset;
7986
7987 if (GET_CODE (XEXP (orig, 0)) == PLUS
7988 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7989 return orig;
7990
7991 /* Handle the case where we have: const (UNSPEC_TLS). */
7992 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7993 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7994 return orig;
7995
7996 /* Handle the case where we have:
7997 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7998 CONST_INT. */
7999 if (GET_CODE (XEXP (orig, 0)) == PLUS
8000 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8001 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8002 {
8003 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8004 return orig;
8005 }
8006
8007 if (reg == 0)
8008 {
8009 gcc_assert (can_create_pseudo_p ());
8010 reg = gen_reg_rtx (Pmode);
8011 }
8012
8013 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8014
8015 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8016 pic_reg, compute_now);
8017 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8018 base == reg ? 0 : reg, pic_reg,
8019 compute_now);
8020
8021 if (CONST_INT_P (offset))
8022 {
8023 /* The base register doesn't really matter, we only want to
8024 test the index for the appropriate mode. */
8025 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8026 {
8027 gcc_assert (can_create_pseudo_p ());
8028 offset = force_reg (Pmode, offset);
8029 }
8030
8031 if (CONST_INT_P (offset))
8032 return plus_constant (Pmode, base, INTVAL (offset));
8033 }
8034
8035 if (GET_MODE_SIZE (mode) > 4
8036 && (GET_MODE_CLASS (mode) == MODE_INT
8037 || TARGET_SOFT_FLOAT))
8038 {
8039 emit_insn (gen_addsi3 (reg, base, offset));
8040 return reg;
8041 }
8042
8043 return gen_rtx_PLUS (Pmode, base, offset);
8044 }
8045
8046 return orig;
8047 }
8048
8049
8050 /* Whether a register is callee saved or not. This is necessary because high
8051 registers are marked as caller saved when optimizing for size on Thumb-1
8052 targets despite being callee saved in order to avoid using them. */
8053 #define callee_saved_reg_p(reg) \
8054 (!call_used_or_fixed_reg_p (reg) \
8055 || (TARGET_THUMB1 && optimize_size \
8056 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8057
8058 /* Return a mask for the call-clobbered low registers that are unused
8059 at the end of the prologue. */
8060 static unsigned long
8061 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8062 {
8063 unsigned long mask = 0;
8064 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8065
8066 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8067 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8068 mask |= 1 << (reg - FIRST_LO_REGNUM);
8069 return mask;
8070 }
8071
8072 /* Similarly for the start of the epilogue. */
8073 static unsigned long
8074 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8075 {
8076 unsigned long mask = 0;
8077 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8078
8079 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8080 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8081 mask |= 1 << (reg - FIRST_LO_REGNUM);
8082 return mask;
8083 }
8084
8085 /* Find a spare register to use during the prologue of a function.  */
8086
8087 static int
8088 thumb_find_work_register (unsigned long pushed_regs_mask)
8089 {
8090 int reg;
8091
8092 unsigned long unused_regs
8093 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8094
8095 /* Check the argument registers first as these are call-used. The
8096 register allocation order means that sometimes r3 might be used
8097 but earlier argument registers might not, so check them all. */
8098 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8099 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8100 return reg;
8101
8102 /* Otherwise look for a call-saved register that is going to be pushed. */
8103 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8104 if (pushed_regs_mask & (1 << reg))
8105 return reg;
8106
8107 if (TARGET_THUMB2)
8108 {
8109 /* Thumb-2 can use high regs. */
8110 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8111 if (pushed_regs_mask & (1 << reg))
8112 return reg;
8113 }
8114 /* Something went wrong - thumb_compute_save_reg_mask()
8115 should have arranged for a suitable register to be pushed. */
8116 gcc_unreachable ();
8117 }
8118
8119 static GTY(()) int pic_labelno;
8120
8121 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8122 low register. */
8123
8124 void
8125 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8126 {
8127 rtx l1, labelno, pic_tmp, pic_rtx;
8128
8129 if (crtl->uses_pic_offset_table == 0
8130 || TARGET_SINGLE_PIC_BASE
8131 || TARGET_FDPIC)
8132 return;
8133
8134 gcc_assert (flag_pic);
8135
8136 if (pic_reg == NULL_RTX)
8137 pic_reg = cfun->machine->pic_reg;
8138 if (TARGET_VXWORKS_RTP)
8139 {
8140 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8141 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8142 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8143
8144 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8145
8146 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8147 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8148 }
8149 else
8150 {
8151 /* We use an UNSPEC rather than a LABEL_REF because this label
8152 never appears in the code stream. */
8153
8154 labelno = GEN_INT (pic_labelno++);
8155 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8156 l1 = gen_rtx_CONST (VOIDmode, l1);
8157
8158 /* On the ARM the PC register contains 'dot + 8' at the time of the
8159 addition, on the Thumb it is 'dot + 4'. */
8160 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8161 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8162 UNSPEC_GOTSYM_OFF);
8163 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8164
8165 if (TARGET_32BIT)
8166 {
8167 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8168 }
8169 else /* TARGET_THUMB1 */
8170 {
8171 if (arm_pic_register != INVALID_REGNUM
8172 && REGNO (pic_reg) > LAST_LO_REGNUM)
8173 {
8174 /* We will have pushed the pic register, so we should always be
8175 able to find a work register. */
8176 pic_tmp = gen_rtx_REG (SImode,
8177 thumb_find_work_register (saved_regs));
8178 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8179 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8180 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8181 }
8182 else if (arm_pic_register != INVALID_REGNUM
8183 && arm_pic_register > LAST_LO_REGNUM
8184 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8185 {
8186 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8187 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8188 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8189 }
8190 else
8191 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8192 }
8193 }
8194
8195 /* Need to emit this whether or not we obey regdecls,
8196 since setjmp/longjmp can cause life info to screw up. */
8197 emit_use (pic_reg);
8198 }
8199
8200 /* Try to determine whether an object, referenced via ORIG, will be
8201 placed in the text or data segment. This is used in FDPIC mode, to
8202 decide which relocations to use when accessing ORIG. *IS_READONLY
8203 is set to true if ORIG is a read-only location, false otherwise.
8204 Return true if we could determine the location of ORIG, false
8205 otherwise. *IS_READONLY is valid only when we return true. */
8206 static bool
8207 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8208 {
8209 *is_readonly = false;
8210
8211 if (GET_CODE (orig) == LABEL_REF)
8212 {
8213 *is_readonly = true;
8214 return true;
8215 }
8216
8217 if (SYMBOL_REF_P (orig))
8218 {
8219 if (CONSTANT_POOL_ADDRESS_P (orig))
8220 {
8221 *is_readonly = true;
8222 return true;
8223 }
8224 if (SYMBOL_REF_LOCAL_P (orig)
8225 && !SYMBOL_REF_EXTERNAL_P (orig)
8226 && SYMBOL_REF_DECL (orig)
8227 && (!DECL_P (SYMBOL_REF_DECL (orig))
8228 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8229 {
8230 tree decl = SYMBOL_REF_DECL (orig);
8231 tree init = (TREE_CODE (decl) == VAR_DECL)
8232 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8233 ? decl : 0;
8234 int reloc = 0;
8235 bool named_section, readonly;
8236
8237 if (init && init != error_mark_node)
8238 reloc = compute_reloc_for_constant (init);
8239
8240 named_section = TREE_CODE (decl) == VAR_DECL
8241 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8242 readonly = decl_readonly_section (decl, reloc);
8243
8244 /* We don't know where the link script will put a named
8245 section, so return false in such a case. */
8246 if (named_section)
8247 return false;
8248
8249 *is_readonly = readonly;
8250 return true;
8251 }
8252
8253 /* We don't know. */
8254 return false;
8255 }
8256
8257 gcc_unreachable ();
8258 }
8259
8260 /* Generate code to load the address of a static var when flag_pic is set. */
8261 static rtx_insn *
8262 arm_pic_static_addr (rtx orig, rtx reg)
8263 {
8264 rtx l1, labelno, offset_rtx;
8265 rtx_insn *insn;
8266
8267 gcc_assert (flag_pic);
8268
8269 bool is_readonly = false;
8270 bool info_known = false;
8271
8272 if (TARGET_FDPIC
8273 && SYMBOL_REF_P (orig)
8274 && !SYMBOL_REF_FUNCTION_P (orig))
8275 info_known = arm_is_segment_info_known (orig, &is_readonly);
8276
8277 if (TARGET_FDPIC
8278 && SYMBOL_REF_P (orig)
8279 && !SYMBOL_REF_FUNCTION_P (orig)
8280 && !info_known)
8281 {
8282 /* We don't know where orig is stored, so we have to be
8283 pessimistic and use a GOT relocation. */
8284 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8285
8286 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8287 }
8288 else if (TARGET_FDPIC
8289 && SYMBOL_REF_P (orig)
8290 && (SYMBOL_REF_FUNCTION_P (orig)
8291 || !is_readonly))
8292 {
8293 /* We use the GOTOFF relocation. */
8294 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8295
8296 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8297 emit_insn (gen_movsi (reg, l1));
8298 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8299 }
8300 else
8301 {
8302 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8303 PC-relative access. */
8304 /* We use an UNSPEC rather than a LABEL_REF because this label
8305 never appears in the code stream. */
8306 labelno = GEN_INT (pic_labelno++);
8307 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8308 l1 = gen_rtx_CONST (VOIDmode, l1);
8309
8310 /* On the ARM the PC register contains 'dot + 8' at the time of the
8311 addition, on the Thumb it is 'dot + 4'. */
8312 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8313 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8314 UNSPEC_SYMBOL_OFFSET);
8315 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8316
8317 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8318 labelno));
8319 }
8320
8321 return insn;
8322 }
8323
8324 /* Return nonzero if X is valid as an ARM state addressing register. */
8325 static int
8326 arm_address_register_rtx_p (rtx x, int strict_p)
8327 {
8328 int regno;
8329
8330 if (!REG_P (x))
8331 return 0;
8332
8333 regno = REGNO (x);
8334
8335 if (strict_p)
8336 return ARM_REGNO_OK_FOR_BASE_P (regno);
8337
8338 return (regno <= LAST_ARM_REGNUM
8339 || regno >= FIRST_PSEUDO_REGISTER
8340 || regno == FRAME_POINTER_REGNUM
8341 || regno == ARG_POINTER_REGNUM);
8342 }
8343
8344 /* Return TRUE if this rtx is the difference of a symbol and a label,
8345 and will reduce to a PC-relative relocation in the object file.
8346 Expressions like this can be left alone when generating PIC, rather
8347 than forced through the GOT. */
8348 static int
8349 pcrel_constant_p (rtx x)
8350 {
8351 if (GET_CODE (x) == MINUS)
8352 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8353
8354 return FALSE;
8355 }
8356
8357 /* Return true if X will surely end up in an index register after the
8358 next splitting pass.  */
8359 static bool
8360 will_be_in_index_register (const_rtx x)
8361 {
8362 /* arm.md: calculate_pic_address will split this into a register. */
8363 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8364 }
8365
8366 /* Return nonzero if X is a valid ARM state address operand. */
8367 int
8368 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8369 int strict_p)
8370 {
8371 bool use_ldrd;
8372 enum rtx_code code = GET_CODE (x);
8373
8374 if (arm_address_register_rtx_p (x, strict_p))
8375 return 1;
8376
8377 use_ldrd = (TARGET_LDRD
8378 && (mode == DImode || mode == DFmode));
8379
8380 if (code == POST_INC || code == PRE_DEC
8381 || ((code == PRE_INC || code == POST_DEC)
8382 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8383 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8384
8385 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8386 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8387 && GET_CODE (XEXP (x, 1)) == PLUS
8388 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8389 {
8390 rtx addend = XEXP (XEXP (x, 1), 1);
8391
8392 /* Don't allow ldrd post-increment by register because it's hard
8393 to fix up invalid register choices.  */
8394 if (use_ldrd
8395 && GET_CODE (x) == POST_MODIFY
8396 && REG_P (addend))
8397 return 0;
8398
8399 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8400 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8401 }
8402
8403 /* After reload constants split into minipools will have addresses
8404 from a LABEL_REF. */
8405 else if (reload_completed
8406 && (code == LABEL_REF
8407 || (code == CONST
8408 && GET_CODE (XEXP (x, 0)) == PLUS
8409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8410 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8411 return 1;
8412
8413 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8414 return 0;
8415
8416 else if (code == PLUS)
8417 {
8418 rtx xop0 = XEXP (x, 0);
8419 rtx xop1 = XEXP (x, 1);
8420
8421 return ((arm_address_register_rtx_p (xop0, strict_p)
8422 && ((CONST_INT_P (xop1)
8423 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8424 || (!strict_p && will_be_in_index_register (xop1))))
8425 || (arm_address_register_rtx_p (xop1, strict_p)
8426 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8427 }
8428
8429 #if 0
8430 /* Reload currently can't handle MINUS, so disable this for now */
8431 else if (GET_CODE (x) == MINUS)
8432 {
8433 rtx xop0 = XEXP (x, 0);
8434 rtx xop1 = XEXP (x, 1);
8435
8436 return (arm_address_register_rtx_p (xop0, strict_p)
8437 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8438 }
8439 #endif
8440
8441 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8442 && code == SYMBOL_REF
8443 && CONSTANT_POOL_ADDRESS_P (x)
8444 && ! (flag_pic
8445 && symbol_mentioned_p (get_pool_constant (x))
8446 && ! pcrel_constant_p (get_pool_constant (x))))
8447 return 1;
8448
8449 return 0;
8450 }
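
/* Illustrative examples (not an exhaustive list) of addresses the
   function above accepts for a 4-byte access in ARM state:

     [r1]                base register
     [r1, #offset]       base plus/minus a 12-bit immediate
     [r1, r2]            base plus index register
     [r1, r2, lsl #2]    base plus scaled index register
     [r1], #4            post-increment (POST_INC and friends)

   Larger and vector modes are restricted further, as the checks on
   GET_MODE_SIZE and TARGET_LDRD above show.  */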
8451
8452 /* Return true if we can avoid creating a constant pool entry for x. */
8453 static bool
8454 can_avoid_literal_pool_for_label_p (rtx x)
8455 {
8456 /* Normally we can assign constant values to target registers without
8457 the help of the constant pool.  But there are cases where we have to use
8458 the constant pool, such as:
8459 1) assigning a label to a register;
8460 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8461
8462 A constant-pool access of the form:
8463 (set (reg r0) (mem (symbol_ref (".LC0"))))
8464 will cause the use of the literal pool (later, in function arm_reorg).
8465 So here we mark such a form as invalid, and the compiler
8466 will adjust it into:
8467 (set (reg r0) (symbol_ref (".LC0")))
8468 (set (reg r0) (mem (reg r0))).
8469 No extra register is required, and (mem (reg r0)) won't cause the use
8470 of literal pools. */
8471 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
8472 && CONSTANT_POOL_ADDRESS_P (x))
8473 return 1;
8474 return 0;
8475 }
8476
8477
8478 /* Return nonzero if X is a valid Thumb-2 address operand. */
8479 static int
8480 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8481 {
8482 bool use_ldrd;
8483 enum rtx_code code = GET_CODE (x);
8484
8485 if (TARGET_HAVE_MVE
8486 && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
8487 return mve_vector_mem_operand (mode, x, strict_p);
8488
8489 if (arm_address_register_rtx_p (x, strict_p))
8490 return 1;
8491
8492 use_ldrd = (TARGET_LDRD
8493 && (mode == DImode || mode == DFmode));
8494
8495 if (code == POST_INC || code == PRE_DEC
8496 || ((code == PRE_INC || code == POST_DEC)
8497 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8498 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8499
8500 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8501 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8502 && GET_CODE (XEXP (x, 1)) == PLUS
8503 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8504 {
8505 /* Thumb-2 only has autoincrement by constant. */
8506 rtx addend = XEXP (XEXP (x, 1), 1);
8507 HOST_WIDE_INT offset;
8508
8509 if (!CONST_INT_P (addend))
8510 return 0;
8511
8512 offset = INTVAL(addend);
8513 if (GET_MODE_SIZE (mode) <= 4)
8514 return (offset > -256 && offset < 256);
8515
8516 return (use_ldrd && offset > -1024 && offset < 1024
8517 && (offset & 3) == 0);
8518 }
8519
8520 /* After reload constants split into minipools will have addresses
8521 from a LABEL_REF. */
8522 else if (reload_completed
8523 && (code == LABEL_REF
8524 || (code == CONST
8525 && GET_CODE (XEXP (x, 0)) == PLUS
8526 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8527 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8528 return 1;
8529
8530 else if (mode == TImode
8531 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8532 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8533 return 0;
8534
8535 else if (code == PLUS)
8536 {
8537 rtx xop0 = XEXP (x, 0);
8538 rtx xop1 = XEXP (x, 1);
8539
8540 return ((arm_address_register_rtx_p (xop0, strict_p)
8541 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8542 || (!strict_p && will_be_in_index_register (xop1))))
8543 || (arm_address_register_rtx_p (xop1, strict_p)
8544 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8545 }
8546
8547 else if (can_avoid_literal_pool_for_label_p (x))
8548 return 0;
8549
8550 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8551 && code == SYMBOL_REF
8552 && CONSTANT_POOL_ADDRESS_P (x)
8553 && ! (flag_pic
8554 && symbol_mentioned_p (get_pool_constant (x))
8555 && ! pcrel_constant_p (get_pool_constant (x))))
8556 return 1;
8557
8558 return 0;
8559 }
8560
8561 /* Return nonzero if INDEX is valid for an address index operand in
8562 ARM state. */
8563 static int
8564 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8565 int strict_p)
8566 {
8567 HOST_WIDE_INT range;
8568 enum rtx_code code = GET_CODE (index);
8569
8570 /* Standard coprocessor addressing modes. */
8571 if (TARGET_HARD_FLOAT
8572 && (mode == SFmode || mode == DFmode))
8573 return (code == CONST_INT && INTVAL (index) < 1024
8574 && INTVAL (index) > -1024
8575 && (INTVAL (index) & 3) == 0);
8576
8577 /* For quad modes, we restrict the constant offset to be slightly less
8578 than what the instruction format permits. We do this because for
8579 quad mode moves, we will actually decompose them into two separate
8580 double-mode reads or writes. INDEX must therefore be a valid
8581 (double-mode) offset and so should INDEX+8. */
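   /* As an illustration: in a quad-register mode an offset of 1012 is
      accepted, since both 1012 and 1012 + 8 = 1020 are valid double-mode
      offsets, whereas 1016 is rejected because 1016 + 8 = 1024 would
      exceed the double-mode limit of 1020.  */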
8582 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8583 return (code == CONST_INT
8584 && INTVAL (index) < 1016
8585 && INTVAL (index) > -1024
8586 && (INTVAL (index) & 3) == 0);
8587
8588 /* We have no such constraint on double mode offsets, so we permit the
8589 full range of the instruction format. */
8590 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8591 return (code == CONST_INT
8592 && INTVAL (index) < 1024
8593 && INTVAL (index) > -1024
8594 && (INTVAL (index) & 3) == 0);
8595
8596 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8597 return (code == CONST_INT
8598 && INTVAL (index) < 1024
8599 && INTVAL (index) > -1024
8600 && (INTVAL (index) & 3) == 0);
8601
8602 if (arm_address_register_rtx_p (index, strict_p)
8603 && (GET_MODE_SIZE (mode) <= 4))
8604 return 1;
8605
8606 if (mode == DImode || mode == DFmode)
8607 {
8608 if (code == CONST_INT)
8609 {
8610 HOST_WIDE_INT val = INTVAL (index);
8611
8612 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8613 If vldr is selected it uses arm_coproc_mem_operand. */
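	  /* Sketch of the ranges used below: ldrd takes an 8-bit offset,
	     hence -255..255; with two ldr instructions the second access
	     is at VAL + 4 and must still fit the 12-bit ldr offset, so
	     the upper bound is 4091 (tested as < 4092).  */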
8614 if (TARGET_LDRD)
8615 return val > -256 && val < 256;
8616 else
8617 return val > -4096 && val < 4092;
8618 }
8619
8620 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8621 }
8622
8623 if (GET_MODE_SIZE (mode) <= 4
8624 && ! (arm_arch4
8625 && (mode == HImode
8626 || mode == HFmode
8627 || (mode == QImode && outer == SIGN_EXTEND))))
8628 {
8629 if (code == MULT)
8630 {
8631 rtx xiop0 = XEXP (index, 0);
8632 rtx xiop1 = XEXP (index, 1);
8633
8634 return ((arm_address_register_rtx_p (xiop0, strict_p)
8635 && power_of_two_operand (xiop1, SImode))
8636 || (arm_address_register_rtx_p (xiop1, strict_p)
8637 && power_of_two_operand (xiop0, SImode)));
8638 }
8639 else if (code == LSHIFTRT || code == ASHIFTRT
8640 || code == ASHIFT || code == ROTATERT)
8641 {
8642 rtx op = XEXP (index, 1);
8643
8644 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8645 && CONST_INT_P (op)
8646 && INTVAL (op) > 0
8647 && INTVAL (op) <= 31);
8648 }
8649 }
8650
8651 /* For ARM v4 we may be doing a sign-extend operation during the
8652 load. */
8653 if (arm_arch4)
8654 {
8655 if (mode == HImode
8656 || mode == HFmode
8657 || (outer == SIGN_EXTEND && mode == QImode))
8658 range = 256;
8659 else
8660 range = 4096;
8661 }
8662 else
8663 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8664
8665 return (code == CONST_INT
8666 && INTVAL (index) < range
8667 && INTVAL (index) > -range);
8668 }
8669
8670 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8671 index operand, i.e. 1, 2, 4 or 8. */
8672 static bool
8673 thumb2_index_mul_operand (rtx op)
8674 {
8675 HOST_WIDE_INT val;
8676
8677 if (!CONST_INT_P (op))
8678 return false;
8679
8680 val = INTVAL (op);
8681 return (val == 1 || val == 2 || val == 4 || val == 8);
8682 }
8683
8684 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8685 static int
8686 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8687 {
8688 enum rtx_code code = GET_CODE (index);
8689
8690 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8691 /* Standard coprocessor addressing modes. */
8692 if (TARGET_VFP_BASE
8693 && (mode == SFmode || mode == DFmode))
8694 return (code == CONST_INT && INTVAL (index) < 1024
8695 /* Thumb-2 allows only a > -256 index range for its core register
8696 load/stores. Since we allow SF/DF in core registers, we have
8697 to use the intersection between -256~4096 (core) and -1024~1024
8698 (coprocessor). */
8699 && INTVAL (index) > -256
8700 && (INTVAL (index) & 3) == 0);
8701
8702 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8703 {
8704 /* For DImode assume values will usually live in core regs
8705 and only allow LDRD addressing modes. */
8706 if (!TARGET_LDRD || mode != DImode)
8707 return (code == CONST_INT
8708 && INTVAL (index) < 1024
8709 && INTVAL (index) > -1024
8710 && (INTVAL (index) & 3) == 0);
8711 }
8712
8713 /* For quad modes, we restrict the constant offset to be slightly less
8714 than what the instruction format permits. We do this because for
8715 quad mode moves, we will actually decompose them into two separate
8716 double-mode reads or writes. INDEX must therefore be a valid
8717 (double-mode) offset and so should INDEX+8. */
8718 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8719 return (code == CONST_INT
8720 && INTVAL (index) < 1016
8721 && INTVAL (index) > -1024
8722 && (INTVAL (index) & 3) == 0);
8723
8724 /* We have no such constraint on double mode offsets, so we permit the
8725 full range of the instruction format. */
8726 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8727 return (code == CONST_INT
8728 && INTVAL (index) < 1024
8729 && INTVAL (index) > -1024
8730 && (INTVAL (index) & 3) == 0);
8731
8732 if (arm_address_register_rtx_p (index, strict_p)
8733 && (GET_MODE_SIZE (mode) <= 4))
8734 return 1;
8735
8736 if (mode == DImode || mode == DFmode)
8737 {
8738 if (code == CONST_INT)
8739 {
8740 HOST_WIDE_INT val = INTVAL (index);
8741 /* Thumb-2 ldrd only has reg+const addressing modes.
8742 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8743 If vldr is selected it uses arm_coproc_mem_operand. */
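	  /* Sketch: Thumb-2 ldrd uses a scaled 8-bit offset, giving
	     multiples of 4 in -1020..1020; the 2x ldr fallback needs both
	     VAL and VAL + 4 inside the Thumb-2 ldr range of -255..4095,
	     hence -255..4091.  */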
8744 if (TARGET_LDRD)
8745 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8746 else
8747 return IN_RANGE (val, -255, 4095 - 4);
8748 }
8749 else
8750 return 0;
8751 }
8752
8753 if (code == MULT)
8754 {
8755 rtx xiop0 = XEXP (index, 0);
8756 rtx xiop1 = XEXP (index, 1);
8757
8758 return ((arm_address_register_rtx_p (xiop0, strict_p)
8759 && thumb2_index_mul_operand (xiop1))
8760 || (arm_address_register_rtx_p (xiop1, strict_p)
8761 && thumb2_index_mul_operand (xiop0)));
8762 }
8763 else if (code == ASHIFT)
8764 {
8765 rtx op = XEXP (index, 1);
8766
8767 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8768 && CONST_INT_P (op)
8769 && INTVAL (op) > 0
8770 && INTVAL (op) <= 3);
8771 }
8772
8773 return (code == CONST_INT
8774 && INTVAL (index) < 4096
8775 && INTVAL (index) > -256);
8776 }
8777
8778 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8779 static int
8780 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8781 {
8782 int regno;
8783
8784 if (!REG_P (x))
8785 return 0;
8786
8787 regno = REGNO (x);
8788
8789 if (strict_p)
8790 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8791
8792 return (regno <= LAST_LO_REGNUM
8793 || regno > LAST_VIRTUAL_REGISTER
8794 || regno == FRAME_POINTER_REGNUM
8795 || (GET_MODE_SIZE (mode) >= 4
8796 && (regno == STACK_POINTER_REGNUM
8797 || regno >= FIRST_PSEUDO_REGISTER
8798 || x == hard_frame_pointer_rtx
8799 || x == arg_pointer_rtx)));
8800 }
8801
8802 /* Return nonzero if x is a legitimate index register. This is the case
8803 for any base register that can access a QImode object. */
8804 inline static int
8805 thumb1_index_register_rtx_p (rtx x, int strict_p)
8806 {
8807 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8808 }
8809
8810 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8811
8812 The AP may be eliminated to either the SP or the FP, so we use the
8813 least common denominator, e.g. SImode, and offsets from 0 to 64.
8814
8815 ??? Verify whether the above is the right approach.
8816
8817 ??? Also, the FP may be eliminated to the SP, so perhaps that
8818 needs special handling also.
8819
8820 ??? Look at how the mips16 port solves this problem. It probably uses
8821 better ways to solve some of these problems.
8822
8823 Although it is not incorrect, we don't accept QImode and HImode
8824 addresses based on the frame pointer or arg pointer until the
8825 reload pass starts. This is so that eliminating such addresses
8826 into stack based ones won't produce impossible code. */
8827 int
8828 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8829 {
8830 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8831 return 0;
8832
8833 /* ??? Not clear if this is right. Experiment. */
8834 if (GET_MODE_SIZE (mode) < 4
8835 && !(reload_in_progress || reload_completed)
8836 && (reg_mentioned_p (frame_pointer_rtx, x)
8837 || reg_mentioned_p (arg_pointer_rtx, x)
8838 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8839 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8840 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8841 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8842 return 0;
8843
8844 /* Accept any base register. SP only in SImode or larger. */
8845 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8846 return 1;
8847
8848 /* This is PC relative data before arm_reorg runs. */
8849 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8850 && GET_CODE (x) == SYMBOL_REF
8851 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
8852 && !arm_disable_literal_pool)
8853 return 1;
8854
8855 /* This is PC relative data after arm_reorg runs. */
8856 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8857 && reload_completed
8858 && (GET_CODE (x) == LABEL_REF
8859 || (GET_CODE (x) == CONST
8860 && GET_CODE (XEXP (x, 0)) == PLUS
8861 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8862 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8863 return 1;
8864
8865 /* Post-inc indexing only supported for SImode and larger. */
8866 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8867 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8868 return 1;
8869
8870 else if (GET_CODE (x) == PLUS)
8871 {
8872 /* REG+REG address can be any two index registers. */
8873 /* We disallow FRAME+REG addressing since we know that FRAME
8874 will be replaced with STACK, and SP relative addressing only
8875 permits SP+OFFSET. */
8876 if (GET_MODE_SIZE (mode) <= 4
8877 && XEXP (x, 0) != frame_pointer_rtx
8878 && XEXP (x, 1) != frame_pointer_rtx
8879 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8880 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8881 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8882 return 1;
8883
8884 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
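      /* Concretely (see thumb_legitimate_offset_p below): 0..31 for byte
	 accesses, 0..62 in steps of 2 for halfwords, and 0..124 in steps
	 of 4 for words and larger.  */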
8885 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8886 || XEXP (x, 0) == arg_pointer_rtx)
8887 && CONST_INT_P (XEXP (x, 1))
8888 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8889 return 1;
8890
8891 /* REG+const has a 10-bit offset for SP, but only SImode and
8892 larger are supported. */
8893 /* ??? Should probably check for DI/DFmode overflow here
8894 just like GO_IF_LEGITIMATE_OFFSET does. */
8895 else if (REG_P (XEXP (x, 0))
8896 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8897 && GET_MODE_SIZE (mode) >= 4
8898 && CONST_INT_P (XEXP (x, 1))
8899 && INTVAL (XEXP (x, 1)) >= 0
8900 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8901 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8902 return 1;
8903
8904 else if (REG_P (XEXP (x, 0))
8905 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8906 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8907 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8908 && REGNO (XEXP (x, 0))
8909 <= LAST_VIRTUAL_POINTER_REGISTER))
8910 && GET_MODE_SIZE (mode) >= 4
8911 && CONST_INT_P (XEXP (x, 1))
8912 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8913 return 1;
8914 }
8915
8916 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8917 && GET_MODE_SIZE (mode) == 4
8918 && GET_CODE (x) == SYMBOL_REF
8919 && CONSTANT_POOL_ADDRESS_P (x)
8920 && !arm_disable_literal_pool
8921 && ! (flag_pic
8922 && symbol_mentioned_p (get_pool_constant (x))
8923 && ! pcrel_constant_p (get_pool_constant (x))))
8924 return 1;
8925
8926 return 0;
8927 }
8928
8929 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8930 instruction of mode MODE. */
8931 int
8932 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8933 {
8934 switch (GET_MODE_SIZE (mode))
8935 {
8936 case 1:
8937 return val >= 0 && val < 32;
8938
8939 case 2:
8940 return val >= 0 && val < 64 && (val & 1) == 0;
8941
8942 default:
8943 return (val >= 0
8944 && (val + GET_MODE_SIZE (mode)) <= 128
8945 && (val & 3) == 0);
8946 }
8947 }
8948
8949 bool
8950 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8951 {
8952 if (TARGET_ARM)
8953 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8954 else if (TARGET_THUMB2)
8955 return thumb2_legitimate_address_p (mode, x, strict_p);
8956 else /* if (TARGET_THUMB1) */
8957 return thumb1_legitimate_address_p (mode, x, strict_p);
8958 }
8959
8960 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8961
8962 Given an rtx X being reloaded into a reg required to be
8963 in class CLASS, return the class of reg to actually use.
8964 In general this is just CLASS, but for the Thumb core registers and
8965 immediate constants we prefer a LO_REGS class or a subset. */
8966
8967 static reg_class_t
8968 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8969 {
8970 if (TARGET_32BIT)
8971 return rclass;
8972 else
8973 {
8974 if (rclass == GENERAL_REGS)
8975 return LO_REGS;
8976 else
8977 return rclass;
8978 }
8979 }
8980
8981 /* Build the SYMBOL_REF for __tls_get_addr. */
8982
8983 static GTY(()) rtx tls_get_addr_libfunc;
8984
8985 static rtx
8986 get_tls_get_addr (void)
8987 {
8988 if (!tls_get_addr_libfunc)
8989 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8990 return tls_get_addr_libfunc;
8991 }
8992
8993 rtx
8994 arm_load_tp (rtx target)
8995 {
8996 if (!target)
8997 target = gen_reg_rtx (SImode);
8998
8999 if (TARGET_HARD_TP)
9000 {
9001 /* Can return in any reg. */
9002 emit_insn (gen_load_tp_hard (target));
9003 }
9004 else
9005 {
9006 /* Always returned in r0. Immediately copy the result into a pseudo,
9007 otherwise other uses of r0 (e.g. setting up function arguments) may
9008 clobber the value. */
9009
9010 rtx tmp;
9011
9012 if (TARGET_FDPIC)
9013 {
9014 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9015 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9016
9017 emit_insn (gen_load_tp_soft_fdpic ());
9018
9019 /* Restore r9. */
9020 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9021 }
9022 else
9023 emit_insn (gen_load_tp_soft ());
9024
9025 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9026 emit_move_insn (target, tmp);
9027 }
9028 return target;
9029 }
9030
9031 static rtx
9032 load_tls_operand (rtx x, rtx reg)
9033 {
9034 rtx tmp;
9035
9036 if (reg == NULL_RTX)
9037 reg = gen_reg_rtx (SImode);
9038
9039 tmp = gen_rtx_CONST (SImode, x);
9040
9041 emit_move_insn (reg, tmp);
9042
9043 return reg;
9044 }
9045
9046 static rtx_insn *
9047 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9048 {
9049 rtx label, labelno = NULL_RTX, sum;
9050
9051 gcc_assert (reloc != TLS_DESCSEQ);
9052 start_sequence ();
9053
9054 if (TARGET_FDPIC)
9055 {
9056 sum = gen_rtx_UNSPEC (Pmode,
9057 gen_rtvec (2, x, GEN_INT (reloc)),
9058 UNSPEC_TLS);
9059 }
9060 else
9061 {
9062 labelno = GEN_INT (pic_labelno++);
9063 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9064 label = gen_rtx_CONST (VOIDmode, label);
9065
9066 sum = gen_rtx_UNSPEC (Pmode,
9067 gen_rtvec (4, x, GEN_INT (reloc), label,
9068 GEN_INT (TARGET_ARM ? 8 : 4)),
9069 UNSPEC_TLS);
9070 }
9071 reg = load_tls_operand (sum, reg);
9072
9073 if (TARGET_FDPIC)
9074 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9075 else if (TARGET_ARM)
9076 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9077 else
9078 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9079
9080 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9081 LCT_PURE, /* LCT_CONST? */
9082 Pmode, reg, Pmode);
9083
9084 rtx_insn *insns = get_insns ();
9085 end_sequence ();
9086
9087 return insns;
9088 }
9089
9090 static rtx
9091 arm_tls_descseq_addr (rtx x, rtx reg)
9092 {
9093 rtx labelno = GEN_INT (pic_labelno++);
9094 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9095 rtx sum = gen_rtx_UNSPEC (Pmode,
9096 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9097 gen_rtx_CONST (VOIDmode, label),
9098 GEN_INT (!TARGET_ARM)),
9099 UNSPEC_TLS);
9100 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9101
9102 emit_insn (gen_tlscall (x, labelno));
9103 if (!reg)
9104 reg = gen_reg_rtx (SImode);
9105 else
9106 gcc_assert (REGNO (reg) != R0_REGNUM);
9107
9108 emit_move_insn (reg, reg0);
9109
9110 return reg;
9111 }
9112
9113
9114 rtx
9115 legitimize_tls_address (rtx x, rtx reg)
9116 {
9117 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9118 rtx_insn *insns;
9119 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9120
9121 switch (model)
9122 {
9123 case TLS_MODEL_GLOBAL_DYNAMIC:
9124 if (TARGET_GNU2_TLS)
9125 {
9126 gcc_assert (!TARGET_FDPIC);
9127
9128 reg = arm_tls_descseq_addr (x, reg);
9129
9130 tp = arm_load_tp (NULL_RTX);
9131
9132 dest = gen_rtx_PLUS (Pmode, tp, reg);
9133 }
9134 else
9135 {
9136 /* Original scheme */
9137 if (TARGET_FDPIC)
9138 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9139 else
9140 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9141 dest = gen_reg_rtx (Pmode);
9142 emit_libcall_block (insns, dest, ret, x);
9143 }
9144 return dest;
9145
9146 case TLS_MODEL_LOCAL_DYNAMIC:
9147 if (TARGET_GNU2_TLS)
9148 {
9149 gcc_assert (!TARGET_FDPIC);
9150
9151 reg = arm_tls_descseq_addr (x, reg);
9152
9153 tp = arm_load_tp (NULL_RTX);
9154
9155 dest = gen_rtx_PLUS (Pmode, tp, reg);
9156 }
9157 else
9158 {
9159 if (TARGET_FDPIC)
9160 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9161 else
9162 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9163
9164 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9165 share the LDM result with other LD model accesses. */
9166 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9167 UNSPEC_TLS);
9168 dest = gen_reg_rtx (Pmode);
9169 emit_libcall_block (insns, dest, ret, eqv);
9170
9171 /* Load the addend. */
9172 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9173 GEN_INT (TLS_LDO32)),
9174 UNSPEC_TLS);
9175 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9176 dest = gen_rtx_PLUS (Pmode, dest, addend);
9177 }
9178 return dest;
9179
9180 case TLS_MODEL_INITIAL_EXEC:
9181 if (TARGET_FDPIC)
9182 {
9183 sum = gen_rtx_UNSPEC (Pmode,
9184 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9185 UNSPEC_TLS);
9186 reg = load_tls_operand (sum, reg);
9187 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9188 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9189 }
9190 else
9191 {
9192 labelno = GEN_INT (pic_labelno++);
9193 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9194 label = gen_rtx_CONST (VOIDmode, label);
9195 sum = gen_rtx_UNSPEC (Pmode,
9196 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9197 GEN_INT (TARGET_ARM ? 8 : 4)),
9198 UNSPEC_TLS);
9199 reg = load_tls_operand (sum, reg);
9200
9201 if (TARGET_ARM)
9202 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9203 else if (TARGET_THUMB2)
9204 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9205 else
9206 {
9207 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9208 emit_move_insn (reg, gen_const_mem (SImode, reg));
9209 }
9210 }
9211
9212 tp = arm_load_tp (NULL_RTX);
9213
9214 return gen_rtx_PLUS (Pmode, tp, reg);
9215
9216 case TLS_MODEL_LOCAL_EXEC:
9217 tp = arm_load_tp (NULL_RTX);
9218
9219 reg = gen_rtx_UNSPEC (Pmode,
9220 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9221 UNSPEC_TLS);
9222 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9223
9224 return gen_rtx_PLUS (Pmode, tp, reg);
9225
9226 default:
9227 abort ();
9228 }
9229 }
9230
9231 /* Try machine-dependent ways of modifying an illegitimate address
9232 to be legitimate. If we find one, return the new, valid address. */
9233 rtx
9234 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9235 {
9236 if (arm_tls_referenced_p (x))
9237 {
9238 rtx addend = NULL;
9239
9240 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9241 {
9242 addend = XEXP (XEXP (x, 0), 1);
9243 x = XEXP (XEXP (x, 0), 0);
9244 }
9245
9246 if (GET_CODE (x) != SYMBOL_REF)
9247 return x;
9248
9249 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9250
9251 x = legitimize_tls_address (x, NULL_RTX);
9252
9253 if (addend)
9254 {
9255 x = gen_rtx_PLUS (SImode, x, addend);
9256 orig_x = x;
9257 }
9258 else
9259 return x;
9260 }
9261
9262 if (TARGET_THUMB1)
9263 return thumb_legitimize_address (x, orig_x, mode);
9264
9265 if (GET_CODE (x) == PLUS)
9266 {
9267 rtx xop0 = XEXP (x, 0);
9268 rtx xop1 = XEXP (x, 1);
9269
9270 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9271 xop0 = force_reg (SImode, xop0);
9272
9273 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9274 && !symbol_mentioned_p (xop1))
9275 xop1 = force_reg (SImode, xop1);
9276
9277 if (ARM_BASE_REGISTER_RTX_P (xop0)
9278 && CONST_INT_P (xop1))
9279 {
9280 HOST_WIDE_INT n, low_n;
9281 rtx base_reg, val;
9282 n = INTVAL (xop1);
9283
9284 /* VFP addressing modes actually allow greater offsets, but for
9285 now we just stick with the lowest common denominator. */
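	  /* For instance, in DImode an offset of 271 (0x10f) is rewritten
	     below as a bias of 272 on a new base register plus a small
	     residual offset of -1.  */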
9286 if (mode == DImode || mode == DFmode)
9287 {
9288 low_n = n & 0x0f;
9289 n &= ~0x0f;
9290 if (low_n > 4)
9291 {
9292 n += 16;
9293 low_n -= 16;
9294 }
9295 }
9296 else
9297 {
9298 low_n = ((mode) == TImode ? 0
9299 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9300 n -= low_n;
9301 }
9302
9303 base_reg = gen_reg_rtx (SImode);
9304 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9305 emit_move_insn (base_reg, val);
9306 x = plus_constant (Pmode, base_reg, low_n);
9307 }
9308 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9309 x = gen_rtx_PLUS (SImode, xop0, xop1);
9310 }
9311
9312 /* XXX We don't allow MINUS any more -- see comment in
9313 arm_legitimate_address_outer_p (). */
9314 else if (GET_CODE (x) == MINUS)
9315 {
9316 rtx xop0 = XEXP (x, 0);
9317 rtx xop1 = XEXP (x, 1);
9318
9319 if (CONSTANT_P (xop0))
9320 xop0 = force_reg (SImode, xop0);
9321
9322 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9323 xop1 = force_reg (SImode, xop1);
9324
9325 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9326 x = gen_rtx_MINUS (SImode, xop0, xop1);
9327 }
9328
9329 /* Make sure to take full advantage of the pre-indexed addressing mode
9330 with absolute addresses which often allows for the base register to
9331 be factorized for multiple adjacent memory references, and it might
9332 even allow for the mini pool to be avoided entirely. */
9333 else if (CONST_INT_P (x) && optimize > 0)
9334 {
9335 unsigned int bits;
9336 HOST_WIDE_INT mask, base, index;
9337 rtx base_reg;
9338
9339 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9340 only use an 8-bit index. So let's use a 12-bit index for
9341 SImode only and hope that arm_gen_constant will enable LDRB
9342 to use more bits. */
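      /* For example, in SImode an absolute address of 0x12345678 is
	 split into a base of 0x12345000 plus an index of 0x678.  */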
9343 bits = (mode == SImode) ? 12 : 8;
9344 mask = (1 << bits) - 1;
9345 base = INTVAL (x) & ~mask;
9346 index = INTVAL (x) & mask;
9347 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9348 {
9349 /* It'll most probably be more efficient to generate the
9350 base with more bits set and use a negative index instead.
9351 Don't do this for Thumb as negative offsets are much more
9352 limited. */
9353 base |= mask;
9354 index -= mask;
9355 }
9356 base_reg = force_reg (SImode, GEN_INT (base));
9357 x = plus_constant (Pmode, base_reg, index);
9358 }
9359
9360 if (flag_pic)
9361 {
9362 /* We need to find and carefully transform any SYMBOL and LABEL
9363 references; so go back to the original address expression. */
9364 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9365 false /*compute_now*/);
9366
9367 if (new_x != orig_x)
9368 x = new_x;
9369 }
9370
9371 return x;
9372 }
9373
9374
9375 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9376 to be legitimate. If we find one, return the new, valid address. */
9377 rtx
9378 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9379 {
9380 if (GET_CODE (x) == PLUS
9381 && CONST_INT_P (XEXP (x, 1))
9382 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9383 || INTVAL (XEXP (x, 1)) < 0))
9384 {
9385 rtx xop0 = XEXP (x, 0);
9386 rtx xop1 = XEXP (x, 1);
9387 HOST_WIDE_INT offset = INTVAL (xop1);
9388
9389 /* Try and fold the offset into a biasing of the base register and
9390 then offsetting that. Don't do this when optimizing for space
9391 since it can cause too many CSEs. */
9392 if (optimize_size && offset >= 0
9393 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9394 {
9395 HOST_WIDE_INT delta;
9396
9397 if (offset >= 256)
9398 delta = offset - (256 - GET_MODE_SIZE (mode));
9399 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9400 delta = 31 * GET_MODE_SIZE (mode);
9401 else
9402 delta = offset & (~31 * GET_MODE_SIZE (mode));
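	  /* E.g. for SImode and offset == 300, delta is
	     300 - (256 - 4) = 48, so the base register is biased by 252
	     and the final address uses the in-range offset 48.  */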
9403
9404 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9405 NULL_RTX);
9406 x = plus_constant (Pmode, xop0, delta);
9407 }
9408 else if (offset < 0 && offset > -256)
9409 /* Small negative offsets are best done with a subtract before the
9410 dereference; forcing these into a register normally takes two
9411 instructions. */
9412 x = force_operand (x, NULL_RTX);
9413 else
9414 {
9415 /* For the remaining cases, force the constant into a register. */
9416 xop1 = force_reg (SImode, xop1);
9417 x = gen_rtx_PLUS (SImode, xop0, xop1);
9418 }
9419 }
9420 else if (GET_CODE (x) == PLUS
9421 && s_register_operand (XEXP (x, 1), SImode)
9422 && !s_register_operand (XEXP (x, 0), SImode))
9423 {
9424 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9425
9426 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9427 }
9428
9429 if (flag_pic)
9430 {
9431 /* We need to find and carefully transform any SYMBOL and LABEL
9432 references; so go back to the original address expression. */
9433 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9434 false /*compute_now*/);
9435
9436 if (new_x != orig_x)
9437 x = new_x;
9438 }
9439
9440 return x;
9441 }
9442
9443 /* Return TRUE if X contains any TLS symbol references. */
9444
9445 bool
9446 arm_tls_referenced_p (rtx x)
9447 {
9448 if (! TARGET_HAVE_TLS)
9449 return false;
9450
9451 subrtx_iterator::array_type array;
9452 FOR_EACH_SUBRTX (iter, array, x, ALL)
9453 {
9454 const_rtx x = *iter;
9455 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
9456 {
9457 /* ARM currently does not provide relocations to encode TLS variables
9458 into AArch32 instructions, only data, so there is no way to
9459 currently implement these if a literal pool is disabled. */
9460 if (arm_disable_literal_pool)
9461 sorry ("accessing thread-local storage is not currently supported "
9462 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9463
9464 return true;
9465 }
9466
9467 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9468 TLS offsets, not real symbol references. */
9469 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9470 iter.skip_subrtxes ();
9471 }
9472 return false;
9473 }
9474
9475 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9476
9477 On the ARM, allow any integer (invalid ones are removed later by insn
9478 patterns), nice doubles and symbol_refs which refer to the function's
9479 constant pool XXX.
9480
9481 When generating pic allow anything. */
9482
9483 static bool
9484 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9485 {
9486 return flag_pic || !label_mentioned_p (x);
9487 }
9488
9489 static bool
9490 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9491 {
9492 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9493 RTXs. These must therefore be allowed for Thumb-1 so that, when run
9494 for ARMv8-M Baseline or later, the result is valid. */
9495 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9496 x = XEXP (x, 0);
9497
9498 return (CONST_INT_P (x)
9499 || CONST_DOUBLE_P (x)
9500 || CONSTANT_ADDRESS_P (x)
9501 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
9502 || flag_pic);
9503 }
9504
9505 static bool
9506 arm_legitimate_constant_p (machine_mode mode, rtx x)
9507 {
9508 return (!arm_cannot_force_const_mem (mode, x)
9509 && (TARGET_32BIT
9510 ? arm_legitimate_constant_p_1 (mode, x)
9511 : thumb_legitimate_constant_p (mode, x)));
9512 }
9513
9514 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9515
9516 static bool
9517 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9518 {
9519 rtx base, offset;
9520 split_const (x, &base, &offset);
9521
9522 if (SYMBOL_REF_P (base))
9523 {
9524 /* Function symbols cannot have an offset due to the Thumb bit. */
9525 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9526 && INTVAL (offset) != 0)
9527 return true;
9528
9529 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9530 && !offset_within_block_p (base, INTVAL (offset)))
9531 return true;
9532 }
9533 return arm_tls_referenced_p (x);
9534 }
9535 \f
9536 #define REG_OR_SUBREG_REG(X) \
9537 (REG_P (X) \
9538 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
9539
9540 #define REG_OR_SUBREG_RTX(X) \
9541 (REG_P (X) ? (X) : SUBREG_REG (X))
9542
9543 static inline int
9544 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9545 {
9546 machine_mode mode = GET_MODE (x);
9547 int total, words;
9548
9549 switch (code)
9550 {
9551 case ASHIFT:
9552 case ASHIFTRT:
9553 case LSHIFTRT:
9554 case ROTATERT:
9555 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9556
9557 case PLUS:
9558 case MINUS:
9559 case COMPARE:
9560 case NEG:
9561 case NOT:
9562 return COSTS_N_INSNS (1);
9563
9564 case MULT:
9565 if (arm_arch6m && arm_m_profile_small_mul)
9566 return COSTS_N_INSNS (32);
9567
9568 if (CONST_INT_P (XEXP (x, 1)))
9569 {
9570 int cycles = 0;
9571 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9572
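	  /* Roughly one cycle per two bits of the constant multiplier,
	     e.g. 8 cycles for a 16-bit constant.  */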
9573 while (i)
9574 {
9575 i >>= 2;
9576 cycles++;
9577 }
9578 return COSTS_N_INSNS (2) + cycles;
9579 }
9580 return COSTS_N_INSNS (1) + 16;
9581
9582 case SET:
9583 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9584 the mode. */
9585 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9586 return (COSTS_N_INSNS (words)
9587 + 4 * ((MEM_P (SET_SRC (x)))
9588 + MEM_P (SET_DEST (x))));
9589
9590 case CONST_INT:
9591 if (outer == SET)
9592 {
9593 if (UINTVAL (x) < 256
9594 /* 16-bit constant. */
9595 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9596 return 0;
9597 if (thumb_shiftable_const (INTVAL (x)))
9598 return COSTS_N_INSNS (2);
9599 return arm_disable_literal_pool
9600 ? COSTS_N_INSNS (8)
9601 : COSTS_N_INSNS (3);
9602 }
9603 else if ((outer == PLUS || outer == COMPARE)
9604 && INTVAL (x) < 256 && INTVAL (x) > -256)
9605 return 0;
9606 else if ((outer == IOR || outer == XOR || outer == AND)
9607 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9608 return COSTS_N_INSNS (1);
9609 else if (outer == AND)
9610 {
9611 int i;
9612 /* This duplicates the tests in the andsi3 expander. */
9613 for (i = 9; i <= 31; i++)
9614 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9615 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9616 return COSTS_N_INSNS (2);
9617 }
9618 else if (outer == ASHIFT || outer == ASHIFTRT
9619 || outer == LSHIFTRT)
9620 return 0;
9621 return COSTS_N_INSNS (2);
9622
9623 case CONST:
9624 case CONST_DOUBLE:
9625 case LABEL_REF:
9626 case SYMBOL_REF:
9627 return COSTS_N_INSNS (3);
9628
9629 case UDIV:
9630 case UMOD:
9631 case DIV:
9632 case MOD:
9633 return 100;
9634
9635 case TRUNCATE:
9636 return 99;
9637
9638 case AND:
9639 case XOR:
9640 case IOR:
9641 /* XXX guess. */
9642 return 8;
9643
9644 case MEM:
9645 /* XXX another guess. */
9646 /* Memory costs quite a lot for the first word, but subsequent words
9647 load at the equivalent of a single insn each. */
9648 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9649 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9650 ? 4 : 0));
9651
9652 case IF_THEN_ELSE:
9653 /* XXX a guess. */
9654 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9655 return 14;
9656 return 2;
9657
9658 case SIGN_EXTEND:
9659 case ZERO_EXTEND:
9660 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9661 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9662
9663 if (mode == SImode)
9664 return total;
9665
9666 if (arm_arch6)
9667 return total + COSTS_N_INSNS (1);
9668
9669 /* Assume a two-shift sequence. Increase the cost slightly so
9670 we prefer actual shifts over an extend operation. */
9671 return total + 1 + COSTS_N_INSNS (2);
9672
9673 default:
9674 return 99;
9675 }
9676 }
9677
9678 /* Estimates the size cost of thumb1 instructions.
9679 For now most of the code is copied from thumb1_rtx_costs. We need more
9680 fine-grained tuning when we have more related test cases. */
9681 static inline int
9682 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9683 {
9684 machine_mode mode = GET_MODE (x);
9685 int words, cost;
9686
9687 switch (code)
9688 {
9689 case ASHIFT:
9690 case ASHIFTRT:
9691 case LSHIFTRT:
9692 case ROTATERT:
9693 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9694
9695 case PLUS:
9696 case MINUS:
9697 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9698 shiftsub1 patterns generated by RTL expansion, especially for the
9699 expansion of multiplication. */
9700 if ((GET_CODE (XEXP (x, 0)) == MULT
9701 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9702 || (GET_CODE (XEXP (x, 1)) == MULT
9703 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9704 return COSTS_N_INSNS (2);
9705 /* Fall through. */
9706 case COMPARE:
9707 case NEG:
9708 case NOT:
9709 return COSTS_N_INSNS (1);
9710
9711 case MULT:
9712 if (CONST_INT_P (XEXP (x, 1)))
9713 {
9714 /* The Thumb-1 mul instruction can't operate on a constant; we must
9715 load it into a register first. */
9716 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9717 /* For the targets which have a very small and high-latency multiply
9718 unit, we prefer to synthesize the mult with up to 5 instructions,
9719 giving a good balance between size and performance. */
9720 if (arm_arch6m && arm_m_profile_small_mul)
9721 return COSTS_N_INSNS (5);
9722 else
9723 return COSTS_N_INSNS (1) + const_size;
9724 }
9725 return COSTS_N_INSNS (1);
9726
9727 case SET:
9728 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9729 the mode. */
9730 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9731 cost = COSTS_N_INSNS (words);
9732 if (satisfies_constraint_J (SET_SRC (x))
9733 || satisfies_constraint_K (SET_SRC (x))
9734 /* Too big an immediate for a 2-byte mov, using MOVT. */
9735 || (CONST_INT_P (SET_SRC (x))
9736 && UINTVAL (SET_SRC (x)) >= 256
9737 && TARGET_HAVE_MOVT
9738 && satisfies_constraint_j (SET_SRC (x)))
9739 /* thumb1_movdi_insn. */
9740 || ((words > 1) && MEM_P (SET_SRC (x))))
9741 cost += COSTS_N_INSNS (1);
9742 return cost;
9743
9744 case CONST_INT:
9745 if (outer == SET)
9746 {
9747 if (UINTVAL (x) < 256)
9748 return COSTS_N_INSNS (1);
9749 /* movw is 4 bytes long. */
9750 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9751 return COSTS_N_INSNS (2);
9752 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9753 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9754 return COSTS_N_INSNS (2);
9755 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9756 if (thumb_shiftable_const (INTVAL (x)))
9757 return COSTS_N_INSNS (2);
9758 return arm_disable_literal_pool
9759 ? COSTS_N_INSNS (8)
9760 : COSTS_N_INSNS (3);
9761 }
9762 else if ((outer == PLUS || outer == COMPARE)
9763 && INTVAL (x) < 256 && INTVAL (x) > -256)
9764 return 0;
9765 else if ((outer == IOR || outer == XOR || outer == AND)
9766 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9767 return COSTS_N_INSNS (1);
9768 else if (outer == AND)
9769 {
9770 int i;
9771 /* This duplicates the tests in the andsi3 expander. */
9772 for (i = 9; i <= 31; i++)
9773 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9774 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9775 return COSTS_N_INSNS (2);
9776 }
9777 else if (outer == ASHIFT || outer == ASHIFTRT
9778 || outer == LSHIFTRT)
9779 return 0;
9780 return COSTS_N_INSNS (2);
9781
9782 case CONST:
9783 case CONST_DOUBLE:
9784 case LABEL_REF:
9785 case SYMBOL_REF:
9786 return COSTS_N_INSNS (3);
9787
9788 case UDIV:
9789 case UMOD:
9790 case DIV:
9791 case MOD:
9792 return 100;
9793
9794 case TRUNCATE:
9795 return 99;
9796
9797 case AND:
9798 case XOR:
9799 case IOR:
9800 return COSTS_N_INSNS (1);
9801
9802 case MEM:
9803 return (COSTS_N_INSNS (1)
9804 + COSTS_N_INSNS (1)
9805 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9806 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9807 ? COSTS_N_INSNS (1) : 0));
9808
9809 case IF_THEN_ELSE:
9810 /* XXX a guess. */
9811 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9812 return 14;
9813 return 2;
9814
9815 case ZERO_EXTEND:
9816 /* XXX still guessing. */
9817 switch (GET_MODE (XEXP (x, 0)))
9818 {
9819 case E_QImode:
9820 return (1 + (mode == DImode ? 4 : 0)
9821 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9822
9823 case E_HImode:
9824 return (4 + (mode == DImode ? 4 : 0)
9825 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9826
9827 case E_SImode:
9828 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9829
9830 default:
9831 return 99;
9832 }
9833
9834 default:
9835 return 99;
9836 }
9837 }
9838
9839 /* Helper function for arm_rtx_costs. If one operand of the OP, a
9840 PLUS, adds the carry flag, then return the other operand. If
9841 neither is a carry, return OP unchanged. */
9842 static rtx
9843 strip_carry_operation (rtx op)
9844 {
9845 gcc_assert (GET_CODE (op) == PLUS);
9846 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
9847 return XEXP (op, 1);
9848 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
9849 return XEXP (op, 0);
9850 return op;
9851 }
9852
9853 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9854 operand, then return the operand that is being shifted. If the shift
9855 is not by a constant, then set SHIFT_REG to point to the operand.
9856 Return NULL if OP is not a shifter operand. */
9857 static rtx
9858 shifter_op_p (rtx op, rtx *shift_reg)
9859 {
9860 enum rtx_code code = GET_CODE (op);
9861
9862 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9863 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9864 return XEXP (op, 0);
9865 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9866 return XEXP (op, 0);
9867 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9868 || code == ASHIFTRT)
9869 {
9870 if (!CONST_INT_P (XEXP (op, 1)))
9871 *shift_reg = XEXP (op, 1);
9872 return XEXP (op, 0);
9873 }
9874
9875 return NULL;
9876 }
9877
9878 static bool
9879 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9880 {
9881 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9882 rtx_code code = GET_CODE (x);
9883 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9884
9885 switch (XINT (x, 1))
9886 {
9887 case UNSPEC_UNALIGNED_LOAD:
9888 /* We can only do unaligned loads into the integer unit, and we can't
9889 use LDM or LDRD. */
9890 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9891 if (speed_p)
9892 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9893 + extra_cost->ldst.load_unaligned);
9894
9895 #ifdef NOT_YET
9896 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9897 ADDR_SPACE_GENERIC, speed_p);
9898 #endif
9899 return true;
9900
9901 case UNSPEC_UNALIGNED_STORE:
9902 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9903 if (speed_p)
9904 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9905 + extra_cost->ldst.store_unaligned);
9906
9907 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9908 #ifdef NOT_YET
9909 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9910 ADDR_SPACE_GENERIC, speed_p);
9911 #endif
9912 return true;
9913
9914 case UNSPEC_VRINTZ:
9915 case UNSPEC_VRINTP:
9916 case UNSPEC_VRINTM:
9917 case UNSPEC_VRINTR:
9918 case UNSPEC_VRINTX:
9919 case UNSPEC_VRINTA:
9920 if (speed_p)
9921 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9922
9923 return true;
9924 default:
9925 *cost = COSTS_N_INSNS (2);
9926 break;
9927 }
9928 return true;
9929 }
9930
9931 /* Cost of a libcall. We assume one insn per argument, an amount for the
9932 call (one insn for -Os) and then one for processing the result. */
9933 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
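/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size.  */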
9934
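/* Helper for the PLUS and MINUS cases below: if operand IDX of X is a
   left-shift-like shifter operand, add the cost of an arithmetic
   operation with a shifted (or register-shifted) operand plus the cost
   of the other operand, and return true from the enclosing function.  */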
9935 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9936 do \
9937 { \
9938 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9939 if (shift_op != NULL \
9940 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9941 { \
9942 if (shift_reg) \
9943 { \
9944 if (speed_p) \
9945 *cost += extra_cost->alu.arith_shift_reg; \
9946 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9947 ASHIFT, 1, speed_p); \
9948 } \
9949 else if (speed_p) \
9950 *cost += extra_cost->alu.arith_shift; \
9951 \
9952 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9953 ASHIFT, 0, speed_p) \
9954 + rtx_cost (XEXP (x, 1 - IDX), \
9955 GET_MODE (shift_op), \
9956 OP, 1, speed_p)); \
9957 return true; \
9958 } \
9959 } \
9960 while (0)
9961
9962 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9963 considering the costs of the addressing mode and memory access
9964 separately. */
9965 static bool
9966 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9967 int *cost, bool speed_p)
9968 {
9969 machine_mode mode = GET_MODE (x);
9970
9971 *cost = COSTS_N_INSNS (1);
9972
9973 if (flag_pic
9974 && GET_CODE (XEXP (x, 0)) == PLUS
9975 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9976 /* This will be split into two instructions. Add the cost of the
9977 additional instruction here. The cost of the memory access is computed
9978 below. See arm.md:calculate_pic_address. */
9979 *cost += COSTS_N_INSNS (1);
9980
9981 /* Calculate cost of the addressing mode. */
9982 if (speed_p)
9983 {
9984 arm_addr_mode_op op_type;
9985 switch (GET_CODE (XEXP (x, 0)))
9986 {
9987 default:
9988 case REG:
9989 op_type = AMO_DEFAULT;
9990 break;
9991 case MINUS:
9992 /* MINUS does not appear in RTL, but the architecture supports it,
9993 so handle this case defensively. */
9994 /* fall through */
9995 case PLUS:
9996 op_type = AMO_NO_WB;
9997 break;
9998 case PRE_INC:
9999 case PRE_DEC:
10000 case POST_INC:
10001 case POST_DEC:
10002 case PRE_MODIFY:
10003 case POST_MODIFY:
10004 op_type = AMO_WB;
10005 break;
10006 }
10007
10008 if (VECTOR_MODE_P (mode))
10009 *cost += current_tune->addr_mode_costs->vector[op_type];
10010 else if (FLOAT_MODE_P (mode))
10011 *cost += current_tune->addr_mode_costs->fp[op_type];
10012 else
10013 *cost += current_tune->addr_mode_costs->integer[op_type];
10014 }
10015
10016 /* Calculate cost of memory access. */
10017 if (speed_p)
10018 {
10019 if (FLOAT_MODE_P (mode))
10020 {
10021 if (GET_MODE_SIZE (mode) == 8)
10022 *cost += extra_cost->ldst.loadd;
10023 else
10024 *cost += extra_cost->ldst.loadf;
10025 }
10026 else if (VECTOR_MODE_P (mode))
10027 *cost += extra_cost->ldst.loadv;
10028 else
10029 {
10030 /* Integer modes */
10031 if (GET_MODE_SIZE (mode) == 8)
10032 *cost += extra_cost->ldst.ldrd;
10033 else
10034 *cost += extra_cost->ldst.load;
10035 }
10036 }
10037
10038 return true;
10039 }
10040
10041 /* RTX costs. Make an estimate of the cost of executing the operation
10042 X, which is contained within an operation with code OUTER_CODE.
10043 SPEED_P indicates whether the cost desired is the performance cost,
10044 or the size cost. The estimate is stored in COST and the return
10045 value is TRUE if the cost calculation is final, or FALSE if the
10046 caller should recurse through the operands of X to add additional
10047 costs.
10048
10049 We currently make no attempt to model the size savings of Thumb-2
10050 16-bit instructions. At the normal points in compilation where
10051 this code is called we have no measure of whether the condition
10052 flags are live or not, and thus no realistic way to determine what
10053 the size will eventually be. */
10054 static bool
10055 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10056 const struct cpu_cost_table *extra_cost,
10057 int *cost, bool speed_p)
10058 {
10059 machine_mode mode = GET_MODE (x);
10060
10061 *cost = COSTS_N_INSNS (1);
10062
10063 if (TARGET_THUMB1)
10064 {
10065 if (speed_p)
10066 *cost = thumb1_rtx_costs (x, code, outer_code);
10067 else
10068 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10069 return true;
10070 }
10071
10072 switch (code)
10073 {
10074 case SET:
10075 *cost = 0;
10076 /* SET RTXs don't have a mode so we get it from the destination. */
10077 mode = GET_MODE (SET_DEST (x));
10078
10079 if (REG_P (SET_SRC (x))
10080 && REG_P (SET_DEST (x)))
10081 {
10082 /* Assume that most copies can be done with a single insn,
10083 unless we don't have HW FP, in which case everything
10084 larger than word mode will require two insns. */
10085 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10086 && GET_MODE_SIZE (mode) > 4)
10087 || mode == DImode)
10088 ? 2 : 1);
10089 /* Conditional register moves can be encoded
10090 in 16 bits in Thumb mode. */
10091 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10092 *cost >>= 1;
10093
10094 return true;
10095 }
10096
10097 if (CONST_INT_P (SET_SRC (x)))
10098 {
10099 /* Handle CONST_INT here, since the value doesn't have a mode
10100 and we would otherwise be unable to work out the true cost. */
10101 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10102 0, speed_p);
10103 outer_code = SET;
10104 /* Slightly lower the cost of setting a core reg to a constant.
10105 This helps break up chains and allows for better scheduling. */
10106 if (REG_P (SET_DEST (x))
10107 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10108 *cost -= 1;
10109 x = SET_SRC (x);
10110 /* Immediate moves with an immediate in the range [0, 255] can be
10111 encoded in 16 bits in Thumb mode. */
10112 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10113 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10114 *cost >>= 1;
10115 goto const_int_cost;
10116 }
10117
10118 return false;
10119
10120 case MEM:
10121 return arm_mem_costs (x, extra_cost, cost, speed_p);
10122
10123 case PARALLEL:
10124 {
10125 /* Calculations of LDM costs are complex. We assume an initial cost
10126 (ldm_1st) which will load the number of registers mentioned in
10127 ldm_regs_per_insn_1st registers; then each additional
10128 ldm_regs_per_insn_subsequent registers cost one more insn. The
10129 formula for N regs is thus:
10130
10131 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10132 + ldm_regs_per_insn_subsequent - 1)
10133 / ldm_regs_per_insn_subsequent).
10134
10135 Additional costs may also be added for addressing. A similar
10136 formula is used for STM. */
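	/* As a worked example, if ldm_regs_per_insn_1st and
	   ldm_regs_per_insn_subsequent were both 2 (hypothetical tuning
	   values), an 8-register LDM would cost
	   ldm_1st + COSTS_N_INSNS ((8 - 2 + 2 - 1) / 2)
	   = ldm_1st + COSTS_N_INSNS (3).  */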
10137
10138 bool is_ldm = load_multiple_operation (x, SImode);
10139 bool is_stm = store_multiple_operation (x, SImode);
10140
10141 if (is_ldm || is_stm)
10142 {
10143 if (speed_p)
10144 {
10145 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10146 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10147 ? extra_cost->ldst.ldm_regs_per_insn_1st
10148 : extra_cost->ldst.stm_regs_per_insn_1st;
10149 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10150 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10151 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10152
10153 *cost += regs_per_insn_1st
10154 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10155 + regs_per_insn_sub - 1)
10156 / regs_per_insn_sub);
10157 return true;
10158 }
10159
10160 }
10161 return false;
10162 }
10163 case DIV:
10164 case UDIV:
10165 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10166 && (mode == SFmode || !TARGET_VFP_SINGLE))
10167 *cost += COSTS_N_INSNS (speed_p
10168 ? extra_cost->fp[mode != SFmode].div : 0);
10169 else if (mode == SImode && TARGET_IDIV)
10170 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10171 else
10172 *cost = LIBCALL_COST (2);
10173
10174 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10175 are possible, udiv is preferred. */
10176 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10177 return false; /* All arguments must be in registers. */
10178
10179 case MOD:
10180 /* MOD by a power of 2 can be expanded as:
10181 rsbs r1, r0, #0
10182 and r0, r0, #(n - 1)
10183 and r1, r1, #(n - 1)
10184 rsbpl r0, r1, #0. */
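      /* Together with the base COSTS_N_INSNS (1) assigned on entry to
	 this function, the COSTS_N_INSNS (3) added below accounts for
	 that four-instruction sequence.  */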
10185 if (CONST_INT_P (XEXP (x, 1))
10186 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10187 && mode == SImode)
10188 {
10189 *cost += COSTS_N_INSNS (3);
10190
10191 if (speed_p)
10192 *cost += 2 * extra_cost->alu.logical
10193 + extra_cost->alu.arith;
10194 return true;
10195 }
10196
10197 /* Fall-through. */
10198 case UMOD:
10199 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10200 are possible, udiv is preferred. */
10201 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10202 return false; /* All arguments must be in registers. */
10203
10204 case ROTATE:
10205 if (mode == SImode && REG_P (XEXP (x, 1)))
10206 {
10207 *cost += (COSTS_N_INSNS (1)
10208 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10209 if (speed_p)
10210 *cost += extra_cost->alu.shift_reg;
10211 return true;
10212 }
10213 /* Fall through */
10214 case ROTATERT:
10215 case ASHIFT:
10216 case LSHIFTRT:
10217 case ASHIFTRT:
10218 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10219 {
10220 *cost += (COSTS_N_INSNS (2)
10221 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10222 if (speed_p)
10223 *cost += 2 * extra_cost->alu.shift;
10224 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10225 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10226 *cost += 1;
10227 return true;
10228 }
10229 else if (mode == SImode)
10230 {
10231 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10232 /* Slightly disparage register shifts at -Os, but not by much. */
10233 if (!CONST_INT_P (XEXP (x, 1)))
10234 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10235 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10236 return true;
10237 }
10238 else if (GET_MODE_CLASS (mode) == MODE_INT
10239 && GET_MODE_SIZE (mode) < 4)
10240 {
10241 if (code == ASHIFT)
10242 {
10243 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10244 /* Slightly disparage register shifts at -Os, but not by
10245 much. */
10246 if (!CONST_INT_P (XEXP (x, 1)))
10247 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10248 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10249 }
10250 else if (code == LSHIFTRT || code == ASHIFTRT)
10251 {
10252 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10253 {
10254 /* Can use SBFX/UBFX. */
10255 if (speed_p)
10256 *cost += extra_cost->alu.bfx;
10257 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10258 }
10259 else
10260 {
10261 *cost += COSTS_N_INSNS (1);
10262 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10263 if (speed_p)
10264 {
10265 if (CONST_INT_P (XEXP (x, 1)))
10266 *cost += 2 * extra_cost->alu.shift;
10267 else
10268 *cost += (extra_cost->alu.shift
10269 + extra_cost->alu.shift_reg);
10270 }
10271 else
10272 /* Slightly disparage register shifts. */
10273 *cost += !CONST_INT_P (XEXP (x, 1));
10274 }
10275 }
10276 else /* Rotates. */
10277 {
10278 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10279 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10280 if (speed_p)
10281 {
10282 if (CONST_INT_P (XEXP (x, 1)))
10283 *cost += (2 * extra_cost->alu.shift
10284 + extra_cost->alu.log_shift);
10285 else
10286 *cost += (extra_cost->alu.shift
10287 + extra_cost->alu.shift_reg
10288 + extra_cost->alu.log_shift_reg);
10289 }
10290 }
10291 return true;
10292 }
10293
10294 *cost = LIBCALL_COST (2);
10295 return false;
10296
10297 case BSWAP:
10298 if (arm_arch6)
10299 {
10300 if (mode == SImode)
10301 {
10302 if (speed_p)
10303 *cost += extra_cost->alu.rev;
10304
10305 return false;
10306 }
10307 }
10308 else
10309 {
10310 /* No rev instruction available. Look at arm_legacy_rev
10311 and thumb_legacy_rev for the form of RTL used then. */
10312 if (TARGET_THUMB)
10313 {
10314 *cost += COSTS_N_INSNS (9);
10315
10316 if (speed_p)
10317 {
10318 *cost += 6 * extra_cost->alu.shift;
10319 *cost += 3 * extra_cost->alu.logical;
10320 }
10321 }
10322 else
10323 {
10324 *cost += COSTS_N_INSNS (4);
10325
10326 if (speed_p)
10327 {
10328 *cost += 2 * extra_cost->alu.shift;
10329 *cost += extra_cost->alu.arith_shift;
10330 *cost += 2 * extra_cost->alu.logical;
10331 }
10332 }
10333 return true;
10334 }
10335 return false;
10336
10337 case MINUS:
10338 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10339 && (mode == SFmode || !TARGET_VFP_SINGLE))
10340 {
10341 if (GET_CODE (XEXP (x, 0)) == MULT
10342 || GET_CODE (XEXP (x, 1)) == MULT)
10343 {
10344 rtx mul_op0, mul_op1, sub_op;
10345
10346 if (speed_p)
10347 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10348
10349 if (GET_CODE (XEXP (x, 0)) == MULT)
10350 {
10351 mul_op0 = XEXP (XEXP (x, 0), 0);
10352 mul_op1 = XEXP (XEXP (x, 0), 1);
10353 sub_op = XEXP (x, 1);
10354 }
10355 else
10356 {
10357 mul_op0 = XEXP (XEXP (x, 1), 0);
10358 mul_op1 = XEXP (XEXP (x, 1), 1);
10359 sub_op = XEXP (x, 0);
10360 }
10361
10362 /* The first operand of the multiply may be optionally
10363 negated. */
10364 if (GET_CODE (mul_op0) == NEG)
10365 mul_op0 = XEXP (mul_op0, 0);
10366
10367 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10368 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10369 + rtx_cost (sub_op, mode, code, 0, speed_p));
10370
10371 return true;
10372 }
10373
10374 if (speed_p)
10375 *cost += extra_cost->fp[mode != SFmode].addsub;
10376 return false;
10377 }
10378
10379 if (mode == SImode)
10380 {
10381 rtx shift_by_reg = NULL;
10382 rtx shift_op;
10383 rtx non_shift_op;
10384 rtx op0 = XEXP (x, 0);
10385 rtx op1 = XEXP (x, 1);
10386
10387 /* Factor out any borrow operation. There's more than one way
10388 of expressing this; try to recognize them all. */
10389 if (GET_CODE (op0) == MINUS)
10390 {
10391 if (arm_borrow_operation (op1, SImode))
10392 {
10393 op1 = XEXP (op0, 1);
10394 op0 = XEXP (op0, 0);
10395 }
10396 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10397 op0 = XEXP (op0, 0);
10398 }
10399 else if (GET_CODE (op1) == PLUS
10400 && arm_borrow_operation (XEXP (op1, 0), SImode))
10401 op1 = XEXP (op1, 0);
10402 else if (GET_CODE (op0) == NEG
10403 && arm_borrow_operation (op1, SImode))
10404 {
10405 /* Negate with carry-in. For Thumb2 this is done with
10406 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10407 RSC instruction that exists in Arm mode. */
10408 if (speed_p)
10409 *cost += (TARGET_THUMB2
10410 ? extra_cost->alu.arith_shift
10411 : extra_cost->alu.arith);
10412 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10413 return true;
10414 }
10415 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10416 Note we do mean ~borrow here. */
10417 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10418 {
10419 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10420 return true;
10421 }
10422
10423 shift_op = shifter_op_p (op0, &shift_by_reg);
10424 if (shift_op == NULL)
10425 {
10426 shift_op = shifter_op_p (op1, &shift_by_reg);
10427 non_shift_op = op0;
10428 }
10429 else
10430 non_shift_op = op1;
10431
10432 if (shift_op != NULL)
10433 {
10434 if (shift_by_reg != NULL)
10435 {
10436 if (speed_p)
10437 *cost += extra_cost->alu.arith_shift_reg;
10438 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10439 }
10440 else if (speed_p)
10441 *cost += extra_cost->alu.arith_shift;
10442
10443 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10444 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10445 return true;
10446 }
10447
10448 if (arm_arch_thumb2
10449 && GET_CODE (XEXP (x, 1)) == MULT)
10450 {
10451 /* MLS. */
10452 if (speed_p)
10453 *cost += extra_cost->mult[0].add;
10454 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10455 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10456 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10457 return true;
10458 }
10459
10460 if (CONST_INT_P (op0))
10461 {
10462 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10463 INTVAL (op0), NULL_RTX,
10464 NULL_RTX, 1, 0);
10465 *cost = COSTS_N_INSNS (insns);
10466 if (speed_p)
10467 *cost += insns * extra_cost->alu.arith;
10468 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10469 return true;
10470 }
10471 else if (speed_p)
10472 *cost += extra_cost->alu.arith;
10473
10474 /* Don't recurse as we don't want to cost any borrow that
10475 we've stripped. */
10476 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10477 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10478 return true;
10479 }
10480
10481 if (GET_MODE_CLASS (mode) == MODE_INT
10482 && GET_MODE_SIZE (mode) < 4)
10483 {
10484 rtx shift_op, shift_reg;
10485 shift_reg = NULL;
10486
10487 /* We check both sides of the MINUS for shifter operands since,
10488 unlike PLUS, it's not commutative. */
10489
10490 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10491 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10492
10493 /* Slightly disparage, as we might need to widen the result. */
10494 *cost += 1;
10495 if (speed_p)
10496 *cost += extra_cost->alu.arith;
10497
10498 if (CONST_INT_P (XEXP (x, 0)))
10499 {
10500 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10501 return true;
10502 }
10503
10504 return false;
10505 }
10506
10507 if (mode == DImode)
10508 {
10509 *cost += COSTS_N_INSNS (1);
10510
10511 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10512 {
10513 rtx op1 = XEXP (x, 1);
10514
10515 if (speed_p)
10516 *cost += 2 * extra_cost->alu.arith;
10517
10518 if (GET_CODE (op1) == ZERO_EXTEND)
10519 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10520 0, speed_p);
10521 else
10522 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10523 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10524 0, speed_p);
10525 return true;
10526 }
10527 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10528 {
10529 if (speed_p)
10530 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10531 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10532 0, speed_p)
10533 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10534 return true;
10535 }
10536 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10537 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10538 {
10539 if (speed_p)
10540 *cost += (extra_cost->alu.arith
10541 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10542 ? extra_cost->alu.arith
10543 : extra_cost->alu.arith_shift));
10544 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10545 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10546 GET_CODE (XEXP (x, 1)), 0, speed_p));
10547 return true;
10548 }
10549
10550 if (speed_p)
10551 *cost += 2 * extra_cost->alu.arith;
10552 return false;
10553 }
10554
10555 /* Vector mode? */
10556
10557 *cost = LIBCALL_COST (2);
10558 return false;
10559
10560 case PLUS:
10561 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10562 && (mode == SFmode || !TARGET_VFP_SINGLE))
10563 {
10564 if (GET_CODE (XEXP (x, 0)) == MULT)
10565 {
10566 rtx mul_op0, mul_op1, add_op;
10567
10568 if (speed_p)
10569 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10570
10571 mul_op0 = XEXP (XEXP (x, 0), 0);
10572 mul_op1 = XEXP (XEXP (x, 0), 1);
10573 add_op = XEXP (x, 1);
10574
10575 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10576 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10577 + rtx_cost (add_op, mode, code, 0, speed_p));
10578
10579 return true;
10580 }
10581
10582 if (speed_p)
10583 *cost += extra_cost->fp[mode != SFmode].addsub;
10584 return false;
10585 }
10586 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10587 {
10588 *cost = LIBCALL_COST (2);
10589 return false;
10590 }
10591
10592 /* Narrow modes can be synthesized in SImode, but the range
10593 of useful sub-operations is limited. Check for shift operations
10594 on one of the operands. Only left shifts can be used in the
10595 narrow modes. */
10596 if (GET_MODE_CLASS (mode) == MODE_INT
10597 && GET_MODE_SIZE (mode) < 4)
10598 {
10599 rtx shift_op, shift_reg;
10600 shift_reg = NULL;
10601
10602 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10603
10604 if (CONST_INT_P (XEXP (x, 1)))
10605 {
10606 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10607 INTVAL (XEXP (x, 1)), NULL_RTX,
10608 NULL_RTX, 1, 0);
10609 *cost = COSTS_N_INSNS (insns);
10610 if (speed_p)
10611 *cost += insns * extra_cost->alu.arith;
10612 /* Slightly penalize a narrow operation as the result may
10613 need widening. */
10614 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10615 return true;
10616 }
10617
10618 /* Slightly penalize a narrow operation as the result may
10619 need widening. */
10620 *cost += 1;
10621 if (speed_p)
10622 *cost += extra_cost->alu.arith;
10623
10624 return false;
10625 }
10626
10627 if (mode == SImode)
10628 {
10629 rtx shift_op, shift_reg;
10630
10631 if (TARGET_INT_SIMD
10632 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10633 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10634 {
10635 /* UXTA[BH] or SXTA[BH]. */
10636 if (speed_p)
10637 *cost += extra_cost->alu.extend_arith;
10638 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10639 0, speed_p)
10640 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10641 return true;
10642 }
10643
10644 rtx op0 = XEXP (x, 0);
10645 rtx op1 = XEXP (x, 1);
10646
10647 /* Handle a side effect of adding in the carry to an addition. */
10648 if (GET_CODE (op0) == PLUS
10649 && arm_carry_operation (op1, mode))
10650 {
10651 op1 = XEXP (op0, 1);
10652 op0 = XEXP (op0, 0);
10653 }
10654 else if (GET_CODE (op1) == PLUS
10655 && arm_carry_operation (op0, mode))
10656 {
10657 op0 = XEXP (op1, 0);
10658 op1 = XEXP (op1, 1);
10659 }
10660 else if (GET_CODE (op0) == PLUS)
10661 {
10662 op0 = strip_carry_operation (op0);
10663 if (swap_commutative_operands_p (op0, op1))
10664 std::swap (op0, op1);
10665 }
10666
10667 if (arm_carry_operation (op0, mode))
10668 {
10669 /* Adding the carry to a register is a canonicalization of
10670 adding 0 to the register plus the carry. */
10671 if (speed_p)
10672 *cost += extra_cost->alu.arith;
10673 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10674 return true;
10675 }
10676
10677 shift_reg = NULL;
10678 shift_op = shifter_op_p (op0, &shift_reg);
10679 if (shift_op != NULL)
10680 {
10681 if (shift_reg)
10682 {
10683 if (speed_p)
10684 *cost += extra_cost->alu.arith_shift_reg;
10685 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10686 }
10687 else if (speed_p)
10688 *cost += extra_cost->alu.arith_shift;
10689
10690 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10691 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10692 return true;
10693 }
10694
10695 if (GET_CODE (op0) == MULT)
10696 {
10697 rtx mul_op = op0;
10698
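	      /* The condition below recognizes the operand shapes accepted
		 by SMLA[BT][BT]: a 16-bit x 16-bit multiply-accumulate where
		 each multiply operand is either a sign_extend from HImode or
		 the top halfword selected by an arithmetic shift right of 16,
		 e.g. roughly
		 (plus (mult (sign_extend:SI ...)
			     (ashiftrt:SI ... (const_int 16)))
		       ...).  */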
10699 if (TARGET_DSP_MULTIPLY
10700 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10701 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10702 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10703 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10704 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10705 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10706 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10707 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10708 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10709 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10710 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10711 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10712 == 16))))))
10713 {
10714 /* SMLA[BT][BT]. */
10715 if (speed_p)
10716 *cost += extra_cost->mult[0].extend_add;
10717 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10718 SIGN_EXTEND, 0, speed_p)
10719 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10720 SIGN_EXTEND, 0, speed_p)
10721 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10722 return true;
10723 }
10724
10725 if (speed_p)
10726 *cost += extra_cost->mult[0].add;
10727 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10728 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10729 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10730 return true;
10731 }
10732
10733 if (CONST_INT_P (op1))
10734 {
10735 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10736 INTVAL (op1), NULL_RTX,
10737 NULL_RTX, 1, 0);
10738 *cost = COSTS_N_INSNS (insns);
10739 if (speed_p)
10740 *cost += insns * extra_cost->alu.arith;
10741 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10742 return true;
10743 }
10744
10745 if (speed_p)
10746 *cost += extra_cost->alu.arith;
10747
10748 /* Don't recurse here because we want to test the operands
10749 without any carry operation. */
10750 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10751 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10752 return true;
10753 }
10754
10755 if (mode == DImode)
10756 {
10757 if (GET_CODE (XEXP (x, 0)) == MULT
10758 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10759 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10760 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10761 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10762 {
10763 if (speed_p)
10764 *cost += extra_cost->mult[1].extend_add;
10765 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10766 ZERO_EXTEND, 0, speed_p)
10767 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10768 ZERO_EXTEND, 0, speed_p)
10769 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10770 return true;
10771 }
10772
10773 *cost += COSTS_N_INSNS (1);
10774
10775 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10776 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10777 {
10778 if (speed_p)
10779 *cost += (extra_cost->alu.arith
10780 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10781 ? extra_cost->alu.arith
10782 : extra_cost->alu.arith_shift));
10783
10784 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10785 0, speed_p)
10786 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10787 return true;
10788 }
10789
10790 if (speed_p)
10791 *cost += 2 * extra_cost->alu.arith;
10792 return false;
10793 }
10794
10795 /* Vector mode? */
10796 *cost = LIBCALL_COST (2);
10797 return false;
10798 case IOR:
10799 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10800 {
10801 if (speed_p)
10802 *cost += extra_cost->alu.rev;
10803
10804 return true;
10805 }
10806 /* Fall through. */
10807 case AND: case XOR:
10808 if (mode == SImode)
10809 {
10810 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10811 rtx op0 = XEXP (x, 0);
10812 rtx shift_op, shift_reg;
10813
10814 if (subcode == NOT
10815 && (code == AND
10816 || (code == IOR && TARGET_THUMB2)))
10817 op0 = XEXP (op0, 0);
10818
10819 shift_reg = NULL;
10820 shift_op = shifter_op_p (op0, &shift_reg);
10821 if (shift_op != NULL)
10822 {
10823 if (shift_reg)
10824 {
10825 if (speed_p)
10826 *cost += extra_cost->alu.log_shift_reg;
10827 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10828 }
10829 else if (speed_p)
10830 *cost += extra_cost->alu.log_shift;
10831
10832 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10833 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10834 return true;
10835 }
10836
10837 if (CONST_INT_P (XEXP (x, 1)))
10838 {
10839 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10840 INTVAL (XEXP (x, 1)), NULL_RTX,
10841 NULL_RTX, 1, 0);
10842
10843 *cost = COSTS_N_INSNS (insns);
10844 if (speed_p)
10845 *cost += insns * extra_cost->alu.logical;
10846 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10847 return true;
10848 }
10849
10850 if (speed_p)
10851 *cost += extra_cost->alu.logical;
10852 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10853 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10854 return true;
10855 }
10856
10857 if (mode == DImode)
10858 {
10859 rtx op0 = XEXP (x, 0);
10860 enum rtx_code subcode = GET_CODE (op0);
10861
10862 *cost += COSTS_N_INSNS (1);
10863
10864 if (subcode == NOT
10865 && (code == AND
10866 || (code == IOR && TARGET_THUMB2)))
10867 op0 = XEXP (op0, 0);
10868
10869 if (GET_CODE (op0) == ZERO_EXTEND)
10870 {
10871 if (speed_p)
10872 *cost += 2 * extra_cost->alu.logical;
10873
10874 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10875 0, speed_p)
10876 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10877 return true;
10878 }
10879 else if (GET_CODE (op0) == SIGN_EXTEND)
10880 {
10881 if (speed_p)
10882 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10883
10884 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10885 0, speed_p)
10886 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10887 return true;
10888 }
10889
10890 if (speed_p)
10891 *cost += 2 * extra_cost->alu.logical;
10892
10893 return true;
10894 }
10895 /* Vector mode? */
10896
10897 *cost = LIBCALL_COST (2);
10898 return false;
10899
10900 case MULT:
10901 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10902 && (mode == SFmode || !TARGET_VFP_SINGLE))
10903 {
10904 rtx op0 = XEXP (x, 0);
10905
10906 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10907 op0 = XEXP (op0, 0);
10908
10909 if (speed_p)
10910 *cost += extra_cost->fp[mode != SFmode].mult;
10911
10912 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10913 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10914 return true;
10915 }
10916 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10917 {
10918 *cost = LIBCALL_COST (2);
10919 return false;
10920 }
10921
10922 if (mode == SImode)
10923 {
10924 if (TARGET_DSP_MULTIPLY
10925 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10926 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10927 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10928 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10929 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10930 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10931 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10932 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10933 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10934 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10935 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10936 && (INTVAL (XEXP (XEXP (x, 1), 1))
10937 == 16))))))
10938 {
10939 /* SMUL[TB][TB]. */
10940 if (speed_p)
10941 *cost += extra_cost->mult[0].extend;
10942 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10943 SIGN_EXTEND, 0, speed_p);
10944 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10945 SIGN_EXTEND, 1, speed_p);
10946 return true;
10947 }
10948 if (speed_p)
10949 *cost += extra_cost->mult[0].simple;
10950 return false;
10951 }
10952
10953 if (mode == DImode)
10954 {
10955 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10956 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10957 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10958 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10959 {
10960 if (speed_p)
10961 *cost += extra_cost->mult[1].extend;
10962 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10963 ZERO_EXTEND, 0, speed_p)
10964 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10965 ZERO_EXTEND, 0, speed_p));
10966 return true;
10967 }
10968
10969 *cost = LIBCALL_COST (2);
10970 return false;
10971 }
10972
10973 /* Vector mode? */
10974 *cost = LIBCALL_COST (2);
10975 return false;
10976
10977 case NEG:
10978 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10979 && (mode == SFmode || !TARGET_VFP_SINGLE))
10980 {
10981 if (GET_CODE (XEXP (x, 0)) == MULT)
10982 {
10983 /* VNMUL. */
10984 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10985 return true;
10986 }
10987
10988 if (speed_p)
10989 *cost += extra_cost->fp[mode != SFmode].neg;
10990
10991 return false;
10992 }
10993 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10994 {
10995 *cost = LIBCALL_COST (1);
10996 return false;
10997 }
10998
10999 if (mode == SImode)
11000 {
11001 if (GET_CODE (XEXP (x, 0)) == ABS)
11002 {
11003 *cost += COSTS_N_INSNS (1);
11004 /* Assume the non-flag-changing variant. */
11005 if (speed_p)
11006 *cost += (extra_cost->alu.log_shift
11007 + extra_cost->alu.arith_shift);
11008 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11009 return true;
11010 }
11011
11012 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11013 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11014 {
11015 *cost += COSTS_N_INSNS (1);
11016 /* No extra cost for MOV imm and MVN imm. */
11017 /* If the comparison op is using the flags, there's no further
11018	     cost; otherwise we need to add the cost of the comparison.  */
11019 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11020 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11021 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11022 {
11023 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11024 *cost += (COSTS_N_INSNS (1)
11025 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11026 0, speed_p)
11027 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11028 1, speed_p));
11029 if (speed_p)
11030 *cost += extra_cost->alu.arith;
11031 }
11032 return true;
11033 }
11034
11035 if (speed_p)
11036 *cost += extra_cost->alu.arith;
11037 return false;
11038 }
11039
11040 if (GET_MODE_CLASS (mode) == MODE_INT
11041 && GET_MODE_SIZE (mode) < 4)
11042 {
11043 /* Slightly disparage, as we might need an extend operation. */
11044 *cost += 1;
11045 if (speed_p)
11046 *cost += extra_cost->alu.arith;
11047 return false;
11048 }
11049
11050 if (mode == DImode)
11051 {
11052 *cost += COSTS_N_INSNS (1);
11053 if (speed_p)
11054 *cost += 2 * extra_cost->alu.arith;
11055 return false;
11056 }
11057
11058 /* Vector mode? */
11059 *cost = LIBCALL_COST (1);
11060 return false;
11061
11062 case NOT:
11063 if (mode == SImode)
11064 {
11065 rtx shift_op;
11066 rtx shift_reg = NULL;
11067
11068 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11069
11070 if (shift_op)
11071 {
11072 if (shift_reg != NULL)
11073 {
11074 if (speed_p)
11075 *cost += extra_cost->alu.log_shift_reg;
11076 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11077 }
11078 else if (speed_p)
11079 *cost += extra_cost->alu.log_shift;
11080 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11081 return true;
11082 }
11083
11084 if (speed_p)
11085 *cost += extra_cost->alu.logical;
11086 return false;
11087 }
11088 if (mode == DImode)
11089 {
11090 *cost += COSTS_N_INSNS (1);
11091 return false;
11092 }
11093
11094 /* Vector mode? */
11095
11096 *cost += LIBCALL_COST (1);
11097 return false;
11098
11099 case IF_THEN_ELSE:
11100 {
11101 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11102 {
11103 *cost += COSTS_N_INSNS (3);
11104 return true;
11105 }
11106 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11107 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11108
11109 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11110 /* Assume that if one arm of the if_then_else is a register,
11111	 it will be tied with the result and eliminate the
11112 conditional insn. */
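	/* For example, (if_then_else (cond) (reg) (const_int 1)) is costed
	   as just the condition plus the constant arm, on the assumption
	   that the register arm is free.  */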
11113 if (REG_P (XEXP (x, 1)))
11114 *cost += op2cost;
11115 else if (REG_P (XEXP (x, 2)))
11116 *cost += op1cost;
11117 else
11118 {
11119 if (speed_p)
11120 {
11121 if (extra_cost->alu.non_exec_costs_exec)
11122 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11123 else
11124 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11125 }
11126 else
11127 *cost += op1cost + op2cost;
11128 }
11129 }
11130 return true;
11131
11132 case COMPARE:
11133 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11134 *cost = 0;
11135 else
11136 {
11137 machine_mode op0mode;
11138 /* We'll mostly assume that the cost of a compare is the cost of the
11139 LHS. However, there are some notable exceptions. */
11140
11141 /* Floating point compares are never done as side-effects. */
11142 op0mode = GET_MODE (XEXP (x, 0));
11143 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11144 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11145 {
11146 if (speed_p)
11147 *cost += extra_cost->fp[op0mode != SFmode].compare;
11148
11149 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11150 {
11151 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11152 return true;
11153 }
11154
11155 return false;
11156 }
11157 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11158 {
11159 *cost = LIBCALL_COST (2);
11160 return false;
11161 }
11162
11163 /* DImode compares normally take two insns. */
11164 if (op0mode == DImode)
11165 {
11166 *cost += COSTS_N_INSNS (1);
11167 if (speed_p)
11168 *cost += 2 * extra_cost->alu.arith;
11169 return false;
11170 }
11171
11172 if (op0mode == SImode)
11173 {
11174 rtx shift_op;
11175 rtx shift_reg;
11176
11177 if (XEXP (x, 1) == const0_rtx
11178 && !(REG_P (XEXP (x, 0))
11179 || (GET_CODE (XEXP (x, 0)) == SUBREG
11180 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11181 {
11182 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11183
11184 /* Multiply operations that set the flags are often
11185 significantly more expensive. */
11186 if (speed_p
11187 && GET_CODE (XEXP (x, 0)) == MULT
11188 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11189 *cost += extra_cost->mult[0].flag_setting;
11190
11191 if (speed_p
11192 && GET_CODE (XEXP (x, 0)) == PLUS
11193 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11194 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11195 0), 1), mode))
11196 *cost += extra_cost->mult[0].flag_setting;
11197 return true;
11198 }
11199
11200 shift_reg = NULL;
11201 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11202 if (shift_op != NULL)
11203 {
11204 if (shift_reg != NULL)
11205 {
11206 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11207 1, speed_p);
11208 if (speed_p)
11209 *cost += extra_cost->alu.arith_shift_reg;
11210 }
11211 else if (speed_p)
11212 *cost += extra_cost->alu.arith_shift;
11213 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11214 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11215 return true;
11216 }
11217
11218 if (speed_p)
11219 *cost += extra_cost->alu.arith;
11220 if (CONST_INT_P (XEXP (x, 1))
11221 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11222 {
11223 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11224 return true;
11225 }
11226 return false;
11227 }
11228
11229 /* Vector mode? */
11230
11231 *cost = LIBCALL_COST (2);
11232 return false;
11233 }
11234 return true;
11235
11236 case EQ:
11237 case NE:
11238 case LT:
11239 case LE:
11240 case GT:
11241 case GE:
11242 case LTU:
11243 case LEU:
11244 case GEU:
11245 case GTU:
11246 case ORDERED:
11247 case UNORDERED:
11248 case UNEQ:
11249 case UNLE:
11250 case UNLT:
11251 case UNGE:
11252 case UNGT:
11253 case LTGT:
11254 if (outer_code == SET)
11255 {
11256 /* Is it a store-flag operation? */
11257 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11258 && XEXP (x, 1) == const0_rtx)
11259 {
11260 /* Thumb also needs an IT insn. */
11261 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11262 return true;
11263 }
11264 if (XEXP (x, 1) == const0_rtx)
11265 {
11266 switch (code)
11267 {
11268 case LT:
11269 /* LSR Rd, Rn, #31. */
11270 if (speed_p)
11271 *cost += extra_cost->alu.shift;
11272 break;
11273
11274 case EQ:
11275 /* RSBS T1, Rn, #0
11276 ADC Rd, Rn, T1. */
11277
11278 case NE:
11279 /* SUBS T1, Rn, #1
11280 SBC Rd, Rn, T1. */
11281 *cost += COSTS_N_INSNS (1);
11282 break;
11283
11284 case LE:
11285 /* RSBS T1, Rn, Rn, LSR #31
11286 ADC Rd, Rn, T1. */
11287 *cost += COSTS_N_INSNS (1);
11288 if (speed_p)
11289 *cost += extra_cost->alu.arith_shift;
11290 break;
11291
11292 case GT:
11293 /* RSB Rd, Rn, Rn, ASR #1
11294 LSR Rd, Rd, #31. */
11295 *cost += COSTS_N_INSNS (1);
11296 if (speed_p)
11297 *cost += (extra_cost->alu.arith_shift
11298 + extra_cost->alu.shift);
11299 break;
11300
11301 case GE:
11302 /* ASR Rd, Rn, #31
11303 ADD Rd, Rn, #1. */
11304 *cost += COSTS_N_INSNS (1);
11305 if (speed_p)
11306 *cost += extra_cost->alu.shift;
11307 break;
11308
11309 default:
11310 /* Remaining cases are either meaningless or would take
11311 three insns anyway. */
11312 *cost = COSTS_N_INSNS (3);
11313 break;
11314 }
11315 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11316 return true;
11317 }
11318 else
11319 {
11320 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11321 if (CONST_INT_P (XEXP (x, 1))
11322 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11323 {
11324 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11325 return true;
11326 }
11327
11328 return false;
11329 }
11330 }
11331 /* Not directly inside a set. If it involves the condition code
11332 register it must be the condition for a branch, cond_exec or
11333 I_T_E operation. Since the comparison is performed elsewhere
11334 this is just the control part which has no additional
11335 cost. */
11336 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11337 && XEXP (x, 1) == const0_rtx)
11338 {
11339 *cost = 0;
11340 return true;
11341 }
11342 return false;
11343
11344 case ABS:
11345 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11346 && (mode == SFmode || !TARGET_VFP_SINGLE))
11347 {
11348 if (speed_p)
11349 *cost += extra_cost->fp[mode != SFmode].neg;
11350
11351 return false;
11352 }
11353 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11354 {
11355 *cost = LIBCALL_COST (1);
11356 return false;
11357 }
11358
11359 if (mode == SImode)
11360 {
11361 if (speed_p)
11362 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11363 return false;
11364 }
11365 /* Vector mode? */
11366 *cost = LIBCALL_COST (1);
11367 return false;
11368
11369 case SIGN_EXTEND:
11370 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11371 && MEM_P (XEXP (x, 0)))
11372 {
11373 if (mode == DImode)
11374 *cost += COSTS_N_INSNS (1);
11375
11376 if (!speed_p)
11377 return true;
11378
11379 if (GET_MODE (XEXP (x, 0)) == SImode)
11380 *cost += extra_cost->ldst.load;
11381 else
11382 *cost += extra_cost->ldst.load_sign_extend;
11383
11384 if (mode == DImode)
11385 *cost += extra_cost->alu.shift;
11386
11387 return true;
11388 }
11389
11390 /* Widening from less than 32-bits requires an extend operation. */
11391 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11392 {
11393 /* We have SXTB/SXTH. */
11394 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11395 if (speed_p)
11396 *cost += extra_cost->alu.extend;
11397 }
11398 else if (GET_MODE (XEXP (x, 0)) != SImode)
11399 {
11400 /* Needs two shifts. */
11401 *cost += COSTS_N_INSNS (1);
11402 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11403 if (speed_p)
11404 *cost += 2 * extra_cost->alu.shift;
11405 }
11406
11407 /* Widening beyond 32-bits requires one more insn. */
11408 if (mode == DImode)
11409 {
11410 *cost += COSTS_N_INSNS (1);
11411 if (speed_p)
11412 *cost += extra_cost->alu.shift;
11413 }
11414
11415 return true;
11416
11417 case ZERO_EXTEND:
11418 if ((arm_arch4
11419 || GET_MODE (XEXP (x, 0)) == SImode
11420 || GET_MODE (XEXP (x, 0)) == QImode)
11421 && MEM_P (XEXP (x, 0)))
11422 {
11423 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11424
11425 if (mode == DImode)
11426 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11427
11428 return true;
11429 }
11430
11431 /* Widening from less than 32-bits requires an extend operation. */
11432 if (GET_MODE (XEXP (x, 0)) == QImode)
11433 {
11434 /* UXTB can be a shorter instruction in Thumb2, but it might
11435 be slower than the AND Rd, Rn, #255 alternative. When
11436 optimizing for speed it should never be slower to use
11437 AND, and we don't really model 16-bit vs 32-bit insns
11438 here. */
11439 if (speed_p)
11440 *cost += extra_cost->alu.logical;
11441 }
11442 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11443 {
11444 /* We have UXTB/UXTH. */
11445 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11446 if (speed_p)
11447 *cost += extra_cost->alu.extend;
11448 }
11449 else if (GET_MODE (XEXP (x, 0)) != SImode)
11450 {
11451 /* Needs two shifts. It's marginally preferable to use
11452 shifts rather than two BIC instructions as the second
11453 shift may merge with a subsequent insn as a shifter
11454 op. */
11455 *cost = COSTS_N_INSNS (2);
11456 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11457 if (speed_p)
11458 *cost += 2 * extra_cost->alu.shift;
11459 }
11460
11461 /* Widening beyond 32-bits requires one more insn. */
11462 if (mode == DImode)
11463 {
11464 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11465 }
11466
11467 return true;
11468
11469 case CONST_INT:
11470 *cost = 0;
11471 /* CONST_INT has no mode, so we cannot tell for sure how many
11472 insns are really going to be needed. The best we can do is
11473 look at the value passed. If it fits in SImode, then assume
11474 that's the mode it will be used for. Otherwise assume it
11475 will be used in DImode. */
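      /* For instance, a value that fits in 32 bits is costed directly via
	 arm_gen_constant in SImode, while something like 0x123456789 is
	 treated as DImode and costed as its low and high 32-bit halves
	 separately.  */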
11476 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11477 mode = SImode;
11478 else
11479 mode = DImode;
11480
11481 /* Avoid blowing up in arm_gen_constant (). */
11482 if (!(outer_code == PLUS
11483 || outer_code == AND
11484 || outer_code == IOR
11485 || outer_code == XOR
11486 || outer_code == MINUS))
11487 outer_code = SET;
11488
11489 const_int_cost:
11490 if (mode == SImode)
11491 {
11492 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11493 INTVAL (x), NULL, NULL,
11494 0, 0));
11495 /* Extra costs? */
11496 }
11497 else
11498 {
11499 *cost += COSTS_N_INSNS (arm_gen_constant
11500 (outer_code, SImode, NULL,
11501 trunc_int_for_mode (INTVAL (x), SImode),
11502 NULL, NULL, 0, 0)
11503 + arm_gen_constant (outer_code, SImode, NULL,
11504 INTVAL (x) >> 32, NULL,
11505 NULL, 0, 0));
11506 /* Extra costs? */
11507 }
11508
11509 return true;
11510
11511 case CONST:
11512 case LABEL_REF:
11513 case SYMBOL_REF:
11514 if (speed_p)
11515 {
11516 if (arm_arch_thumb2 && !flag_pic)
11517 *cost += COSTS_N_INSNS (1);
11518 else
11519 *cost += extra_cost->ldst.load;
11520 }
11521 else
11522 *cost += COSTS_N_INSNS (1);
11523
11524 if (flag_pic)
11525 {
11526 *cost += COSTS_N_INSNS (1);
11527 if (speed_p)
11528 *cost += extra_cost->alu.arith;
11529 }
11530
11531 return true;
11532
11533 case CONST_FIXED:
11534 *cost = COSTS_N_INSNS (4);
11535 /* Fixme. */
11536 return true;
11537
11538 case CONST_DOUBLE:
11539 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11540 && (mode == SFmode || !TARGET_VFP_SINGLE))
11541 {
11542 if (vfp3_const_double_rtx (x))
11543 {
11544 if (speed_p)
11545 *cost += extra_cost->fp[mode == DFmode].fpconst;
11546 return true;
11547 }
11548
11549 if (speed_p)
11550 {
11551 if (mode == DFmode)
11552 *cost += extra_cost->ldst.loadd;
11553 else
11554 *cost += extra_cost->ldst.loadf;
11555 }
11556 else
11557 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11558
11559 return true;
11560 }
11561 *cost = COSTS_N_INSNS (4);
11562 return true;
11563
11564 case CONST_VECTOR:
11565 /* Fixme. */
11566 if (((TARGET_NEON && TARGET_HARD_FLOAT
11567 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11568 || TARGET_HAVE_MVE)
11569 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11570 *cost = COSTS_N_INSNS (1);
11571 else
11572 *cost = COSTS_N_INSNS (4);
11573 return true;
11574
11575 case HIGH:
11576 case LO_SUM:
11577 /* When optimizing for size, we prefer constant pool entries to
11578 MOVW/MOVT pairs, so bump the cost of these slightly. */
11579 if (!speed_p)
11580 *cost += 1;
11581 return true;
11582
11583 case CLZ:
11584 if (speed_p)
11585 *cost += extra_cost->alu.clz;
11586 return false;
11587
11588 case SMIN:
11589 if (XEXP (x, 1) == const0_rtx)
11590 {
11591 if (speed_p)
11592 *cost += extra_cost->alu.log_shift;
11593 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11594 return true;
11595 }
11596 /* Fall through. */
11597 case SMAX:
11598 case UMIN:
11599 case UMAX:
11600 *cost += COSTS_N_INSNS (1);
11601 return false;
11602
11603 case TRUNCATE:
11604 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11605 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11606 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11608 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11609 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11610 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11611 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11612 == ZERO_EXTEND))))
11613 {
11614 if (speed_p)
11615 *cost += extra_cost->mult[1].extend;
11616 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11617 ZERO_EXTEND, 0, speed_p)
11618 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11619 ZERO_EXTEND, 0, speed_p));
11620 return true;
11621 }
11622 *cost = LIBCALL_COST (1);
11623 return false;
11624
11625 case UNSPEC_VOLATILE:
11626 case UNSPEC:
11627 return arm_unspec_cost (x, outer_code, speed_p, cost);
11628
11629 case PC:
11630 /* Reading the PC is like reading any other register. Writing it
11631 is more expensive, but we take that into account elsewhere. */
11632 *cost = 0;
11633 return true;
11634
11635 case ZERO_EXTRACT:
11636 /* TODO: Simple zero_extract of bottom bits using AND. */
11637 /* Fall through. */
11638 case SIGN_EXTRACT:
11639 if (arm_arch6
11640 && mode == SImode
11641 && CONST_INT_P (XEXP (x, 1))
11642 && CONST_INT_P (XEXP (x, 2)))
11643 {
11644 if (speed_p)
11645 *cost += extra_cost->alu.bfx;
11646 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11647 return true;
11648 }
11649 /* Without UBFX/SBFX, need to resort to shift operations. */
11650 *cost += COSTS_N_INSNS (1);
11651 if (speed_p)
11652 *cost += 2 * extra_cost->alu.shift;
11653 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11654 return true;
11655
11656 case FLOAT_EXTEND:
11657 if (TARGET_HARD_FLOAT)
11658 {
11659 if (speed_p)
11660 *cost += extra_cost->fp[mode == DFmode].widen;
11661 if (!TARGET_VFP5
11662 && GET_MODE (XEXP (x, 0)) == HFmode)
11663 {
11664 /* Pre v8, widening HF->DF is a two-step process, first
11665 widening to SFmode. */
11666 *cost += COSTS_N_INSNS (1);
11667 if (speed_p)
11668 *cost += extra_cost->fp[0].widen;
11669 }
11670 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11671 return true;
11672 }
11673
11674 *cost = LIBCALL_COST (1);
11675 return false;
11676
11677 case FLOAT_TRUNCATE:
11678 if (TARGET_HARD_FLOAT)
11679 {
11680 if (speed_p)
11681 *cost += extra_cost->fp[mode == DFmode].narrow;
11682 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11683 return true;
11684 /* Vector modes? */
11685 }
11686 *cost = LIBCALL_COST (1);
11687 return false;
11688
11689 case FMA:
11690 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11691 {
11692 rtx op0 = XEXP (x, 0);
11693 rtx op1 = XEXP (x, 1);
11694 rtx op2 = XEXP (x, 2);
11695
11696
11697 /* vfms or vfnma. */
11698 if (GET_CODE (op0) == NEG)
11699 op0 = XEXP (op0, 0);
11700
11701 /* vfnms or vfnma. */
11702 if (GET_CODE (op2) == NEG)
11703 op2 = XEXP (op2, 0);
11704
11705 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11706 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11707 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11708
11709 if (speed_p)
11710	    *cost += extra_cost->fp[mode == DFmode].fma;
11711
11712 return true;
11713 }
11714
11715 *cost = LIBCALL_COST (3);
11716 return false;
11717
11718 case FIX:
11719 case UNSIGNED_FIX:
11720 if (TARGET_HARD_FLOAT)
11721 {
11722 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11723 a vcvt fixed-point conversion. */
11724 if (code == FIX && mode == SImode
11725 && GET_CODE (XEXP (x, 0)) == FIX
11726 && GET_MODE (XEXP (x, 0)) == SFmode
11727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11728 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11729 > 0)
11730 {
11731 if (speed_p)
11732 *cost += extra_cost->fp[0].toint;
11733
11734 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11735 code, 0, speed_p);
11736 return true;
11737 }
11738
11739 if (GET_MODE_CLASS (mode) == MODE_INT)
11740 {
11741 mode = GET_MODE (XEXP (x, 0));
11742 if (speed_p)
11743 *cost += extra_cost->fp[mode == DFmode].toint;
11744	  /* Strip off the 'cost' of rounding towards zero. */
11745 if (GET_CODE (XEXP (x, 0)) == FIX)
11746 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11747 0, speed_p);
11748 else
11749 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11750 /* ??? Increase the cost to deal with transferring from
11751 FP -> CORE registers? */
11752 return true;
11753 }
11754 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11755 && TARGET_VFP5)
11756 {
11757 if (speed_p)
11758 *cost += extra_cost->fp[mode == DFmode].roundint;
11759 return false;
11760 }
11761 /* Vector costs? */
11762 }
11763 *cost = LIBCALL_COST (1);
11764 return false;
11765
11766 case FLOAT:
11767 case UNSIGNED_FLOAT:
11768 if (TARGET_HARD_FLOAT)
11769 {
11770 /* ??? Increase the cost to deal with transferring from CORE
11771 -> FP registers? */
11772 if (speed_p)
11773 *cost += extra_cost->fp[mode == DFmode].fromint;
11774 return false;
11775 }
11776 *cost = LIBCALL_COST (1);
11777 return false;
11778
11779 case CALL:
11780 return true;
11781
11782 case ASM_OPERANDS:
11783 {
11784	/* This is just a guess: estimate the number of instructions in the asm
11785	   plus one insn per input, but always charge a minimum of
11786	   COSTS_N_INSNS (1) (see PR60663). */
11787 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11788 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11789
11790 *cost = COSTS_N_INSNS (asm_length + num_operands);
11791 return true;
11792 }
11793 default:
11794 if (mode != VOIDmode)
11795 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11796 else
11797 *cost = COSTS_N_INSNS (4); /* Who knows? */
11798 return false;
11799 }
11800 }
11801
11802 #undef HANDLE_NARROW_SHIFT_ARITH
11803
11804 /* RTX costs entry point. */
11805
11806 static bool
11807 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11808 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11809 {
11810 bool result;
11811 int code = GET_CODE (x);
11812 gcc_assert (current_tune->insn_extra_cost);
11813
11814 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11815 (enum rtx_code) outer_code,
11816 current_tune->insn_extra_cost,
11817 total, speed);
11818
11819 if (dump_file && arm_verbose_cost)
11820 {
11821 print_rtl_single (dump_file, x);
11822 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11823 *total, result ? "final" : "partial");
11824 }
11825 return result;
11826 }
11827
11828 static int
11829 arm_insn_cost (rtx_insn *insn, bool speed)
11830 {
11831 int cost;
11832
11833 /* Don't cost a simple reg-reg move at a full insn cost: such moves
11834 will likely disappear during register allocation. */
11835 if (!reload_completed
11836 && GET_CODE (PATTERN (insn)) == SET
11837 && REG_P (SET_DEST (PATTERN (insn)))
11838 && REG_P (SET_SRC (PATTERN (insn))))
11839 return 2;
11840 cost = pattern_cost (PATTERN (insn), speed);
11841 /* If the cost is zero, then it's likely a complex insn. We don't want the
11842 cost of these to be less than something we know about. */
11843 return cost ? cost : COSTS_N_INSNS (2);
11844 }
11845
11846 /* All address computations that can be done are free, but rtx cost returns
11847 the same for practically all of them. So we weight the different types
11848 of address here in the order (most pref first):
11849 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
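/* For example, with these weights (post_inc (reg)) scores 0,
   (plus (reg) (const_int 4)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, a plain (reg) scores 6
   and a bare (symbol_ref) scores 10.  */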
11850 static inline int
11851 arm_arm_address_cost (rtx x)
11852 {
11853 enum rtx_code c = GET_CODE (x);
11854
11855 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11856 return 0;
11857 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11858 return 10;
11859
11860 if (c == PLUS)
11861 {
11862 if (CONST_INT_P (XEXP (x, 1)))
11863 return 2;
11864
11865 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11866 return 3;
11867
11868 return 4;
11869 }
11870
11871 return 6;
11872 }
11873
11874 static inline int
11875 arm_thumb_address_cost (rtx x)
11876 {
11877 enum rtx_code c = GET_CODE (x);
11878
11879 if (c == REG)
11880 return 1;
11881 if (c == PLUS
11882 && REG_P (XEXP (x, 0))
11883 && CONST_INT_P (XEXP (x, 1)))
11884 return 1;
11885
11886 return 2;
11887 }
11888
11889 static int
11890 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11891 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11892 {
11893 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11894 }
11895
11896 /* Adjust cost hook for XScale. */
11897 static bool
11898 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11899 int * cost)
11900 {
11901 /* Some true dependencies can have a higher cost depending
11902 on precisely how certain input operands are used. */
11903 if (dep_type == 0
11904 && recog_memoized (insn) >= 0
11905 && recog_memoized (dep) >= 0)
11906 {
11907 int shift_opnum = get_attr_shift (insn);
11908 enum attr_type attr_type = get_attr_type (dep);
11909
11910 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11911 operand for INSN. If we have a shifted input operand and the
11912 instruction we depend on is another ALU instruction, then we may
11913 have to account for an additional stall. */
11914 if (shift_opnum != 0
11915 && (attr_type == TYPE_ALU_SHIFT_IMM
11916 || attr_type == TYPE_ALUS_SHIFT_IMM
11917 || attr_type == TYPE_LOGIC_SHIFT_IMM
11918 || attr_type == TYPE_LOGICS_SHIFT_IMM
11919 || attr_type == TYPE_ALU_SHIFT_REG
11920 || attr_type == TYPE_ALUS_SHIFT_REG
11921 || attr_type == TYPE_LOGIC_SHIFT_REG
11922 || attr_type == TYPE_LOGICS_SHIFT_REG
11923 || attr_type == TYPE_MOV_SHIFT
11924 || attr_type == TYPE_MVN_SHIFT
11925 || attr_type == TYPE_MOV_SHIFT_REG
11926 || attr_type == TYPE_MVN_SHIFT_REG))
11927 {
11928 rtx shifted_operand;
11929 int opno;
11930
11931 /* Get the shifted operand. */
11932 extract_insn (insn);
11933 shifted_operand = recog_data.operand[shift_opnum];
11934
11935 /* Iterate over all the operands in DEP. If we write an operand
11936	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11937 cost of this dependency. */
11938 extract_insn (dep);
11939 preprocess_constraints (dep);
11940 for (opno = 0; opno < recog_data.n_operands; opno++)
11941 {
11942 /* We can ignore strict inputs. */
11943 if (recog_data.operand_type[opno] == OP_IN)
11944 continue;
11945
11946 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11947 shifted_operand))
11948 {
11949 *cost = 2;
11950 return false;
11951 }
11952 }
11953 }
11954 }
11955 return true;
11956 }
11957
11958 /* Adjust cost hook for Cortex A9. */
11959 static bool
11960 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11961 int * cost)
11962 {
11963 switch (dep_type)
11964 {
11965 case REG_DEP_ANTI:
11966 *cost = 0;
11967 return false;
11968
11969 case REG_DEP_TRUE:
11970 case REG_DEP_OUTPUT:
11971 if (recog_memoized (insn) >= 0
11972 && recog_memoized (dep) >= 0)
11973 {
11974 if (GET_CODE (PATTERN (insn)) == SET)
11975 {
11976 if (GET_MODE_CLASS
11977 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11978 || GET_MODE_CLASS
11979 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11980 {
11981 enum attr_type attr_type_insn = get_attr_type (insn);
11982 enum attr_type attr_type_dep = get_attr_type (dep);
11983
11984 /* By default all dependencies of the form
11985 s0 = s0 <op> s1
11986 s0 = s0 <op> s2
11987 have an extra latency of 1 cycle because
11988 of the input and output dependency in this
11989		     case.  However, this gets modeled as a true
11990		     dependency, hence all these checks. */
11991 if (REG_P (SET_DEST (PATTERN (insn)))
11992 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11993 {
11994 /* FMACS is a special case where the dependent
11995 instruction can be issued 3 cycles before
11996 the normal latency in case of an output
11997 dependency. */
11998 if ((attr_type_insn == TYPE_FMACS
11999 || attr_type_insn == TYPE_FMACD)
12000 && (attr_type_dep == TYPE_FMACS
12001 || attr_type_dep == TYPE_FMACD))
12002 {
12003 if (dep_type == REG_DEP_OUTPUT)
12004 *cost = insn_default_latency (dep) - 3;
12005 else
12006 *cost = insn_default_latency (dep);
12007 return false;
12008 }
12009 else
12010 {
12011 if (dep_type == REG_DEP_OUTPUT)
12012 *cost = insn_default_latency (dep) + 1;
12013 else
12014 *cost = insn_default_latency (dep);
12015 }
12016 return false;
12017 }
12018 }
12019 }
12020 }
12021 break;
12022
12023 default:
12024 gcc_unreachable ();
12025 }
12026
12027 return true;
12028 }
12029
12030 /* Adjust cost hook for FA726TE. */
12031 static bool
12032 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12033 int * cost)
12034 {
12035   /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn
12036      followed by a predicated one) has a penalty of 3. */
12037 if (dep_type == REG_DEP_TRUE
12038 && recog_memoized (insn) >= 0
12039 && recog_memoized (dep) >= 0
12040 && get_attr_conds (dep) == CONDS_SET)
12041 {
12042 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12043 if (get_attr_conds (insn) == CONDS_USE
12044 && get_attr_type (insn) != TYPE_BRANCH)
12045 {
12046 *cost = 3;
12047 return false;
12048 }
12049
12050 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12051 || get_attr_conds (insn) == CONDS_USE)
12052 {
12053 *cost = 0;
12054 return false;
12055 }
12056 }
12057
12058 return true;
12059 }
12060
12061 /* Implement TARGET_REGISTER_MOVE_COST.
12062
12063 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12064    such a move is typically more expensive than a single memory access.  We set
12065 the cost to less than two memory accesses so that floating
12066 point to integer conversion does not go through memory. */
12067
12068 int
12069 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12070 reg_class_t from, reg_class_t to)
12071 {
12072 if (TARGET_32BIT)
12073 {
12074 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12075 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12076 return 15;
12077 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12078 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12079 return 4;
12080 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12081 return 20;
12082 else
12083 return 2;
12084 }
12085 else
12086 {
12087 if (from == HI_REGS || to == HI_REGS)
12088 return 4;
12089 else
12090 return 2;
12091 }
12092 }
12093
12094 /* Implement TARGET_MEMORY_MOVE_COST. */
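/* On 32-bit targets this is a flat 10.  On Thumb-1, sub-word modes cost 8,
   and otherwise the cost scales with the mode size: 2 * GET_MODE_SIZE for
   LO_REGS and twice that for other classes (e.g. SImode is 8 for LO_REGS
   and 16 for HI_REGS).  */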
12095
12096 int
12097 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12098 bool in ATTRIBUTE_UNUSED)
12099 {
12100 if (TARGET_32BIT)
12101 return 10;
12102 else
12103 {
12104 if (GET_MODE_SIZE (mode) < 4)
12105 return 8;
12106 else
12107 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12108 }
12109 }
12110
12111 /* Vectorizer cost model implementation. */
12112
12113 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12114 static int
12115 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12116 tree vectype,
12117 int misalign ATTRIBUTE_UNUSED)
12118 {
12119 unsigned elements;
12120
12121 switch (type_of_cost)
12122 {
12123 case scalar_stmt:
12124 return current_tune->vec_costs->scalar_stmt_cost;
12125
12126 case scalar_load:
12127 return current_tune->vec_costs->scalar_load_cost;
12128
12129 case scalar_store:
12130 return current_tune->vec_costs->scalar_store_cost;
12131
12132 case vector_stmt:
12133 return current_tune->vec_costs->vec_stmt_cost;
12134
12135 case vector_load:
12136 return current_tune->vec_costs->vec_align_load_cost;
12137
12138 case vector_store:
12139 return current_tune->vec_costs->vec_store_cost;
12140
12141 case vec_to_scalar:
12142 return current_tune->vec_costs->vec_to_scalar_cost;
12143
12144 case scalar_to_vec:
12145 return current_tune->vec_costs->scalar_to_vec_cost;
12146
12147 case unaligned_load:
12148 case vector_gather_load:
12149 return current_tune->vec_costs->vec_unalign_load_cost;
12150
12151 case unaligned_store:
12152 case vector_scatter_store:
12153 return current_tune->vec_costs->vec_unalign_store_cost;
12154
12155 case cond_branch_taken:
12156 return current_tune->vec_costs->cond_taken_branch_cost;
12157
12158 case cond_branch_not_taken:
12159 return current_tune->vec_costs->cond_not_taken_branch_cost;
12160
12161 case vec_perm:
12162 case vec_promote_demote:
12163 return current_tune->vec_costs->vec_stmt_cost;
12164
12165 case vec_construct:
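      /* Constructing a vector from scalars: the cost grows with the element
	 count, e.g. a 4-element vector costs 4 / 2 + 1 = 3.  */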
12166 elements = TYPE_VECTOR_SUBPARTS (vectype);
12167 return elements / 2 + 1;
12168
12169 default:
12170 gcc_unreachable ();
12171 }
12172 }
12173
12174 /* Implement targetm.vectorize.add_stmt_cost. */
12175
12176 static unsigned
12177 arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
12178 enum vect_cost_for_stmt kind,
12179 struct _stmt_vec_info *stmt_info, tree vectype,
12180 int misalign, enum vect_cost_model_location where)
12181 {
12182 unsigned *cost = (unsigned *) data;
12183 unsigned retval = 0;
12184
12185 if (flag_vect_cost_model)
12186 {
12187 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
12188
12189 /* Statements in an inner loop relative to the loop being
12190 vectorized are weighted more heavily. The value here is
12191 arbitrary and could potentially be improved with analysis. */
12192 if (where == vect_body && stmt_info
12193 && stmt_in_inner_loop_p (vinfo, stmt_info))
12194 count *= 50; /* FIXME. */
12195
12196 retval = (unsigned) (count * stmt_cost);
12197 cost[where] += retval;
12198 }
12199
12200 return retval;
12201 }
12202
12203 /* Return true if and only if this insn can dual-issue only as older. */
12204 static bool
12205 cortexa7_older_only (rtx_insn *insn)
12206 {
12207 if (recog_memoized (insn) < 0)
12208 return false;
12209
12210 switch (get_attr_type (insn))
12211 {
12212 case TYPE_ALU_DSP_REG:
12213 case TYPE_ALU_SREG:
12214 case TYPE_ALUS_SREG:
12215 case TYPE_LOGIC_REG:
12216 case TYPE_LOGICS_REG:
12217 case TYPE_ADC_REG:
12218 case TYPE_ADCS_REG:
12219 case TYPE_ADR:
12220 case TYPE_BFM:
12221 case TYPE_REV:
12222 case TYPE_MVN_REG:
12223 case TYPE_SHIFT_IMM:
12224 case TYPE_SHIFT_REG:
12225 case TYPE_LOAD_BYTE:
12226 case TYPE_LOAD_4:
12227 case TYPE_STORE_4:
12228 case TYPE_FFARITHS:
12229 case TYPE_FADDS:
12230 case TYPE_FFARITHD:
12231 case TYPE_FADDD:
12232 case TYPE_FMOV:
12233 case TYPE_F_CVT:
12234 case TYPE_FCMPS:
12235 case TYPE_FCMPD:
12236 case TYPE_FCONSTS:
12237 case TYPE_FCONSTD:
12238 case TYPE_FMULS:
12239 case TYPE_FMACS:
12240 case TYPE_FMULD:
12241 case TYPE_FMACD:
12242 case TYPE_FDIVS:
12243 case TYPE_FDIVD:
12244 case TYPE_F_MRC:
12245 case TYPE_F_MRRC:
12246 case TYPE_F_FLAG:
12247 case TYPE_F_LOADS:
12248 case TYPE_F_STORES:
12249 return true;
12250 default:
12251 return false;
12252 }
12253 }
12254
12255 /* Return true if and only if this insn can dual-issue as younger. */
12256 static bool
12257 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12258 {
12259 if (recog_memoized (insn) < 0)
12260 {
12261 if (verbose > 5)
12262 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12263 return false;
12264 }
12265
12266 switch (get_attr_type (insn))
12267 {
12268 case TYPE_ALU_IMM:
12269 case TYPE_ALUS_IMM:
12270 case TYPE_LOGIC_IMM:
12271 case TYPE_LOGICS_IMM:
12272 case TYPE_EXTEND:
12273 case TYPE_MVN_IMM:
12274 case TYPE_MOV_IMM:
12275 case TYPE_MOV_REG:
12276 case TYPE_MOV_SHIFT:
12277 case TYPE_MOV_SHIFT_REG:
12278 case TYPE_BRANCH:
12279 case TYPE_CALL:
12280 return true;
12281 default:
12282 return false;
12283 }
12284 }
12285
12286
12287 /* Look for an instruction that can dual issue only as an older
12288 instruction, and move it in front of any instructions that can
12289 dual-issue as younger, while preserving the relative order of all
12290    other instructions in the ready list.  This is a heuristic to help
12291 dual-issue in later cycles, by postponing issue of more flexible
12292 instructions. This heuristic may affect dual issue opportunities
12293 in the current cycle. */
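/* As an illustration, if a MOV-immediate (which can issue as younger) sits
   nearer the head of the ready list than an FADDS (older-only), the FADDS is
   moved in front of the MOV so that the MOV stays available to pair as the
   younger insn of a later dual-issue slot.  */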
12294 static void
12295 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12296 int *n_readyp, int clock)
12297 {
12298 int i;
12299 int first_older_only = -1, first_younger = -1;
12300
12301 if (verbose > 5)
12302 fprintf (file,
12303 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12304 clock,
12305 *n_readyp);
12306
12307 /* Traverse the ready list from the head (the instruction to issue
12308      first), looking for the first instruction that can issue as
12309 younger and the first instruction that can dual-issue only as
12310 older. */
12311 for (i = *n_readyp - 1; i >= 0; i--)
12312 {
12313 rtx_insn *insn = ready[i];
12314 if (cortexa7_older_only (insn))
12315 {
12316 first_older_only = i;
12317 if (verbose > 5)
12318 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12319 break;
12320 }
12321 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12322 first_younger = i;
12323 }
12324
12325 /* Nothing to reorder because either no younger insn found or insn
12326 that can dual-issue only as older appears before any insn that
12327 can dual-issue as younger. */
12328 if (first_younger == -1)
12329 {
12330 if (verbose > 5)
12331 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12332 return;
12333 }
12334
12335 /* Nothing to reorder because no older-only insn in the ready list. */
12336 if (first_older_only == -1)
12337 {
12338 if (verbose > 5)
12339 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12340 return;
12341 }
12342
12343 /* Move first_older_only insn before first_younger. */
12344 if (verbose > 5)
12345 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12346 INSN_UID(ready [first_older_only]),
12347 INSN_UID(ready [first_younger]));
12348 rtx_insn *first_older_only_insn = ready [first_older_only];
12349 for (i = first_older_only; i < first_younger; i++)
12350 {
12351 ready[i] = ready[i+1];
12352 }
12353
12354 ready[i] = first_older_only_insn;
12355 return;
12356 }
12357
12358 /* Implement TARGET_SCHED_REORDER. */
12359 static int
12360 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12361 int clock)
12362 {
12363 switch (arm_tune)
12364 {
12365 case TARGET_CPU_cortexa7:
12366 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12367 break;
12368 default:
12369 /* Do nothing for other cores. */
12370 break;
12371 }
12372
12373 return arm_issue_rate ();
12374 }
12375
12376 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12377 It corrects the value of COST based on the relationship between
12378 INSN and DEP through the dependence LINK. It returns the new
12379 value. There is a per-core adjust_cost hook to adjust scheduler costs
12380 and the per-core hook can choose to completely override the generic
12381 adjust_cost function. Only put bits of code into arm_adjust_cost that
12382 are common across all cores. */
12383 static int
12384 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12385 unsigned int)
12386 {
12387 rtx i_pat, d_pat;
12388
12389 /* When generating Thumb-1 code, we want to place flag-setting operations
12390 close to a conditional branch which depends on them, so that we can
12391 omit the comparison. */
12392 if (TARGET_THUMB1
12393 && dep_type == 0
12394 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12395 && recog_memoized (dep) >= 0
12396 && get_attr_conds (dep) == CONDS_SET)
12397 return 0;
12398
12399 if (current_tune->sched_adjust_cost != NULL)
12400 {
12401 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12402 return cost;
12403 }
12404
12405 /* XXX Is this strictly true? */
12406 if (dep_type == REG_DEP_ANTI
12407 || dep_type == REG_DEP_OUTPUT)
12408 return 0;
12409
12410 /* Call insns don't incur a stall, even if they follow a load. */
12411 if (dep_type == 0
12412 && CALL_P (insn))
12413 return 1;
12414
12415 if ((i_pat = single_set (insn)) != NULL
12416 && MEM_P (SET_SRC (i_pat))
12417 && (d_pat = single_set (dep)) != NULL
12418 && MEM_P (SET_DEST (d_pat)))
12419 {
12420 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12421       /* This is a load after a store; there is no conflict if the load reads
12422 from a cached area. Assume that loads from the stack, and from the
12423 constant pool are cached, and that others will miss. This is a
12424 hack. */
12425
12426 if ((GET_CODE (src_mem) == SYMBOL_REF
12427 && CONSTANT_POOL_ADDRESS_P (src_mem))
12428 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12429 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12430 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12431 return 1;
12432 }
12433
12434 return cost;
12435 }
12436
12437 int
12438 arm_max_conditional_execute (void)
12439 {
12440 return max_insns_skipped;
12441 }
12442
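/* Default branch cost: 4 on 32-bit targets (1 when optimizing Thumb-2 for
   size); on Thumb-1 a branch costs 2 when optimizing, otherwise 0.  */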
12443 static int
12444 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12445 {
12446 if (TARGET_32BIT)
12447 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12448 else
12449 return (optimize > 0) ? 2 : 0;
12450 }
12451
12452 static int
12453 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12454 {
12455 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12456 }
12457
12458 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12459 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12460 sequences of non-executed instructions in IT blocks probably take the same
12461 amount of time as executed instructions (and the IT instruction itself takes
12462 space in icache). This function was experimentally determined to give good
12463 results on a popular embedded benchmark. */
12464
12465 static int
12466 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12467 {
12468 return (TARGET_32BIT && speed_p) ? 1
12469 : arm_default_branch_cost (speed_p, predictable_p);
12470 }
12471
12472 static int
12473 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12474 {
12475 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12476 }
12477
12478 static bool fp_consts_inited = false;
12479
12480 static REAL_VALUE_TYPE value_fp0;
12481
12482 static void
12483 init_fp_table (void)
12484 {
12485 REAL_VALUE_TYPE r;
12486
12487 r = REAL_VALUE_ATOF ("0", DFmode);
12488 value_fp0 = r;
12489 fp_consts_inited = true;
12490 }
12491
12492 /* Return TRUE if rtx X is a valid immediate FP constant. */
12493 int
12494 arm_const_double_rtx (rtx x)
12495 {
12496 const REAL_VALUE_TYPE *r;
12497
12498 if (!fp_consts_inited)
12499 init_fp_table ();
12500
12501 r = CONST_DOUBLE_REAL_VALUE (x);
12502 if (REAL_VALUE_MINUS_ZERO (*r))
12503 return 0;
12504
12505 if (real_equal (r, &value_fp0))
12506 return 1;
12507
12508 return 0;
12509 }
12510
12511 /* VFPv3 has a fairly wide range of representable immediates, formed from
12512 "quarter-precision" floating-point values. These can be evaluated using this
12513 formula (with ^ for exponentiation):
12514
12515 -1^s * n * 2^-r
12516
12517 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12518 16 <= n <= 31 and 0 <= r <= 7.
12519
12520 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12521
12522 - A (most-significant) is the sign bit.
12523 - BCD are the exponent (encoded as r XOR 3).
12524 - EFGH are the mantissa (encoded as n - 16).
12525 */
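
/* Illustrative sketch only (hypothetical helper, never called by the
   compiler): encode the (S, N, R) triple of the formula above into the
   8-bit fconst[sd] immediate, mirroring the return expression of
   vfp3_const_double_index below.  For example 1.0 = 16 * 2^-4 gives
   S = 0, N = 16, R = 4 and encodes as
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */

static int ATTRIBUTE_UNUSED
vfp3_quarter_precision_encode_sketch (int s, int n, int r)
{
  /* Reject triples outside the representable range.  */
  if (s < 0 || s > 1 || n < 16 || n > 31 || r < 0 || r > 7)
    return -1;

  /* A = sign, BCD = r XOR 3, EFGH = n - 16.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}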
12526
12527 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12528 fconst[sd] instruction, or -1 if X isn't suitable. */
12529 static int
12530 vfp3_const_double_index (rtx x)
12531 {
12532 REAL_VALUE_TYPE r, m;
12533 int sign, exponent;
12534 unsigned HOST_WIDE_INT mantissa, mant_hi;
12535 unsigned HOST_WIDE_INT mask;
12536 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12537 bool fail;
12538
12539 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12540 return -1;
12541
12542 r = *CONST_DOUBLE_REAL_VALUE (x);
12543
12544 /* We can't represent these things, so detect them first. */
12545 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12546 return -1;
12547
12548 /* Extract sign, exponent and mantissa. */
12549 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12550 r = real_value_abs (&r);
12551 exponent = REAL_EXP (&r);
12552 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12553 highest (sign) bit, with a fixed binary point at bit point_pos.
12554 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12555 bits for the mantissa, this may fail (low bits would be lost). */
12556 real_ldexp (&m, &r, point_pos - exponent);
12557 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12558 mantissa = w.elt (0);
12559 mant_hi = w.elt (1);
12560
12561 /* If there are bits set in the low part of the mantissa, we can't
12562 represent this value. */
12563 if (mantissa != 0)
12564 return -1;
12565
12566 /* Now make it so that mantissa contains the most-significant bits, and move
12567 the point_pos to indicate that the least-significant bits have been
12568 discarded. */
12569 point_pos -= HOST_BITS_PER_WIDE_INT;
12570 mantissa = mant_hi;
12571
12572 /* We can permit four significant bits of mantissa only, plus a high bit
12573 which is always 1. */
12574 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12575 if ((mantissa & mask) != 0)
12576 return -1;
12577
12578 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12579 mantissa >>= point_pos - 5;
12580
12581 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12582 floating-point immediate zero with Neon using an integer-zero load, but
12583 that case is handled elsewhere.) */
12584 if (mantissa == 0)
12585 return -1;
12586
12587 gcc_assert (mantissa >= 16 && mantissa <= 31);
12588
12589 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12590 normalized significands are in the range [1, 2). (Our mantissa is shifted
12591 left 4 places at this point relative to normalized IEEE754 values). GCC
12592 internally uses [0.5, 1) (see real.c), so the exponent returned from
12593 REAL_EXP must be altered. */
12594 exponent = 5 - exponent;
12595
12596 if (exponent < 0 || exponent > 7)
12597 return -1;
12598
12599 /* Sign, mantissa and exponent are now in the correct form to plug into the
12600 formula described in the comment above. */
12601 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12602 }
12603
12604 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12605 int
12606 vfp3_const_double_rtx (rtx x)
12607 {
12608 if (!TARGET_VFP3)
12609 return 0;
12610
12611 return vfp3_const_double_index (x) != -1;
12612 }
12613
12614 /* Recognize immediates which can be used in various Neon and MVE instructions.
12615 Legal immediates are described by the following table (for VMVN variants, the
12616 bitwise inverse of the constant shown is recognized. In either case, VMOV
12617 is output and the correct instruction to use for a given constant is chosen
12618 by the assembler). The constant shown is replicated across all elements of
12619 the destination vector.
12620
12621 insn elems variant constant (binary)
12622 ---- ----- ------- -----------------
12623 vmov i32 0 00000000 00000000 00000000 abcdefgh
12624 vmov i32 1 00000000 00000000 abcdefgh 00000000
12625 vmov i32 2 00000000 abcdefgh 00000000 00000000
12626 vmov i32 3 abcdefgh 00000000 00000000 00000000
12627 vmov i16 4 00000000 abcdefgh
12628 vmov i16 5 abcdefgh 00000000
12629 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12630 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12631 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12632 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12633 vmvn i16 10 00000000 abcdefgh
12634 vmvn i16 11 abcdefgh 00000000
12635 vmov i32 12 00000000 00000000 abcdefgh 11111111
12636 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12637 vmov i32 14 00000000 abcdefgh 11111111 11111111
12638 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12639 vmov i8 16 abcdefgh
12640 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12641 eeeeeeee ffffffff gggggggg hhhhhhhh
12642 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12643 vmov f32 19 00000000 00000000 00000000 00000000
12644
12645 For case 18, B = !b. Representable values are exactly those accepted by
12646 vfp3_const_double_index, but are output as floating-point numbers rather
12647 than indices.
12648
12649 For case 19, we will change it to vmov.i32 when assembling.
12650
12651 Variants 0-5 (inclusive) may also be used as immediates for the second
12652 operand of VORR/VBIC instructions.
12653
12654 The INVERSE argument causes the bitwise inverse of the given operand to be
12655 recognized instead (used for recognizing legal immediates for the VAND/VORN
12656 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12657 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12658 output, rather than the real insns vbic/vorr).
12659
12660 INVERSE makes no difference to the recognition of float vectors.
12661
12662 The return value is the variant of immediate as shown in the above table, or
12663 -1 if the given value doesn't match any of the listed patterns.
12664 */
12665 static int
12666 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12667 rtx *modconst, int *elementwidth)
12668 {
12669 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12670 matches = 1; \
12671 for (i = 0; i < idx; i += (STRIDE)) \
12672 if (!(TEST)) \
12673 matches = 0; \
12674 if (matches) \
12675 { \
12676 immtype = (CLASS); \
12677 elsize = (ELSIZE); \
12678 break; \
12679 }
12680
12681 unsigned int i, elsize = 0, idx = 0, n_elts;
12682 unsigned int innersize;
12683 unsigned char bytes[16] = {};
12684 int immtype = -1, matches;
12685 unsigned int invmask = inverse ? 0xff : 0;
12686 bool vector = GET_CODE (op) == CONST_VECTOR;
12687
12688 if (vector)
12689 n_elts = CONST_VECTOR_NUNITS (op);
12690 else
12691 {
12692 n_elts = 1;
12693 gcc_assert (mode != VOIDmode);
12694 }
12695
12696 innersize = GET_MODE_UNIT_SIZE (mode);
12697
12698 /* Only support 128-bit vectors for MVE. */
12699 if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
12700 return -1;
12701
12702 /* Vectors of float constants. */
12703 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12704 {
12705 rtx el0 = CONST_VECTOR_ELT (op, 0);
12706
12707 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12708 return -1;
12709
12710 /* FP16 vectors cannot be represented. */
12711 if (GET_MODE_INNER (mode) == HFmode)
12712 return -1;
12713
12714 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12715 are distinct in this context. */
12716 if (!const_vec_duplicate_p (op))
12717 return -1;
12718
12719 if (modconst)
12720 *modconst = CONST_VECTOR_ELT (op, 0);
12721
12722 if (elementwidth)
12723 *elementwidth = 0;
12724
12725 if (el0 == CONST0_RTX (GET_MODE (el0)))
12726 return 19;
12727 else
12728 return 18;
12729 }
12730
12731 /* The tricks done in the code below apply for little-endian vector layout.
12732 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12733 FIXME: Implement logic for big-endian vectors. */
12734 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12735 return -1;
12736
12737 /* Splat vector constant out into a byte vector. */
12738 for (i = 0; i < n_elts; i++)
12739 {
12740 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12741 unsigned HOST_WIDE_INT elpart;
12742
12743 gcc_assert (CONST_INT_P (el));
12744 elpart = INTVAL (el);
12745
12746 for (unsigned int byte = 0; byte < innersize; byte++)
12747 {
12748 bytes[idx++] = (elpart & 0xff) ^ invmask;
12749 elpart >>= BITS_PER_UNIT;
12750 }
12751 }
12752
12753 /* Sanity check. */
12754 gcc_assert (idx == GET_MODE_SIZE (mode));
12755
12756 do
12757 {
12758 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12759 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12760
12761 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12762 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12763
12764 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12765 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12766
12767 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12768 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12769
12770 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12771
12772 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12773
12774 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12775 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12776
12777 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12778 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12779
12780 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12781 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12782
12783 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12784 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12785
12786 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12787
12788 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12789
12790 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12791 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12792
12793 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12794 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12795
12796 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12797 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12798
12799 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12800 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12801
12802 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12803
12804 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12805 && bytes[i] == bytes[(i + 8) % idx]);
12806 }
12807 while (0);
12808
12809 if (immtype == -1)
12810 return -1;
12811
12812 if (elementwidth)
12813 *elementwidth = elsize;
12814
12815 if (modconst)
12816 {
12817 unsigned HOST_WIDE_INT imm = 0;
12818
12819 /* Un-invert bytes of recognized vector, if necessary. */
12820 if (invmask != 0)
12821 for (i = 0; i < idx; i++)
12822 bytes[i] ^= invmask;
12823
12824 if (immtype == 17)
12825 {
12826 /* FIXME: Broken on 32-bit H_W_I hosts. */
12827 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12828
12829 for (i = 0; i < 8; i++)
12830 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12831 << (i * BITS_PER_UNIT);
12832
12833 *modconst = GEN_INT (imm);
12834 }
12835 else
12836 {
12837 unsigned HOST_WIDE_INT imm = 0;
12838
12839 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12840 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12841
12842 *modconst = GEN_INT (imm);
12843 }
12844 }
12845
12846 return immtype;
12847 #undef CHECK
12848 }
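
/* Illustrative sketch only (hypothetical helper, never called): show how
   the byte-splatting above reduces variant 0 of the table ("00000000
   00000000 00000000 abcdefgh", replicated across the vector) to a simple
   byte test.  For a V4SI constant whose elements are all 0x000000ab, the
   little-endian byte array is { ab, 0, 0, 0, ab, 0, 0, 0, ... }, which is
   exactly what CHECK (4, 32, 0, ...) verifies.  */

static bool ATTRIBUTE_UNUSED
simd_variant0_sketch_p (unsigned int element)
{
  unsigned char bytes[4];

  /* Splat one 32-bit element into little-endian bytes, as the loop above
     does for every element of the vector.  */
  for (int byte = 0; byte < 4; byte++)
    {
      bytes[byte] = element & 0xff;
      element >>= BITS_PER_UNIT;
    }

  /* Variant 0: only the least-significant byte may be non-zero.  */
  return bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0;
}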
12849
12850 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
12851 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
12852 (or zero for float elements), and a modified constant (whatever should be
12853 output for a VMOV) in *MODCONST. The function was renamed from
12854 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move"
12855 because it is used by both Neon and MVE. */
12856 int
12857 simd_immediate_valid_for_move (rtx op, machine_mode mode,
12858 rtx *modconst, int *elementwidth)
12859 {
12860 rtx tmpconst;
12861 int tmpwidth;
12862 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12863
12864 if (retval == -1)
12865 return 0;
12866
12867 if (modconst)
12868 *modconst = tmpconst;
12869
12870 if (elementwidth)
12871 *elementwidth = tmpwidth;
12872
12873 return 1;
12874 }
12875
12876 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12877 the immediate is valid, write a constant suitable for using as an operand
12878 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12879 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
12880
12881 int
12882 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12883 rtx *modconst, int *elementwidth)
12884 {
12885 rtx tmpconst;
12886 int tmpwidth;
12887 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12888
12889 if (retval < 0 || retval > 5)
12890 return 0;
12891
12892 if (modconst)
12893 *modconst = tmpconst;
12894
12895 if (elementwidth)
12896 *elementwidth = tmpwidth;
12897
12898 return 1;
12899 }
12900
12901 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12902 the immediate is valid, write a constant suitable for using as an operand
12903 to VSHR/VSHL to *MODCONST and the corresponding element width to
12904 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12905 which have different immediate ranges. */
12906
12907 int
12908 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12909 rtx *modconst, int *elementwidth,
12910 bool isleftshift)
12911 {
12912 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12913 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12914 unsigned HOST_WIDE_INT last_elt = 0;
12915 unsigned HOST_WIDE_INT maxshift;
12916
12917 /* Split vector constant out into a byte vector. */
12918 for (i = 0; i < n_elts; i++)
12919 {
12920 rtx el = CONST_VECTOR_ELT (op, i);
12921 unsigned HOST_WIDE_INT elpart;
12922
12923 if (CONST_INT_P (el))
12924 elpart = INTVAL (el);
12925 else if (CONST_DOUBLE_P (el))
12926 return 0;
12927 else
12928 gcc_unreachable ();
12929
12930 if (i != 0 && elpart != last_elt)
12931 return 0;
12932
12933 last_elt = elpart;
12934 }
12935
12936 /* Shift less than element size. */
12937 maxshift = innersize * 8;
12938
12939 if (isleftshift)
12940 {
12941 /* Left shift immediate value can be from 0 to <size>-1. */
12942 if (last_elt >= maxshift)
12943 return 0;
12944 }
12945 else
12946 {
12947 /* Right shift immediate value can be from 1 to <size>. */
12948 if (last_elt == 0 || last_elt > maxshift)
12949 return 0;
12950 }
12951
12952 if (elementwidth)
12953 *elementwidth = innersize * 8;
12954
12955 if (modconst)
12956 *modconst = CONST_VECTOR_ELT (op, 0);
12957
12958 return 1;
12959 }
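
/* Illustrative sketch only (hypothetical helper, never called): the
   per-element range test applied above.  With 16-bit elements, for
   instance, a VSHL immediate must lie in [0, 15] while a VSHR immediate
   must lie in [1, 16].  */

static bool ATTRIBUTE_UNUSED
neon_shift_immediate_range_sketch_p (unsigned HOST_WIDE_INT shift,
                                     unsigned int elem_bits,
                                     bool isleftshift)
{
  if (isleftshift)
    /* Left shift: 0 to <size>-1.  */
    return shift < elem_bits;

  /* Right shift: 1 to <size>.  */
  return shift >= 1 && shift <= elem_bits;
}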
12960
12961 /* Return a string suitable for output of Neon immediate logic operation
12962 MNEM. */
12963
12964 char *
12965 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12966 int inverse, int quad)
12967 {
12968 int width, is_valid;
12969 static char templ[40];
12970
12971 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12972
12973 gcc_assert (is_valid != 0);
12974
12975 if (quad)
12976 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12977 else
12978 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12979
12980 return templ;
12981 }
12982
12983 /* Return a string suitable for output of Neon immediate shift operation
12984 (VSHR or VSHL) MNEM. */
12985
12986 char *
12987 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12988 machine_mode mode, int quad,
12989 bool isleftshift)
12990 {
12991 int width, is_valid;
12992 static char templ[40];
12993
12994 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12995 gcc_assert (is_valid != 0);
12996
12997 if (quad)
12998 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12999 else
13000 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13001
13002 return templ;
13003 }
13004
13005 /* Output a sequence of pairwise operations to implement a reduction.
13006 NOTE: We do "too much work" here, because pairwise operations work on two
13007 registers-worth of operands in one go. Unfortunately it does not seem
13008 possible to exploit those extra calculations to do the full operation in fewer steps.
13009 Although all vector elements of the result but the first are ignored, we
13010 actually calculate the same result in each of the elements. An alternative
13011 such as initially loading a vector with zero to use as each of the second
13012 operands would use up an additional register and take an extra instruction,
13013 for no particular gain. */
13014
13015 void
13016 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13017 rtx (*reduc) (rtx, rtx, rtx))
13018 {
13019 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13020 rtx tmpsum = op1;
13021
13022 for (i = parts / 2; i >= 1; i /= 2)
13023 {
13024 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13025 emit_insn (reduc (dest, tmpsum, tmpsum));
13026 tmpsum = dest;
13027 }
13028 }
13029
13030 /* If VALS is a vector constant that can be loaded into a register
13031 using VDUP, generate instructions to do so and return an RTX to
13032 assign to the register. Otherwise return NULL_RTX. */
13033
13034 static rtx
13035 neon_vdup_constant (rtx vals)
13036 {
13037 machine_mode mode = GET_MODE (vals);
13038 machine_mode inner_mode = GET_MODE_INNER (mode);
13039 rtx x;
13040
13041 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13042 return NULL_RTX;
13043
13044 if (!const_vec_duplicate_p (vals, &x))
13045 /* The elements are not all the same. We could handle repeating
13046 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13047 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13048 vdup.i16). */
13049 return NULL_RTX;
13050
13051 /* We can load this constant by using VDUP and a constant in a
13052 single ARM register. This will be cheaper than a vector
13053 load. */
13054
13055 x = copy_to_mode_reg (inner_mode, x);
13056 return gen_vec_duplicate (mode, x);
13057 }
13058
13059 /* Generate code to load VALS, which is a PARALLEL containing only
13060 constants (for vec_init) or CONST_VECTOR, efficiently into a
13061 register. Returns an RTX to copy into the register, or NULL_RTX
13062 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
13063
13064 rtx
13065 neon_make_constant (rtx vals)
13066 {
13067 machine_mode mode = GET_MODE (vals);
13068 rtx target;
13069 rtx const_vec = NULL_RTX;
13070 int n_elts = GET_MODE_NUNITS (mode);
13071 int n_const = 0;
13072 int i;
13073
13074 if (GET_CODE (vals) == CONST_VECTOR)
13075 const_vec = vals;
13076 else if (GET_CODE (vals) == PARALLEL)
13077 {
13078 /* A CONST_VECTOR must contain only CONST_INTs and
13079 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13080 Only store valid constants in a CONST_VECTOR. */
13081 for (i = 0; i < n_elts; ++i)
13082 {
13083 rtx x = XVECEXP (vals, 0, i);
13084 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13085 n_const++;
13086 }
13087 if (n_const == n_elts)
13088 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13089 }
13090 else
13091 gcc_unreachable ();
13092
13093 if (const_vec != NULL
13094 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13095 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13096 return const_vec;
13097 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
13098 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13099 pipeline cycle; creating the constant takes one or two ARM
13100 pipeline cycles. */
13101 return target;
13102 else if (const_vec != NULL_RTX)
13103 /* Load from constant pool. On Cortex-A8 this takes two cycles
13104 (for either double or quad vectors). We cannot take advantage
13105 of single-cycle VLD1 because we need a PC-relative addressing
13106 mode. */
13107 return const_vec;
13108 else
13109 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13110 We cannot construct an initializer. */
13111 return NULL_RTX;
13112 }
13113
13114 /* Initialize vector TARGET to VALS. */
13115
13116 void
13117 neon_expand_vector_init (rtx target, rtx vals)
13118 {
13119 machine_mode mode = GET_MODE (target);
13120 machine_mode inner_mode = GET_MODE_INNER (mode);
13121 int n_elts = GET_MODE_NUNITS (mode);
13122 int n_var = 0, one_var = -1;
13123 bool all_same = true;
13124 rtx x, mem;
13125 int i;
13126
13127 for (i = 0; i < n_elts; ++i)
13128 {
13129 x = XVECEXP (vals, 0, i);
13130 if (!CONSTANT_P (x))
13131 ++n_var, one_var = i;
13132
13133 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13134 all_same = false;
13135 }
13136
13137 if (n_var == 0)
13138 {
13139 rtx constant = neon_make_constant (vals);
13140 if (constant != NULL_RTX)
13141 {
13142 emit_move_insn (target, constant);
13143 return;
13144 }
13145 }
13146
13147 /* Splat a single non-constant element if we can. */
13148 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13149 {
13150 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13151 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13152 return;
13153 }
13154
13155 /* One field is non-constant. Load constant then overwrite varying
13156 field. This is more efficient than using the stack. */
13157 if (n_var == 1)
13158 {
13159 rtx copy = copy_rtx (vals);
13160 rtx merge_mask = GEN_INT (1 << one_var);
13161
13162 /* Load constant part of vector, substitute neighboring value for
13163 varying element. */
13164 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13165 neon_expand_vector_init (target, copy);
13166
13167 /* Insert variable. */
13168 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13169 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13170 return;
13171 }
13172
13173 /* Construct the vector in memory one field at a time
13174 and load the whole vector. */
13175 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13176 for (i = 0; i < n_elts; i++)
13177 emit_move_insn (adjust_address_nv (mem, inner_mode,
13178 i * GET_MODE_SIZE (inner_mode)),
13179 XVECEXP (vals, 0, i));
13180 emit_move_insn (target, mem);
13181 }
13182
13183 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13184 an error mentioning DESC if it doesn't. EXP indicates the source location,
13185 which includes the inlining history for intrinsics. */
13186
13187 static void
13188 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13189 const_tree exp, const char *desc)
13190 {
13191 HOST_WIDE_INT lane;
13192
13193 gcc_assert (CONST_INT_P (operand));
13194
13195 lane = INTVAL (operand);
13196
13197 if (lane < low || lane >= high)
13198 {
13199 if (exp)
13200 error ("%K%s %wd out of range %wd - %wd",
13201 exp, desc, lane, low, high - 1);
13202 else
13203 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13204 }
13205 }
13206
13207 /* Bounds-check lanes. */
13208
13209 void
13210 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13211 const_tree exp)
13212 {
13213 bounds_check (operand, low, high, exp, "lane");
13214 }
13215
13216 /* Bounds-check constants. */
13217
13218 void
13219 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13220 {
13221 bounds_check (operand, low, high, NULL_TREE, "constant");
13222 }
13223
13224 HOST_WIDE_INT
13225 neon_element_bits (machine_mode mode)
13226 {
13227 return GET_MODE_UNIT_BITSIZE (mode);
13228 }
13229
13230 \f
13231 /* Predicates for `match_operand' and `match_operator'. */
13232
13233 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13234 WB level is 2 if full writeback address modes are allowed, 1
13235 if limited writeback address modes (POST_INC and PRE_DEC) are
13236 allowed and 0 if no writeback at all is supported. */
13237
13238 int
13239 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13240 {
13241 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13242 rtx ind;
13243
13244 /* Reject eliminable registers. */
13245 if (! (reload_in_progress || reload_completed || lra_in_progress)
13246 && ( reg_mentioned_p (frame_pointer_rtx, op)
13247 || reg_mentioned_p (arg_pointer_rtx, op)
13248 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13249 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13250 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13251 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13252 return FALSE;
13253
13254 /* Constants are converted into offsets from labels. */
13255 if (!MEM_P (op))
13256 return FALSE;
13257
13258 ind = XEXP (op, 0);
13259
13260 if (reload_completed
13261 && (GET_CODE (ind) == LABEL_REF
13262 || (GET_CODE (ind) == CONST
13263 && GET_CODE (XEXP (ind, 0)) == PLUS
13264 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13265 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13266 return TRUE;
13267
13268 /* Match: (mem (reg)). */
13269 if (REG_P (ind))
13270 return arm_address_register_rtx_p (ind, 0);
13271
13272 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13273 acceptable whenever any writeback is allowed (subject to
13274 verification by arm_address_register_rtx_p), i.e. WB_LEVEL >= 1.
13275 PRE_INC and POST_DEC are only acceptable with full writeback,
13276 i.e. WB_LEVEL > 1. */
13277 if (wb_level > 0
13278 && (GET_CODE (ind) == POST_INC
13279 || GET_CODE (ind) == PRE_DEC
13280 || (wb_level > 1
13281 && (GET_CODE (ind) == PRE_INC
13282 || GET_CODE (ind) == POST_DEC))))
13283 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13284
13285 if (wb_level > 1
13286 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13287 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13288 && GET_CODE (XEXP (ind, 1)) == PLUS
13289 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13290 ind = XEXP (ind, 1);
13291
13292 /* Match:
13293 (plus (reg)
13294 (const))
13295
13296 The encoded immediate for 16-bit modes is multiplied by 2,
13297 while the encoded immediate for 32-bit and 64-bit modes is
13298 multiplied by 4. */
13299 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13300 if (GET_CODE (ind) == PLUS
13301 && REG_P (XEXP (ind, 0))
13302 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13303 && CONST_INT_P (XEXP (ind, 1))
13304 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13305 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13306 return TRUE;
13307
13308 return FALSE;
13309 }
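
/* Illustrative sketch only (hypothetical helper, never called): the offset
   test used above for the (plus (reg) (const)) case.  For 32-bit and
   64-bit accesses FACTOR is 4, so valid offsets are multiples of 4 in
   [-1020, 1020]; for 16-bit accesses FACTOR is 2, giving multiples of 2
   in [-510, 510].  */

static bool ATTRIBUTE_UNUSED
coproc_offset_in_range_sketch_p (HOST_WIDE_INT offset, int mode_size)
{
  int factor = MIN (mode_size, 4);
  return (IN_RANGE (offset, -255 * factor, 255 * factor)
	  && (offset & (factor - 1)) == 0);
}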
13310
13311 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13312 WB is true if full writeback address modes are allowed and is false
13313 if limited writeback address modes (POST_INC and PRE_DEC) are
13314 allowed. */
13315
13316 int arm_coproc_mem_operand (rtx op, bool wb)
13317 {
13318 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13319 }
13320
13321 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13322 context in which no writeback address modes are allowed. */
13323
13324 int
13325 arm_coproc_mem_operand_no_writeback (rtx op)
13326 {
13327 return arm_coproc_mem_operand_wb (op, 0);
13328 }
13329
13330 /* Return TRUE if OP is a valid MVE vector memory operand for MODE.
13331 1. For V8QI, V4QI and V4HI modes, a [Rn] base must be a low register (Rn <= R7).
13332 2. For other modes, a [Rn] base may be any register below R15 except R13 (SP). */
13333 int
13334 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13335 {
13336 enum rtx_code code;
13337 int val, reg_no;
13338
13339 /* Match: (mem (reg)). */
13340 if (REG_P (op))
13341 {
13342 int reg_no = REGNO (op);
13343 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13344 ? reg_no <= LAST_LO_REGNUM
13345 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13346 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13347 }
13348 code = GET_CODE (op);
13349
13350 if (code == POST_INC || code == PRE_DEC
13351 || code == PRE_INC || code == POST_DEC)
13352 {
13353 reg_no = REGNO (XEXP (op, 0));
13354 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13355 ? reg_no <= LAST_LO_REGNUM
13356 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13357 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13358 }
13359 else if ((code == POST_MODIFY || code == PRE_MODIFY)
13360 && GET_CODE (XEXP (op, 1)) == PLUS && REG_P (XEXP (XEXP (op, 1), 1)))
13361 {
13362 reg_no = REGNO (XEXP (op, 0));
13363 val = INTVAL (XEXP ( XEXP (op, 1), 1));
13364 switch (mode)
13365 {
13366 case E_V16QImode:
13367 if (abs (val) <= 127)
13368 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13369 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13370 return FALSE;
13371 case E_V8HImode:
13372 case E_V8HFmode:
13373 if (abs (val) <= 255)
13374 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13375 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13376 return FALSE;
13377 case E_V8QImode:
13378 case E_V4QImode:
13379 if (abs (val) <= 127)
13380 return (reg_no <= LAST_LO_REGNUM
13381 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13382 return FALSE;
13383 case E_V4HImode:
13384 case E_V4HFmode:
13385 if (val % 2 == 0 && abs (val) <= 254)
13386 return (reg_no <= LAST_LO_REGNUM
13387 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13388 return FALSE;
13389 case E_V4SImode:
13390 case E_V4SFmode:
13391 if (val % 4 == 0 && abs (val) <= 508)
13392 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13393 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13394 return FALSE;
13395 case E_V2DImode:
13396 case E_V2DFmode:
13397 case E_TImode:
13398 if (val % 4 == 0 && val >= 0 && val <= 1020)
13399 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13400 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13401 return FALSE;
13402 default:
13403 return FALSE;
13404 }
13405 }
13406 return FALSE;
13407 }
13408
13409 /* Return TRUE if OP is a memory operand which we can load or store a vector
13410 to/from. TYPE is one of the following values:
13411 0 - Vector load/store (vldr)
13412 1 - Core registers (ldm)
13413 2 - Element/structure loads (vld1)
13414 */
13415 int
13416 neon_vector_mem_operand (rtx op, int type, bool strict)
13417 {
13418 rtx ind;
13419
13420 /* Reject eliminable registers. */
13421 if (strict && ! (reload_in_progress || reload_completed)
13422 && (reg_mentioned_p (frame_pointer_rtx, op)
13423 || reg_mentioned_p (arg_pointer_rtx, op)
13424 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13425 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13426 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13427 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13428 return FALSE;
13429
13430 /* Constants are converted into offsets from labels. */
13431 if (!MEM_P (op))
13432 return FALSE;
13433
13434 ind = XEXP (op, 0);
13435
13436 if (reload_completed
13437 && (GET_CODE (ind) == LABEL_REF
13438 || (GET_CODE (ind) == CONST
13439 && GET_CODE (XEXP (ind, 0)) == PLUS
13440 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13441 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13442 return TRUE;
13443
13444 /* Match: (mem (reg)). */
13445 if (REG_P (ind))
13446 return arm_address_register_rtx_p (ind, 0);
13447
13448 /* Allow post-increment with Neon registers. */
13449 if ((type != 1 && GET_CODE (ind) == POST_INC)
13450 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13451 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13452
13453 /* Allow post-increment by register for VLDn. */
13454 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13455 && GET_CODE (XEXP (ind, 1)) == PLUS
13456 && REG_P (XEXP (XEXP (ind, 1), 1)))
13457 return true;
13458
13459 /* Match:
13460 (plus (reg)
13461 (const)). */
13462 if (type == 0
13463 && GET_CODE (ind) == PLUS
13464 && REG_P (XEXP (ind, 0))
13465 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13466 && CONST_INT_P (XEXP (ind, 1))
13467 && INTVAL (XEXP (ind, 1)) > -1024
13468 /* For quad modes, we restrict the constant offset to be slightly less
13469 than what the instruction format permits. We have no such constraint
13470 on double mode offsets. (This must match arm_legitimate_index_p.) */
13471 && (INTVAL (XEXP (ind, 1))
13472 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13473 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13474 return TRUE;
13475
13476 return FALSE;
13477 }
13478
13479 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13480 type. */
13481 int
13482 neon_struct_mem_operand (rtx op)
13483 {
13484 rtx ind;
13485
13486 /* Reject eliminable registers. */
13487 if (! (reload_in_progress || reload_completed)
13488 && ( reg_mentioned_p (frame_pointer_rtx, op)
13489 || reg_mentioned_p (arg_pointer_rtx, op)
13490 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13491 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13492 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13493 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13494 return FALSE;
13495
13496 /* Constants are converted into offsets from labels. */
13497 if (!MEM_P (op))
13498 return FALSE;
13499
13500 ind = XEXP (op, 0);
13501
13502 if (reload_completed
13503 && (GET_CODE (ind) == LABEL_REF
13504 || (GET_CODE (ind) == CONST
13505 && GET_CODE (XEXP (ind, 0)) == PLUS
13506 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13507 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13508 return TRUE;
13509
13510 /* Match: (mem (reg)). */
13511 if (REG_P (ind))
13512 return arm_address_register_rtx_p (ind, 0);
13513
13514 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13515 if (GET_CODE (ind) == POST_INC
13516 || GET_CODE (ind) == PRE_DEC)
13517 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13518
13519 return FALSE;
13520 }
13521
13522 /* Prepares the operands for the VCMLA by lane instruction such that the right
13523 register number is selected. This instruction is special in that it always
13524 requires a D register, however there is a choice to be made between Dn[0],
13525 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13526
13527 The VCMLA by lane function always selects two values. For instance given D0
13528 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13529 used by the instruction. However given V4SF then index 0 and 1 are valid as
13530 D0[0] or D1[0] are both valid.
13531
13532 This function centralizes that information based on OPERANDS: OPERANDS[3]
13533 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13534 updated to contain the right index. */
13535
13536 rtx *
13537 neon_vcmla_lane_prepare_operands (rtx *operands)
13538 {
13539 int lane = INTVAL (operands[4]);
13540 machine_mode constmode = SImode;
13541 machine_mode mode = GET_MODE (operands[3]);
13542 int regno = REGNO (operands[3]);
13543 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13544 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13545 {
13546 operands[3] = gen_int_mode (regno + 1, constmode);
13547 operands[4]
13548 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13549 }
13550 else
13551 {
13552 operands[3] = gen_int_mode (regno, constmode);
13553 operands[4] = gen_int_mode (lane, constmode);
13554 }
13555 return operands;
13556 }
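
/* Illustrative sketch only (hypothetical helper, never called): the
   (D register, lane) remapping performed above, expressed on plain
   integers.  With NUNITS == 4 (V4SF), lane 1 of Dn maps to lane 0 of
   D(n+1); with NUNITS == 2 (V2SF) only lane 0 of Dn is reachable.  */

static void ATTRIBUTE_UNUSED
neon_vcmla_lane_remap_sketch (int d_regno, int lane, int nunits,
                              int *out_regno, int *out_lane)
{
  if (lane > 0 && lane >= nunits / 4)
    {
      *out_regno = d_regno + 1;
      *out_lane = lane - nunits / 4;
    }
  else
    {
      *out_regno = d_regno;
      *out_lane = lane;
    }
}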
13557
13558
13559 /* Return true if X is a register that will be eliminated later on. */
13560 int
13561 arm_eliminable_register (rtx x)
13562 {
13563 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13564 || REGNO (x) == ARG_POINTER_REGNUM
13565 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13566 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13567 }
13568
13569 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13570 coprocessor registers. Otherwise return NO_REGS. */
13571
13572 enum reg_class
13573 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13574 {
13575 if (mode == HFmode)
13576 {
13577 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13578 return GENERAL_REGS;
13579 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13580 return NO_REGS;
13581 return GENERAL_REGS;
13582 }
13583
13584 /* The neon move patterns handle all legitimate vector and struct
13585 addresses. */
13586 if (TARGET_NEON
13587 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13588 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13589 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13590 || VALID_NEON_STRUCT_MODE (mode)))
13591 return NO_REGS;
13592
13593 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13594 return NO_REGS;
13595
13596 return GENERAL_REGS;
13597 }
13598
13599 /* Values which must be returned in the most-significant end of the return
13600 register. */
13601
13602 static bool
13603 arm_return_in_msb (const_tree valtype)
13604 {
13605 return (TARGET_AAPCS_BASED
13606 && BYTES_BIG_ENDIAN
13607 && (AGGREGATE_TYPE_P (valtype)
13608 || TREE_CODE (valtype) == COMPLEX_TYPE
13609 || FIXED_POINT_TYPE_P (valtype)));
13610 }
13611
13612 /* Return TRUE if X references a SYMBOL_REF. */
13613 int
13614 symbol_mentioned_p (rtx x)
13615 {
13616 const char * fmt;
13617 int i;
13618
13619 if (GET_CODE (x) == SYMBOL_REF)
13620 return 1;
13621
13622 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13623 are constant offsets, not symbols. */
13624 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13625 return 0;
13626
13627 fmt = GET_RTX_FORMAT (GET_CODE (x));
13628
13629 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13630 {
13631 if (fmt[i] == 'E')
13632 {
13633 int j;
13634
13635 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13636 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13637 return 1;
13638 }
13639 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13640 return 1;
13641 }
13642
13643 return 0;
13644 }
13645
13646 /* Return TRUE if X references a LABEL_REF. */
13647 int
13648 label_mentioned_p (rtx x)
13649 {
13650 const char * fmt;
13651 int i;
13652
13653 if (GET_CODE (x) == LABEL_REF)
13654 return 1;
13655
13656 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13657 instruction, but they are constant offsets, not symbols. */
13658 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13659 return 0;
13660
13661 fmt = GET_RTX_FORMAT (GET_CODE (x));
13662 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13663 {
13664 if (fmt[i] == 'E')
13665 {
13666 int j;
13667
13668 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13669 if (label_mentioned_p (XVECEXP (x, i, j)))
13670 return 1;
13671 }
13672 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13673 return 1;
13674 }
13675
13676 return 0;
13677 }
13678
13679 int
13680 tls_mentioned_p (rtx x)
13681 {
13682 switch (GET_CODE (x))
13683 {
13684 case CONST:
13685 return tls_mentioned_p (XEXP (x, 0));
13686
13687 case UNSPEC:
13688 if (XINT (x, 1) == UNSPEC_TLS)
13689 return 1;
13690
13691 /* Fall through. */
13692 default:
13693 return 0;
13694 }
13695 }
13696
13697 /* Must not copy any rtx that uses a pc-relative address.
13698 Also, disallow copying of load-exclusive instructions that
13699 may appear after splitting of compare-and-swap-style operations
13700 so as to prevent those loops from being transformed away from their
13701 canonical forms (see PR 69904). */
13702
13703 static bool
13704 arm_cannot_copy_insn_p (rtx_insn *insn)
13705 {
13706 /* The tls call insn cannot be copied, as it is paired with a data
13707 word. */
13708 if (recog_memoized (insn) == CODE_FOR_tlscall)
13709 return true;
13710
13711 subrtx_iterator::array_type array;
13712 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13713 {
13714 const_rtx x = *iter;
13715 if (GET_CODE (x) == UNSPEC
13716 && (XINT (x, 1) == UNSPEC_PIC_BASE
13717 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13718 return true;
13719 }
13720
13721 rtx set = single_set (insn);
13722 if (set)
13723 {
13724 rtx src = SET_SRC (set);
13725 if (GET_CODE (src) == ZERO_EXTEND)
13726 src = XEXP (src, 0);
13727
13728 /* Catch the load-exclusive and load-acquire operations. */
13729 if (GET_CODE (src) == UNSPEC_VOLATILE
13730 && (XINT (src, 1) == VUNSPEC_LL
13731 || XINT (src, 1) == VUNSPEC_LAX))
13732 return true;
13733 }
13734 return false;
13735 }
13736
13737 enum rtx_code
13738 minmax_code (rtx x)
13739 {
13740 enum rtx_code code = GET_CODE (x);
13741
13742 switch (code)
13743 {
13744 case SMAX:
13745 return GE;
13746 case SMIN:
13747 return LE;
13748 case UMIN:
13749 return LEU;
13750 case UMAX:
13751 return GEU;
13752 default:
13753 gcc_unreachable ();
13754 }
13755 }
13756
13757 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13758
13759 bool
13760 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13761 int *mask, bool *signed_sat)
13762 {
13763 /* The high bound must be a power of two minus one. */
13764 int log = exact_log2 (INTVAL (hi_bound) + 1);
13765 if (log == -1)
13766 return false;
13767
13768 /* The low bound is either zero (for usat) or one less than the
13769 negation of the high bound (for ssat). */
13770 if (INTVAL (lo_bound) == 0)
13771 {
13772 if (mask)
13773 *mask = log;
13774 if (signed_sat)
13775 *signed_sat = false;
13776
13777 return true;
13778 }
13779
13780 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13781 {
13782 if (mask)
13783 *mask = log + 1;
13784 if (signed_sat)
13785 *signed_sat = true;
13786
13787 return true;
13788 }
13789
13790 return false;
13791 }
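
/* Illustrative sketch only (hypothetical helper, never called): concrete
   instances of the bounds accepted above.  Clamping to [0, 255] matches
   the USAT form with *MASK == 8, and clamping to [-128, 127] matches the
   SSAT form, also with *MASK == 8 (log2 (127 + 1) + 1).  */

static void ATTRIBUTE_UNUSED
arm_sat_bounds_sketch (void)
{
  int mask;
  bool signed_sat;

  /* USAT case: low bound 0, high bound 2^8 - 1.  */
  if (arm_sat_operator_match (GEN_INT (0), GEN_INT (255), &mask, &signed_sat))
    gcc_assert (mask == 8 && !signed_sat);

  /* SSAT case: bounds [-(2^7), 2^7 - 1].  */
  if (arm_sat_operator_match (GEN_INT (-128), GEN_INT (127), &mask, &signed_sat))
    gcc_assert (mask == 8 && signed_sat);
}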
13792
13793 /* Return 1 if memory locations are adjacent. */
13794 int
13795 adjacent_mem_locations (rtx a, rtx b)
13796 {
13797 /* We don't guarantee to preserve the order of these memory refs. */
13798 if (volatile_refs_p (a) || volatile_refs_p (b))
13799 return 0;
13800
13801 if ((REG_P (XEXP (a, 0))
13802 || (GET_CODE (XEXP (a, 0)) == PLUS
13803 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13804 && (REG_P (XEXP (b, 0))
13805 || (GET_CODE (XEXP (b, 0)) == PLUS
13806 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13807 {
13808 HOST_WIDE_INT val0 = 0, val1 = 0;
13809 rtx reg0, reg1;
13810 int val_diff;
13811
13812 if (GET_CODE (XEXP (a, 0)) == PLUS)
13813 {
13814 reg0 = XEXP (XEXP (a, 0), 0);
13815 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13816 }
13817 else
13818 reg0 = XEXP (a, 0);
13819
13820 if (GET_CODE (XEXP (b, 0)) == PLUS)
13821 {
13822 reg1 = XEXP (XEXP (b, 0), 0);
13823 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13824 }
13825 else
13826 reg1 = XEXP (b, 0);
13827
13828 /* Don't accept any offset that will require multiple
13829 instructions to handle, since this would cause the
13830 arith_adjacentmem pattern to output an overlong sequence. */
13831 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13832 return 0;
13833
13834 /* Don't allow an eliminable register: register elimination can make
13835 the offset too large. */
13836 if (arm_eliminable_register (reg0))
13837 return 0;
13838
13839 val_diff = val1 - val0;
13840
13841 if (arm_ld_sched)
13842 {
13843 /* If the target has load delay slots, then there's no benefit
13844 to using an ldm instruction unless the offset is zero and
13845 we are optimizing for size. */
13846 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13847 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13848 && (val_diff == 4 || val_diff == -4));
13849 }
13850
13851 return ((REGNO (reg0) == REGNO (reg1))
13852 && (val_diff == 4 || val_diff == -4));
13853 }
13854
13855 return 0;
13856 }
13857
13858 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13859 for load operations, false for store operations. CONSECUTIVE is true
13860 if the register numbers in the operation must be consecutive in the register
13861 bank. RETURN_PC is true if the value is to be loaded into the PC.
13862 The pattern we are trying to match for load is:
13863 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13864 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13865 :
13866 :
13867 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13868 ]
13869 where
13870 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13871 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13872 3. If consecutive is TRUE, then for kth register being loaded,
13873 REGNO (R_dk) = REGNO (R_d0) + k.
13874 The pattern for store is similar. */
13875 bool
13876 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13877 bool consecutive, bool return_pc)
13878 {
13879 HOST_WIDE_INT count = XVECLEN (op, 0);
13880 rtx reg, mem, addr;
13881 unsigned regno;
13882 unsigned first_regno;
13883 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13884 rtx elt;
13885 bool addr_reg_in_reglist = false;
13886 bool update = false;
13887 int reg_increment;
13888 int offset_adj;
13889 int regs_per_val;
13890
13891 /* If not in SImode, then registers must be consecutive
13892 (e.g., VLDM instructions for DFmode). */
13893 gcc_assert ((mode == SImode) || consecutive);
13894 /* Setting return_pc for stores is illegal. */
13895 gcc_assert (!return_pc || load);
13896
13897 /* Set up the increments and the regs per val based on the mode. */
13898 reg_increment = GET_MODE_SIZE (mode);
13899 regs_per_val = reg_increment / 4;
13900 offset_adj = return_pc ? 1 : 0;
13901
13902 if (count <= 1
13903 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13904 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13905 return false;
13906
13907 /* Check if this is a write-back. */
13908 elt = XVECEXP (op, 0, offset_adj);
13909 if (GET_CODE (SET_SRC (elt)) == PLUS)
13910 {
13911 i++;
13912 base = 1;
13913 update = true;
13914
13915 /* The offset adjustment must be the number of registers being
13916 popped times the size of a single register. */
13917 if (!REG_P (SET_DEST (elt))
13918 || !REG_P (XEXP (SET_SRC (elt), 0))
13919 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13920 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13921 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13922 ((count - 1 - offset_adj) * reg_increment))
13923 return false;
13924 }
13925
13926 i = i + offset_adj;
13927 base = base + offset_adj;
13928 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13929 success depends on the type: VLDM can do just one reg,
13930 LDM must do at least two. */
13931 if ((count <= i) && (mode == SImode))
13932 return false;
13933
13934 elt = XVECEXP (op, 0, i - 1);
13935 if (GET_CODE (elt) != SET)
13936 return false;
13937
13938 if (load)
13939 {
13940 reg = SET_DEST (elt);
13941 mem = SET_SRC (elt);
13942 }
13943 else
13944 {
13945 reg = SET_SRC (elt);
13946 mem = SET_DEST (elt);
13947 }
13948
13949 if (!REG_P (reg) || !MEM_P (mem))
13950 return false;
13951
13952 regno = REGNO (reg);
13953 first_regno = regno;
13954 addr = XEXP (mem, 0);
13955 if (GET_CODE (addr) == PLUS)
13956 {
13957 if (!CONST_INT_P (XEXP (addr, 1)))
13958 return false;
13959
13960 offset = INTVAL (XEXP (addr, 1));
13961 addr = XEXP (addr, 0);
13962 }
13963
13964 if (!REG_P (addr))
13965 return false;
13966
13967 /* Don't allow SP to be loaded unless it is also the base register. It
13968 guarantees that SP is reset correctly when an LDM instruction
13969 is interrupted. Otherwise, we might end up with a corrupt stack. */
13970 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13971 return false;
13972
13973 if (regno == REGNO (addr))
13974 addr_reg_in_reglist = true;
13975
13976 for (; i < count; i++)
13977 {
13978 elt = XVECEXP (op, 0, i);
13979 if (GET_CODE (elt) != SET)
13980 return false;
13981
13982 if (load)
13983 {
13984 reg = SET_DEST (elt);
13985 mem = SET_SRC (elt);
13986 }
13987 else
13988 {
13989 reg = SET_SRC (elt);
13990 mem = SET_DEST (elt);
13991 }
13992
13993 if (!REG_P (reg)
13994 || GET_MODE (reg) != mode
13995 || REGNO (reg) <= regno
13996 || (consecutive
13997 && (REGNO (reg) !=
13998 (unsigned int) (first_regno + regs_per_val * (i - base))))
13999 /* Don't allow SP to be loaded unless it is also the base register. It
14000 guarantees that SP is reset correctly when an LDM instruction
14001 is interrupted. Otherwise, we might end up with a corrupt stack. */
14002 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14003 || !MEM_P (mem)
14004 || GET_MODE (mem) != mode
14005 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14006 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14007 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14008 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14009 offset + (i - base) * reg_increment))
14010 && (!REG_P (XEXP (mem, 0))
14011 || offset + (i - base) * reg_increment != 0)))
14012 return false;
14013
14014 regno = REGNO (reg);
14015 if (regno == REGNO (addr))
14016 addr_reg_in_reglist = true;
14017 }
14018
14019 if (load)
14020 {
14021 if (update && addr_reg_in_reglist)
14022 return false;
14023
14024 /* For Thumb-1, the address register is always modified, either by
14025 write-back or by an explicit load. If the pattern does not describe an
14026 update, then the address register must be in the list of loaded registers. */
14027 if (TARGET_THUMB1)
14028 return update || addr_reg_in_reglist;
14029 }
14030
14031 return true;
14032 }
14033
14034 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14035 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14036 following form:
14037
14038 [(set (reg:SI <N>) (const_int 0))
14039 (set (reg:SI <M>) (const_int 0))
14040 ...
14041 (unspec_volatile [(const_int 0)]
14042 VUNSPEC_CLRM_APSR)
14043 (clobber (reg:CC CC_REGNUM))
14044 ]
14045
14046 Any number (including 0) of set expressions is valid, the volatile unspec is
14047 optional. All registers but SP and PC are allowed and registers must be in
14048 strictly increasing order.
14049
14050 To be a valid VSCCLRM pattern, OP must have the following form:
14051
14052 [(unspec_volatile [(const_int 0)]
14053 VUNSPEC_VSCCLRM_VPR)
14054 (set (reg:SF <N>) (const_int 0))
14055 (set (reg:SF <M>) (const_int 0))
14056 ...
14057 ]
14058
14059 As with CLRM, any number (including 0) of set expressions is valid, however
14060 the volatile unspec is mandatory here. Any VFP single-precision register is
14061 accepted but all registers must be consecutive and in increasing order. */
14062
14063 bool
14064 clear_operation_p (rtx op, bool vfp)
14065 {
14066 unsigned regno;
14067 unsigned last_regno = INVALID_REGNUM;
14068 rtx elt, reg, zero;
14069 int count = XVECLEN (op, 0);
14070 int first_set = vfp ? 1 : 0;
14071 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14072
14073 for (int i = first_set; i < count; i++)
14074 {
14075 elt = XVECEXP (op, 0, i);
14076
14077 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14078 {
14079 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14080 || XVECLEN (elt, 0) != 1
14081 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14082 || i != count - 2)
14083 return false;
14084
14085 continue;
14086 }
14087
14088 if (GET_CODE (elt) == CLOBBER)
14089 continue;
14090
14091 if (GET_CODE (elt) != SET)
14092 return false;
14093
14094 reg = SET_DEST (elt);
14095 zero = SET_SRC (elt);
14096
14097 if (!REG_P (reg)
14098 || GET_MODE (reg) != expected_mode
14099 || zero != CONST0_RTX (SImode))
14100 return false;
14101
14102 regno = REGNO (reg);
14103
14104 if (vfp)
14105 {
14106 if (i != first_set && regno != last_regno + 1)
14107 return false;
14108 }
14109 else
14110 {
14111 if (regno == SP_REGNUM || regno == PC_REGNUM)
14112 return false;
14113 if (i != first_set && regno <= last_regno)
14114 return false;
14115 }
14116
14117 last_regno = regno;
14118 }
14119
14120 return true;
14121 }
14122
14123 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14124 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14125 instruction. ADD_OFFSET is nonzero if the base address register needs
14126 to be modified with an add instruction before we can use it. */
14127
14128 static bool
14129 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14130 int nops, HOST_WIDE_INT add_offset)
14131 {
14132 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14133 if the offset isn't small enough. The reason 2 ldrs are faster
14134 is because these ARMs are able to do more than one cache access
14135 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14136 whilst the ARM8 has a double bandwidth cache. This means that
14137 these cores can do both an instruction fetch and a data fetch in
14138 a single cycle, so the trick of calculating the address into a
14139 scratch register (one of the result regs) and then doing a load
14140 multiple actually becomes slower (and no smaller in code size).
14141 That is the transformation
14142
14143 ldr rd1, [rbase + offset]
14144 ldr rd2, [rbase + offset + 4]
14145
14146 to
14147
14148 add rd1, rbase, offset
14149 ldmia rd1, {rd1, rd2}
14150
14151 produces worse code -- '3 cycles + any stalls on rd2' instead of
14152 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14153 access per cycle, the first sequence could never complete in less
14154 than 6 cycles, whereas the ldm sequence would only take 5 and
14155 would make better use of sequential accesses if not hitting the
14156 cache.
14157
14158 We cheat here and test 'arm_ld_sched' which we currently know to
14159 only be true for the ARM8, ARM9 and StrongARM. If this ever
14160 changes, then the test below needs to be reworked. */
14161 if (nops == 2 && arm_ld_sched && add_offset != 0)
14162 return false;
14163
14164 /* XScale has load-store double instructions, but they have stricter
14165 alignment requirements than load-store multiple, so we cannot
14166 use them.
14167
14168 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14169 the pipeline until completion.
14170
14171 NREGS CYCLES
14172 1 3
14173 2 4
14174 3 5
14175 4 6
14176
14177 An ldr instruction takes 1-3 cycles, but does not block the
14178 pipeline.
14179
14180 NREGS CYCLES
14181 1 1-3
14182 2 2-6
14183 3 3-9
14184 4 4-12
14185
14186 Best case ldr will always win. However, the more ldr instructions
14187 we issue, the less likely we are to be able to schedule them well.
14188 Using ldr instructions also increases code size.
14189
14190 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14191 for counts of 3 or 4 regs. */
14192 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14193 return false;
14194 return true;
14195 }
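/* An illustrative reading of the XScale tables above (derived only from the
   figures quoted in the comment): for 2 registers an ldm costs 2 + 2 = 4
   cycles and blocks the pipeline, while two separate ldr instructions cost
   2-6 cycles that the scheduler can overlap with other work, so separate
   loads are preferred unless optimizing for size.  For 4 registers an ldm
   costs 6 cycles against a best case of 4 (worst 12) for four ldrs, plus
   the extra scheduling pressure and code size, so the multiple form wins.  */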
14196
14197 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14198 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14199 an array ORDER which describes the sequence to use when accessing the
14200 offsets that produces an ascending order. In this sequence, each
14201 offset must be larger by exactly 4 than the previous one. ORDER[0]
14202 must have been filled in by the caller with the index of the lowest offset.
14203 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14204 we use to verify that ORDER produces an ascending order of registers.
14205 Return true if it was possible to construct such an order, false if
14206 not. */
14207
14208 static bool
14209 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14210 int *unsorted_regs)
14211 {
14212 int i;
14213 for (i = 1; i < nops; i++)
14214 {
14215 int j;
14216
14217 order[i] = order[i - 1];
14218 for (j = 0; j < nops; j++)
14219 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14220 {
14221 /* We must find exactly one offset that is higher than the
14222 previous one by 4. */
14223 if (order[i] != order[i - 1])
14224 return false;
14225 order[i] = j;
14226 }
14227 if (order[i] == order[i - 1])
14228 return false;
14229 /* The register numbers must be ascending. */
14230 if (unsorted_regs != NULL
14231 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14232 return false;
14233 }
14234 return true;
14235 }
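/* A worked example with hypothetical values: for NOPS = 4,
   UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the index of the
   lowest offset, 0), the loop above fills ORDER = {1, 2, 0, 3}, since the
   offsets 4, 8 and 12 each exceed the previous one by exactly 4.  If
   UNSORTED_REGS = {2, 0, 1, 3} the register check also passes, because the
   registers visited along that order are 0, 1, 2, 3; swapping any two of
   them would make the function return false.  */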
14236
14237 /* Used to determine in a peephole whether a sequence of load
14238 instructions can be changed into a load-multiple instruction.
14239 NOPS is the number of separate load instructions we are examining. The
14240 first NOPS entries in OPERANDS are the destination registers, the
14241 next NOPS entries are memory operands. If this function is
14242 successful, *BASE is set to the common base register of the memory
14243 accesses; *LOAD_OFFSET is set to the first memory location's offset
14244 from that base register.
14245 REGS is an array filled in with the destination register numbers.
14246 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14247 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14248 the sequence of registers in REGS matches the loads from ascending memory
14249 locations, and the function verifies that the register numbers are
14250 themselves ascending. If CHECK_REGS is false, the register numbers
14251 are stored in the order they are found in the operands. */
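/* For illustration only (a hypothetical peephole window, not taken from the
   machine description): given the two loads

       ldr r1, [r5]
       ldr r2, [r5, #4]

   with operands[0..1] holding r1/r2 and operands[2..3] the two mems, a call
   load_multiple_sequence (operands, 2, regs, NULL, &base, &offset, true)
   returns 1 (the ldmia case) with regs = {1, 2}, base = 5 and offset = 0,
   so -- on a core where two separate loads are not preferred by the checks
   above -- the pair can be rewritten as "ldmia r5, {r1, r2}".  */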
14252 static int
14253 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14254 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14255 {
14256 int unsorted_regs[MAX_LDM_STM_OPS];
14257 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14258 int order[MAX_LDM_STM_OPS];
14259 int base_reg = -1;
14260 int i, ldm_case;
14261
14262 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14263 easily extended if required. */
14264 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14265
14266 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14267
14268 /* Loop over the operands and check that the memory references are
14269 suitable (i.e. immediate offsets from the same base register). At
14270 the same time, extract the target register, and the memory
14271 offsets. */
14272 for (i = 0; i < nops; i++)
14273 {
14274 rtx reg;
14275 rtx offset;
14276
14277 /* Convert a subreg of a mem into the mem itself. */
14278 if (GET_CODE (operands[nops + i]) == SUBREG)
14279 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14280
14281 gcc_assert (MEM_P (operands[nops + i]));
14282
14283 /* Don't reorder volatile memory references; it doesn't seem worth
14284 looking for the case where the order is ok anyway. */
14285 if (MEM_VOLATILE_P (operands[nops + i]))
14286 return 0;
14287
14288 offset = const0_rtx;
14289
14290 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14291 || (GET_CODE (reg) == SUBREG
14292 && REG_P (reg = SUBREG_REG (reg))))
14293 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14294 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14295 || (GET_CODE (reg) == SUBREG
14296 && REG_P (reg = SUBREG_REG (reg))))
14297 && (CONST_INT_P (offset
14298 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14299 {
14300 if (i == 0)
14301 {
14302 base_reg = REGNO (reg);
14303 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14304 return 0;
14305 }
14306 else if (base_reg != (int) REGNO (reg))
14307 /* Not addressed from the same base register. */
14308 return 0;
14309
14310 unsorted_regs[i] = (REG_P (operands[i])
14311 ? REGNO (operands[i])
14312 : REGNO (SUBREG_REG (operands[i])));
14313
14314 /* If it isn't an integer register, or if it overwrites the
14315 base register but isn't the last insn in the list, then
14316 we can't do this. */
14317 if (unsorted_regs[i] < 0
14318 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14319 || unsorted_regs[i] > 14
14320 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14321 return 0;
14322
14323 /* Don't allow SP to be loaded unless it is also the base
14324 register. It guarantees that SP is reset correctly when
14325 an LDM instruction is interrupted. Otherwise, we might
14326 end up with a corrupt stack. */
14327 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14328 return 0;
14329
14330 unsorted_offsets[i] = INTVAL (offset);
14331 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14332 order[0] = i;
14333 }
14334 else
14335 /* Not a suitable memory address. */
14336 return 0;
14337 }
14338
14339 /* All the useful information has now been extracted from the
14340 operands into unsorted_regs and unsorted_offsets; additionally,
14341 order[0] has been set to the lowest offset in the list. Sort
14342 the offsets into order, verifying that they are adjacent, and
14343 check that the register numbers are ascending. */
14344 if (!compute_offset_order (nops, unsorted_offsets, order,
14345 check_regs ? unsorted_regs : NULL))
14346 return 0;
14347
14348 if (saved_order)
14349 memcpy (saved_order, order, sizeof order);
14350
14351 if (base)
14352 {
14353 *base = base_reg;
14354
14355 for (i = 0; i < nops; i++)
14356 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14357
14358 *load_offset = unsorted_offsets[order[0]];
14359 }
14360
14361 if (unsorted_offsets[order[0]] == 0)
14362 ldm_case = 1; /* ldmia */
14363 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14364 ldm_case = 2; /* ldmib */
14365 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14366 ldm_case = 3; /* ldmda */
14367 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14368 ldm_case = 4; /* ldmdb */
14369 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14370 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14371 ldm_case = 5;
14372 else
14373 return 0;
14374
14375 if (!multiple_operation_profitable_p (false, nops,
14376 ldm_case == 5
14377 ? unsorted_offsets[order[0]] : 0))
14378 return 0;
14379
14380 return ldm_case;
14381 }
14382
14383 /* Used to determine in a peephole whether a sequence of store instructions can
14384 be changed into a store-multiple instruction.
14385 NOPS is the number of separate store instructions we are examining.
14386 NOPS_TOTAL is the total number of instructions recognized by the peephole
14387 pattern.
14388 The first NOPS entries in OPERANDS are the source registers, the next
14389 NOPS entries are memory operands. If this function is successful, *BASE is
14390 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14391 to the first memory location's offset from that base register. REGS is an
14392 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14393 likewise filled with the corresponding rtx's.
14394 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14395 numbers to an ascending order of stores.
14396 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14397 from ascending memory locations, and the function verifies that the register
14398 numbers are themselves ascending. If CHECK_REGS is false, the register
14399 numbers are stored in the order they are found in the operands. */
14400 static int
14401 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14402 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14403 HOST_WIDE_INT *load_offset, bool check_regs)
14404 {
14405 int unsorted_regs[MAX_LDM_STM_OPS];
14406 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14407 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14408 int order[MAX_LDM_STM_OPS];
14409 int base_reg = -1;
14410 rtx base_reg_rtx = NULL;
14411 int i, stm_case;
14412
14413 /* Write back of base register is currently only supported for Thumb 1. */
14414 int base_writeback = TARGET_THUMB1;
14415
14416 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14417 easily extended if required. */
14418 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14419
14420 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14421
14422 /* Loop over the operands and check that the memory references are
14423 suitable (i.e. immediate offsets from the same base register). At
14424 the same time, extract the target register, and the memory
14425 offsets. */
14426 for (i = 0; i < nops; i++)
14427 {
14428 rtx reg;
14429 rtx offset;
14430
14431 /* Convert a subreg of a mem into the mem itself. */
14432 if (GET_CODE (operands[nops + i]) == SUBREG)
14433 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14434
14435 gcc_assert (MEM_P (operands[nops + i]));
14436
14437 /* Don't reorder volatile memory references; it doesn't seem worth
14438 looking for the case where the order is ok anyway. */
14439 if (MEM_VOLATILE_P (operands[nops + i]))
14440 return 0;
14441
14442 offset = const0_rtx;
14443
14444 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14445 || (GET_CODE (reg) == SUBREG
14446 && REG_P (reg = SUBREG_REG (reg))))
14447 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14448 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14449 || (GET_CODE (reg) == SUBREG
14450 && REG_P (reg = SUBREG_REG (reg))))
14451 && (CONST_INT_P (offset
14452 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14453 {
14454 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14455 ? operands[i] : SUBREG_REG (operands[i]));
14456 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14457
14458 if (i == 0)
14459 {
14460 base_reg = REGNO (reg);
14461 base_reg_rtx = reg;
14462 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14463 return 0;
14464 }
14465 else if (base_reg != (int) REGNO (reg))
14466 /* Not addressed from the same base register. */
14467 return 0;
14468
14469 /* If it isn't an integer register, then we can't do this. */
14470 if (unsorted_regs[i] < 0
14471 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14472 /* The effects are unpredictable if the base register is
14473 both updated and stored. */
14474 || (base_writeback && unsorted_regs[i] == base_reg)
14475 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14476 || unsorted_regs[i] > 14)
14477 return 0;
14478
14479 unsorted_offsets[i] = INTVAL (offset);
14480 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14481 order[0] = i;
14482 }
14483 else
14484 /* Not a suitable memory address. */
14485 return 0;
14486 }
14487
14488 /* All the useful information has now been extracted from the
14489 operands into unsorted_regs and unsorted_offsets; additionally,
14490 order[0] has been set to the lowest offset in the list. Sort
14491 the offsets into order, verifying that they are adjacent, and
14492 check that the register numbers are ascending. */
14493 if (!compute_offset_order (nops, unsorted_offsets, order,
14494 check_regs ? unsorted_regs : NULL))
14495 return 0;
14496
14497 if (saved_order)
14498 memcpy (saved_order, order, sizeof order);
14499
14500 if (base)
14501 {
14502 *base = base_reg;
14503
14504 for (i = 0; i < nops; i++)
14505 {
14506 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14507 if (reg_rtxs)
14508 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14509 }
14510
14511 *load_offset = unsorted_offsets[order[0]];
14512 }
14513
14514 if (TARGET_THUMB1
14515 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14516 return 0;
14517
14518 if (unsorted_offsets[order[0]] == 0)
14519 stm_case = 1; /* stmia */
14520 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14521 stm_case = 2; /* stmib */
14522 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14523 stm_case = 3; /* stmda */
14524 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14525 stm_case = 4; /* stmdb */
14526 else
14527 return 0;
14528
14529 if (!multiple_operation_profitable_p (false, nops, 0))
14530 return 0;
14531
14532 return stm_case;
14533 }
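/* As a rough illustration of the case numbers returned above (assuming all
   other checks pass): stores at [rn], [rn, #4], ... give case 1 (stmia);
   stores starting at [rn, #4] give case 2 (stmib, ARM state only); a run
   whose highest offset is 0, e.g. [rn, #-8], [rn, #-4], [rn], gives case 3
   (stmda, ARM state only); and one whose highest offset is -4 gives case 4
   (stmdb).  Unlike the load counterpart, this function never returns a
   case 5 for an arbitrary base offset.  */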
14534 \f
14535 /* Routines for use in generating RTL. */
14536
14537 /* Generate a load-multiple instruction. COUNT is the number of loads in
14538 the instruction; REGS and MEMS are arrays containing the operands.
14539 BASEREG is the base register to be used in addressing the memory operands.
14540 WBACK_OFFSET is nonzero if the instruction should update the base
14541 register. */
14542
14543 static rtx
14544 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14545 HOST_WIDE_INT wback_offset)
14546 {
14547 int i = 0, j;
14548 rtx result;
14549
14550 if (!multiple_operation_profitable_p (false, count, 0))
14551 {
14552 rtx seq;
14553
14554 start_sequence ();
14555
14556 for (i = 0; i < count; i++)
14557 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14558
14559 if (wback_offset != 0)
14560 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14561
14562 seq = get_insns ();
14563 end_sequence ();
14564
14565 return seq;
14566 }
14567
14568 result = gen_rtx_PARALLEL (VOIDmode,
14569 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14570 if (wback_offset != 0)
14571 {
14572 XVECEXP (result, 0, 0)
14573 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14574 i = 1;
14575 count++;
14576 }
14577
14578 for (j = 0; i < count; i++, j++)
14579 XVECEXP (result, 0, i)
14580 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14581
14582 return result;
14583 }
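/* A sketch of the RTL produced above for COUNT = 2, REGS = {4, 5} and a
   writeback offset of 8 (values invented for the example):

     (parallel [(set (reg:SI basereg)
                     (plus:SI (reg:SI basereg) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   When the profitability check fails, the same arguments instead yield a
   plain sequence of two register moves plus the base-register update.  */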
14584
14585 /* Generate a store-multiple instruction. COUNT is the number of stores in
14586 the instruction; REGS and MEMS are arrays containing the operands.
14587 BASEREG is the base register to be used in addressing the memory operands.
14588 WBACK_OFFSET is nonzero if the instruction should update the base
14589 register. */
14590
14591 static rtx
14592 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14593 HOST_WIDE_INT wback_offset)
14594 {
14595 int i = 0, j;
14596 rtx result;
14597
14598 if (GET_CODE (basereg) == PLUS)
14599 basereg = XEXP (basereg, 0);
14600
14601 if (!multiple_operation_profitable_p (false, count, 0))
14602 {
14603 rtx seq;
14604
14605 start_sequence ();
14606
14607 for (i = 0; i < count; i++)
14608 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14609
14610 if (wback_offset != 0)
14611 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14612
14613 seq = get_insns ();
14614 end_sequence ();
14615
14616 return seq;
14617 }
14618
14619 result = gen_rtx_PARALLEL (VOIDmode,
14620 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14621 if (wback_offset != 0)
14622 {
14623 XVECEXP (result, 0, 0)
14624 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14625 i = 1;
14626 count++;
14627 }
14628
14629 for (j = 0; i < count; i++, j++)
14630 XVECEXP (result, 0, i)
14631 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14632
14633 return result;
14634 }
14635
14636 /* Generate either a load-multiple or a store-multiple instruction. This
14637 function can be used in situations where we can start with a single MEM
14638 rtx and adjust its address upwards.
14639 COUNT is the number of operations in the instruction, not counting a
14640 possible update of the base register. REGS is an array containing the
14641 register operands.
14642 BASEREG is the base register to be used in addressing the memory operands,
14643 which are constructed from BASEMEM.
14644 WRITE_BACK specifies whether the generated instruction should include an
14645 update of the base register.
14646 OFFSETP is used to pass an offset to and from this function; this offset
14647 is not used when constructing the address (instead BASEMEM should have an
14648 appropriate offset in its address); it is used only for setting
14649 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14650
14651 static rtx
14652 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14653 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14654 {
14655 rtx mems[MAX_LDM_STM_OPS];
14656 HOST_WIDE_INT offset = *offsetp;
14657 int i;
14658
14659 gcc_assert (count <= MAX_LDM_STM_OPS);
14660
14661 if (GET_CODE (basereg) == PLUS)
14662 basereg = XEXP (basereg, 0);
14663
14664 for (i = 0; i < count; i++)
14665 {
14666 rtx addr = plus_constant (Pmode, basereg, i * 4);
14667 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14668 offset += 4;
14669 }
14670
14671 if (write_back)
14672 *offsetp = offset;
14673
14674 if (is_load)
14675 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14676 write_back ? 4 * count : 0);
14677 else
14678 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14679 write_back ? 4 * count : 0);
14680 }
14681
14682 rtx
14683 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14684 rtx basemem, HOST_WIDE_INT *offsetp)
14685 {
14686 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14687 offsetp);
14688 }
14689
14690 rtx
14691 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14692 rtx basemem, HOST_WIDE_INT *offsetp)
14693 {
14694 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14695 offsetp);
14696 }
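/* Typical use, as in the block-copy code later in this file (assuming
   arm_regs_in_sequence lists the core registers in ascending order):

     emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                       TRUE, srcbase, &srcoffset));

   loads four words into r0-r3, makes the instruction write SRC back with an
   increment of 16, and advances SRCOFFSET by 16 so that any later scalar
   copies know how far the copy has progressed.  */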
14697
14698 /* Called from a peephole2 expander to turn a sequence of loads into an
14699 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14700 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14701 is true if we can reorder the registers because they are used commutatively
14702 subsequently.
14703 Returns true iff we could generate a new instruction. */
14704
14705 bool
14706 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14707 {
14708 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14709 rtx mems[MAX_LDM_STM_OPS];
14710 int i, j, base_reg;
14711 rtx base_reg_rtx;
14712 HOST_WIDE_INT offset;
14713 int write_back = FALSE;
14714 int ldm_case;
14715 rtx addr;
14716
14717 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14718 &base_reg, &offset, !sort_regs);
14719
14720 if (ldm_case == 0)
14721 return false;
14722
14723 if (sort_regs)
14724 for (i = 0; i < nops - 1; i++)
14725 for (j = i + 1; j < nops; j++)
14726 if (regs[i] > regs[j])
14727 {
14728 int t = regs[i];
14729 regs[i] = regs[j];
14730 regs[j] = t;
14731 }
14732 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14733
14734 if (TARGET_THUMB1)
14735 {
14736 gcc_assert (ldm_case == 1 || ldm_case == 5);
14737
14738 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14739 write_back = true;
14740 for (i = 0; i < nops; i++)
14741 if (base_reg == regs[i])
14742 write_back = false;
14743
14744 /* Ensure the base is dead if it is updated. */
14745 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14746 return false;
14747 }
14748
14749 if (ldm_case == 5)
14750 {
14751 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14752 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14753 offset = 0;
14754 base_reg_rtx = newbase;
14755 }
14756
14757 for (i = 0; i < nops; i++)
14758 {
14759 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14760 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14761 SImode, addr, 0);
14762 }
14763 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14764 write_back ? offset + i * 4 : 0));
14765 return true;
14766 }
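/* For example (registers invented, and assuming the cost checks accept the
   transformation), a peephole window containing

       ldr r0, [r4, #8]
       ldr r1, [r4, #12]

   hits ldm_case 5 and is rewritten as

       add r0, r4, #8
       ldmia r0, {r0, r1}

   reusing the first destination register to hold the adjusted base.  */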
14767
14768 /* Called from a peephole2 expander to turn a sequence of stores into an
14769 STM instruction. OPERANDS are the operands found by the peephole matcher;
14770 NOPS indicates how many separate stores we are trying to combine.
14771 Returns true iff we could generate a new instruction. */
14772
14773 bool
14774 gen_stm_seq (rtx *operands, int nops)
14775 {
14776 int i;
14777 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14778 rtx mems[MAX_LDM_STM_OPS];
14779 int base_reg;
14780 rtx base_reg_rtx;
14781 HOST_WIDE_INT offset;
14782 int write_back = FALSE;
14783 int stm_case;
14784 rtx addr;
14785 bool base_reg_dies;
14786
14787 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14788 mem_order, &base_reg, &offset, true);
14789
14790 if (stm_case == 0)
14791 return false;
14792
14793 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14794
14795 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14796 if (TARGET_THUMB1)
14797 {
14798 gcc_assert (base_reg_dies);
14799 write_back = TRUE;
14800 }
14801
14802 if (stm_case == 5)
14803 {
14804 gcc_assert (base_reg_dies);
14805 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14806 offset = 0;
14807 }
14808
14809 addr = plus_constant (Pmode, base_reg_rtx, offset);
14810
14811 for (i = 0; i < nops; i++)
14812 {
14813 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14814 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14815 SImode, addr, 0);
14816 }
14817 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14818 write_back ? offset + i * 4 : 0));
14819 return true;
14820 }
14821
14822 /* Called from a peephole2 expander to turn a sequence of stores that are
14823 preceded by constant loads into an STM instruction. OPERANDS are the
14824 operands found by the peephole matcher; NOPS indicates how many
14825 separate stores we are trying to combine; there are 2 * NOPS
14826 instructions in the peephole.
14827 Returns true iff we could generate a new instruction. */
14828
14829 bool
14830 gen_const_stm_seq (rtx *operands, int nops)
14831 {
14832 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14833 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14834 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14835 rtx mems[MAX_LDM_STM_OPS];
14836 int base_reg;
14837 rtx base_reg_rtx;
14838 HOST_WIDE_INT offset;
14839 int write_back = FALSE;
14840 int stm_case;
14841 rtx addr;
14842 bool base_reg_dies;
14843 int i, j;
14844 HARD_REG_SET allocated;
14845
14846 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14847 mem_order, &base_reg, &offset, false);
14848
14849 if (stm_case == 0)
14850 return false;
14851
14852 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14853
14854 /* If the same register is used more than once, try to find a free
14855 register. */
14856 CLEAR_HARD_REG_SET (allocated);
14857 for (i = 0; i < nops; i++)
14858 {
14859 for (j = i + 1; j < nops; j++)
14860 if (regs[i] == regs[j])
14861 {
14862 rtx t = peep2_find_free_register (0, nops * 2,
14863 TARGET_THUMB1 ? "l" : "r",
14864 SImode, &allocated);
14865 if (t == NULL_RTX)
14866 return false;
14867 reg_rtxs[i] = t;
14868 regs[i] = REGNO (t);
14869 }
14870 }
14871
14872 /* Compute an ordering that maps the register numbers to an ascending
14873 sequence. */
14874 reg_order[0] = 0;
14875 for (i = 0; i < nops; i++)
14876 if (regs[i] < regs[reg_order[0]])
14877 reg_order[0] = i;
14878
14879 for (i = 1; i < nops; i++)
14880 {
14881 int this_order = reg_order[i - 1];
14882 for (j = 0; j < nops; j++)
14883 if (regs[j] > regs[reg_order[i - 1]]
14884 && (this_order == reg_order[i - 1]
14885 || regs[j] < regs[this_order]))
14886 this_order = j;
14887 reg_order[i] = this_order;
14888 }
14889
14890 /* Ensure that registers that must be live after the instruction end
14891 up with the correct value. */
14892 for (i = 0; i < nops; i++)
14893 {
14894 int this_order = reg_order[i];
14895 if ((this_order != mem_order[i]
14896 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14897 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14898 return false;
14899 }
14900
14901 /* Load the constants. */
14902 for (i = 0; i < nops; i++)
14903 {
14904 rtx op = operands[2 * nops + mem_order[i]];
14905 sorted_regs[i] = regs[reg_order[i]];
14906 emit_move_insn (reg_rtxs[reg_order[i]], op);
14907 }
14908
14909 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14910
14911 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14912 if (TARGET_THUMB1)
14913 {
14914 gcc_assert (base_reg_dies);
14915 write_back = TRUE;
14916 }
14917
14918 if (stm_case == 5)
14919 {
14920 gcc_assert (base_reg_dies);
14921 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14922 offset = 0;
14923 }
14924
14925 addr = plus_constant (Pmode, base_reg_rtx, offset);
14926
14927 for (i = 0; i < nops; i++)
14928 {
14929 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14930 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14931 SImode, addr, 0);
14932 }
14933 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14934 write_back ? offset + i * 4 : 0));
14935 return true;
14936 }
14937
14938 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14939 unaligned copies on processors which support unaligned semantics for those
14940 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14941 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14942 An interleave factor of 1 (the minimum) will perform no interleaving.
14943 Load/store multiple are used for aligned addresses where possible. */
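/* A sketch of the interleaving (register names purely illustrative): with a
   factor of 2 and neither side word-aligned, one 8-byte block is copied as

       ldr r0, [src]       @ unaligned load
       ldr r1, [src, #4]
       str r0, [dst]       @ unaligned store
       str r1, [dst, #4]

   i.e. load/load/store/store, giving the second load time to complete
   before its value is consumed.  When one side is word-aligned, the pair of
   loads or stores on that side collapses into a single ldm or stm.  */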
14944
14945 static void
14946 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14947 HOST_WIDE_INT length,
14948 unsigned int interleave_factor)
14949 {
14950 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14951 int *regnos = XALLOCAVEC (int, interleave_factor);
14952 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14953 HOST_WIDE_INT i, j;
14954 HOST_WIDE_INT remaining = length, words;
14955 rtx halfword_tmp = NULL, byte_tmp = NULL;
14956 rtx dst, src;
14957 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14958 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14959 HOST_WIDE_INT srcoffset, dstoffset;
14960 HOST_WIDE_INT src_autoinc, dst_autoinc;
14961 rtx mem, addr;
14962
14963 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14964
14965 /* Use hard registers if we have aligned source or destination so we can use
14966 load/store multiple with contiguous registers. */
14967 if (dst_aligned || src_aligned)
14968 for (i = 0; i < interleave_factor; i++)
14969 regs[i] = gen_rtx_REG (SImode, i);
14970 else
14971 for (i = 0; i < interleave_factor; i++)
14972 regs[i] = gen_reg_rtx (SImode);
14973
14974 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14975 src = copy_addr_to_reg (XEXP (srcbase, 0));
14976
14977 srcoffset = dstoffset = 0;
14978
14979 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14980 For copying the last bytes we want to subtract this offset again. */
14981 src_autoinc = dst_autoinc = 0;
14982
14983 for (i = 0; i < interleave_factor; i++)
14984 regnos[i] = i;
14985
14986 /* Copy BLOCK_SIZE_BYTES chunks. */
14987
14988 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14989 {
14990 /* Load words. */
14991 if (src_aligned && interleave_factor > 1)
14992 {
14993 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14994 TRUE, srcbase, &srcoffset));
14995 src_autoinc += UNITS_PER_WORD * interleave_factor;
14996 }
14997 else
14998 {
14999 for (j = 0; j < interleave_factor; j++)
15000 {
15001 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15002 - src_autoinc));
15003 mem = adjust_automodify_address (srcbase, SImode, addr,
15004 srcoffset + j * UNITS_PER_WORD);
15005 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15006 }
15007 srcoffset += block_size_bytes;
15008 }
15009
15010 /* Store words. */
15011 if (dst_aligned && interleave_factor > 1)
15012 {
15013 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15014 TRUE, dstbase, &dstoffset));
15015 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15016 }
15017 else
15018 {
15019 for (j = 0; j < interleave_factor; j++)
15020 {
15021 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15022 - dst_autoinc));
15023 mem = adjust_automodify_address (dstbase, SImode, addr,
15024 dstoffset + j * UNITS_PER_WORD);
15025 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15026 }
15027 dstoffset += block_size_bytes;
15028 }
15029
15030 remaining -= block_size_bytes;
15031 }
15032
15033 /* Copy any whole words left (note these aren't interleaved with any
15034 subsequent halfword/byte load/stores in the interests of simplicity). */
15035
15036 words = remaining / UNITS_PER_WORD;
15037
15038 gcc_assert (words < interleave_factor);
15039
15040 if (src_aligned && words > 1)
15041 {
15042 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15043 &srcoffset));
15044 src_autoinc += UNITS_PER_WORD * words;
15045 }
15046 else
15047 {
15048 for (j = 0; j < words; j++)
15049 {
15050 addr = plus_constant (Pmode, src,
15051 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15052 mem = adjust_automodify_address (srcbase, SImode, addr,
15053 srcoffset + j * UNITS_PER_WORD);
15054 if (src_aligned)
15055 emit_move_insn (regs[j], mem);
15056 else
15057 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15058 }
15059 srcoffset += words * UNITS_PER_WORD;
15060 }
15061
15062 if (dst_aligned && words > 1)
15063 {
15064 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15065 &dstoffset));
15066 dst_autoinc += words * UNITS_PER_WORD;
15067 }
15068 else
15069 {
15070 for (j = 0; j < words; j++)
15071 {
15072 addr = plus_constant (Pmode, dst,
15073 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15074 mem = adjust_automodify_address (dstbase, SImode, addr,
15075 dstoffset + j * UNITS_PER_WORD);
15076 if (dst_aligned)
15077 emit_move_insn (mem, regs[j]);
15078 else
15079 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15080 }
15081 dstoffset += words * UNITS_PER_WORD;
15082 }
15083
15084 remaining -= words * UNITS_PER_WORD;
15085
15086 gcc_assert (remaining < 4);
15087
15088 /* Copy a halfword if necessary. */
15089
15090 if (remaining >= 2)
15091 {
15092 halfword_tmp = gen_reg_rtx (SImode);
15093
15094 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15095 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15096 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15097
15098 /* Either write out immediately, or delay until we've loaded the last
15099 byte, depending on interleave factor. */
15100 if (interleave_factor == 1)
15101 {
15102 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15103 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15104 emit_insn (gen_unaligned_storehi (mem,
15105 gen_lowpart (HImode, halfword_tmp)));
15106 halfword_tmp = NULL;
15107 dstoffset += 2;
15108 }
15109
15110 remaining -= 2;
15111 srcoffset += 2;
15112 }
15113
15114 gcc_assert (remaining < 2);
15115
15116 /* Copy last byte. */
15117
15118 if ((remaining & 1) != 0)
15119 {
15120 byte_tmp = gen_reg_rtx (SImode);
15121
15122 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15123 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15124 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15125
15126 if (interleave_factor == 1)
15127 {
15128 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15129 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15130 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15131 byte_tmp = NULL;
15132 dstoffset++;
15133 }
15134
15135 remaining--;
15136 srcoffset++;
15137 }
15138
15139 /* Store last halfword if we haven't done so already. */
15140
15141 if (halfword_tmp)
15142 {
15143 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15144 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15145 emit_insn (gen_unaligned_storehi (mem,
15146 gen_lowpart (HImode, halfword_tmp)));
15147 dstoffset += 2;
15148 }
15149
15150 /* Likewise for last byte. */
15151
15152 if (byte_tmp)
15153 {
15154 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15155 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15156 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15157 dstoffset++;
15158 }
15159
15160 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15161 }
15162
15163 /* From mips_adjust_block_mem:
15164
15165 Helper function for doing a loop-based block operation on memory
15166 reference MEM. Each iteration of the loop will operate on LENGTH
15167 bytes of MEM.
15168
15169 Create a new base register for use within the loop and point it to
15170 the start of MEM. Create a new memory reference that uses this
15171 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15172
15173 static void
15174 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15175 rtx *loop_mem)
15176 {
15177 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15178
15179 /* Although the new mem does not refer to a known location,
15180 it does keep up to LENGTH bytes of alignment. */
15181 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15182 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15183 }
15184
15185 /* From mips_block_move_loop:
15186
15187 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15188 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15189 the memory regions do not overlap. */
15190
15191 static void
15192 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15193 unsigned int interleave_factor,
15194 HOST_WIDE_INT bytes_per_iter)
15195 {
15196 rtx src_reg, dest_reg, final_src, test;
15197 HOST_WIDE_INT leftover;
15198
15199 leftover = length % bytes_per_iter;
15200 length -= leftover;
15201
15202 /* Create registers and memory references for use within the loop. */
15203 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15204 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15205
15206 /* Calculate the value that SRC_REG should have after the last iteration of
15207 the loop. */
15208 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15209 0, 0, OPTAB_WIDEN);
15210
15211 /* Emit the start of the loop. */
15212 rtx_code_label *label = gen_label_rtx ();
15213 emit_label (label);
15214
15215 /* Emit the loop body. */
15216 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15217 interleave_factor);
15218
15219 /* Move on to the next block. */
15220 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15221 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15222
15223 /* Emit the loop condition. */
15224 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15225 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15226
15227 /* Mop up any left-over bytes. */
15228 if (leftover)
15229 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15230 }
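/* Worked example (numbers chosen for illustration): for LENGTH = 100 and
   BYTES_PER_ITER = 16, LEFTOVER is 100 % 16 = 4, so the loop above copies
   96 bytes in six iterations and the final straight-line call mops up the
   remaining 4 bytes.  */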
15231
15232 /* Emit a block move when either the source or destination is unaligned (not
15233 aligned to a four-byte boundary). This may need further tuning depending on
15234 core type, optimize_size setting, etc. */
15235
15236 static int
15237 arm_cpymemqi_unaligned (rtx *operands)
15238 {
15239 HOST_WIDE_INT length = INTVAL (operands[2]);
15240
15241 if (optimize_size)
15242 {
15243 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15244 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15245 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15246 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15247 or dst_aligned though: allow more interleaving in those cases since the
15248 resulting code can be smaller. */
15249 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15250 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15251
15252 if (length > 12)
15253 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15254 interleave_factor, bytes_per_iter);
15255 else
15256 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15257 interleave_factor);
15258 }
15259 else
15260 {
15261 /* Note that the loop created by arm_block_move_unaligned_loop may be
15262 subject to loop unrolling, which makes tuning this condition a little
15263 redundant. */
15264 if (length > 32)
15265 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15266 else
15267 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15268 }
15269
15270 return 1;
15271 }
15272
15273 int
15274 arm_gen_cpymemqi (rtx *operands)
15275 {
15276 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15277 HOST_WIDE_INT srcoffset, dstoffset;
15278 rtx src, dst, srcbase, dstbase;
15279 rtx part_bytes_reg = NULL;
15280 rtx mem;
15281
15282 if (!CONST_INT_P (operands[2])
15283 || !CONST_INT_P (operands[3])
15284 || INTVAL (operands[2]) > 64)
15285 return 0;
15286
15287 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15288 return arm_cpymemqi_unaligned (operands);
15289
15290 if (INTVAL (operands[3]) & 3)
15291 return 0;
15292
15293 dstbase = operands[0];
15294 srcbase = operands[1];
15295
15296 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15297 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15298
15299 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15300 out_words_to_go = INTVAL (operands[2]) / 4;
15301 last_bytes = INTVAL (operands[2]) & 3;
15302 dstoffset = srcoffset = 0;
15303
15304 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15305 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15306
15307 while (in_words_to_go >= 2)
15308 {
15309 if (in_words_to_go > 4)
15310 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15311 TRUE, srcbase, &srcoffset));
15312 else
15313 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15314 src, FALSE, srcbase,
15315 &srcoffset));
15316
15317 if (out_words_to_go)
15318 {
15319 if (out_words_to_go > 4)
15320 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15321 TRUE, dstbase, &dstoffset));
15322 else if (out_words_to_go != 1)
15323 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15324 out_words_to_go, dst,
15325 (last_bytes == 0
15326 ? FALSE : TRUE),
15327 dstbase, &dstoffset));
15328 else
15329 {
15330 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15331 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15332 if (last_bytes != 0)
15333 {
15334 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15335 dstoffset += 4;
15336 }
15337 }
15338 }
15339
15340 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15341 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15342 }
15343
15344 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15345 if (out_words_to_go)
15346 {
15347 rtx sreg;
15348
15349 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15350 sreg = copy_to_reg (mem);
15351
15352 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15353 emit_move_insn (mem, sreg);
15354 in_words_to_go--;
15355
15356 gcc_assert (!in_words_to_go); /* Sanity check */
15357 }
15358
15359 if (in_words_to_go)
15360 {
15361 gcc_assert (in_words_to_go > 0);
15362
15363 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15364 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15365 }
15366
15367 gcc_assert (!last_bytes || part_bytes_reg);
15368
15369 if (BYTES_BIG_ENDIAN && last_bytes)
15370 {
15371 rtx tmp = gen_reg_rtx (SImode);
15372
15373 /* The bytes we want are in the top end of the word. */
15374 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15375 GEN_INT (8 * (4 - last_bytes))));
15376 part_bytes_reg = tmp;
15377
15378 while (last_bytes)
15379 {
15380 mem = adjust_automodify_address (dstbase, QImode,
15381 plus_constant (Pmode, dst,
15382 last_bytes - 1),
15383 dstoffset + last_bytes - 1);
15384 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15385
15386 if (--last_bytes)
15387 {
15388 tmp = gen_reg_rtx (SImode);
15389 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15390 part_bytes_reg = tmp;
15391 }
15392 }
15393
15394 }
15395 else
15396 {
15397 if (last_bytes > 1)
15398 {
15399 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15400 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15401 last_bytes -= 2;
15402 if (last_bytes)
15403 {
15404 rtx tmp = gen_reg_rtx (SImode);
15405 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15406 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15407 part_bytes_reg = tmp;
15408 dstoffset += 2;
15409 }
15410 }
15411
15412 if (last_bytes)
15413 {
15414 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15415 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15416 }
15417 }
15418
15419 return 1;
15420 }
15421
15422 /* Helper for gen_cpymem_ldrd_strd. Return a copy of the memory rtx MEM
15423 with its address advanced by the size of its mode. */
15424 inline static rtx
15425 next_consecutive_mem (rtx mem)
15426 {
15427 machine_mode mode = GET_MODE (mem);
15428 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15429 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15430
15431 return adjust_automodify_address (mem, mode, addr, offset);
15432 }
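/* For instance (illustrative only), if MEM is an SImode reference at
   [r3, #8], the returned rtx refers to [r3, #12] and its recorded offset
   likewise advances by 4; for an HImode MEM the step would be 2.  */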
15433
15434 /* Copy using LDRD/STRD instructions whenever possible.
15435 Returns true upon success. */
15436 bool
15437 gen_cpymem_ldrd_strd (rtx *operands)
15438 {
15439 unsigned HOST_WIDE_INT len;
15440 HOST_WIDE_INT align;
15441 rtx src, dst, base;
15442 rtx reg0;
15443 bool src_aligned, dst_aligned;
15444 bool src_volatile, dst_volatile;
15445
15446 gcc_assert (CONST_INT_P (operands[2]));
15447 gcc_assert (CONST_INT_P (operands[3]));
15448
15449 len = UINTVAL (operands[2]);
15450 if (len > 64)
15451 return false;
15452
15453 /* Maximum alignment we can assume for both src and dst buffers. */
15454 align = INTVAL (operands[3]);
15455
15456 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15457 return false;
15458
15459 /* Place src and dst addresses in registers
15460 and update the corresponding mem rtx. */
15461 dst = operands[0];
15462 dst_volatile = MEM_VOLATILE_P (dst);
15463 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15464 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15465 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15466
15467 src = operands[1];
15468 src_volatile = MEM_VOLATILE_P (src);
15469 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15470 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15471 src = adjust_automodify_address (src, VOIDmode, base, 0);
15472
15473 if (!unaligned_access && !(src_aligned && dst_aligned))
15474 return false;
15475
15476 if (src_volatile || dst_volatile)
15477 return false;
15478
15479 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15480 if (!(dst_aligned || src_aligned))
15481 return arm_gen_cpymemqi (operands);
15482
15483 /* If either src or dst is unaligned we'll be accessing it as pairs
15484 of unaligned SImode accesses. Otherwise we can generate DImode
15485 ldrd/strd instructions. */
15486 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15487 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15488
15489 while (len >= 8)
15490 {
15491 len -= 8;
15492 reg0 = gen_reg_rtx (DImode);
15493 rtx low_reg = NULL_RTX;
15494 rtx hi_reg = NULL_RTX;
15495
15496 if (!src_aligned || !dst_aligned)
15497 {
15498 low_reg = gen_lowpart (SImode, reg0);
15499 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15500 }
15501 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15502 emit_move_insn (reg0, src);
15503 else if (src_aligned)
15504 emit_insn (gen_unaligned_loaddi (reg0, src));
15505 else
15506 {
15507 emit_insn (gen_unaligned_loadsi (low_reg, src));
15508 src = next_consecutive_mem (src);
15509 emit_insn (gen_unaligned_loadsi (hi_reg, src));
15510 }
15511
15512 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15513 emit_move_insn (dst, reg0);
15514 else if (dst_aligned)
15515 emit_insn (gen_unaligned_storedi (dst, reg0));
15516 else
15517 {
15518 emit_insn (gen_unaligned_storesi (dst, low_reg));
15519 dst = next_consecutive_mem (dst);
15520 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15521 }
15522
15523 src = next_consecutive_mem (src);
15524 dst = next_consecutive_mem (dst);
15525 }
15526
15527 gcc_assert (len < 8);
15528 if (len >= 4)
15529 {
15530 /* At least a word but less than a double-word left to copy. Copy a word. */
15531 reg0 = gen_reg_rtx (SImode);
15532 src = adjust_address (src, SImode, 0);
15533 dst = adjust_address (dst, SImode, 0);
15534 if (src_aligned)
15535 emit_move_insn (reg0, src);
15536 else
15537 emit_insn (gen_unaligned_loadsi (reg0, src));
15538
15539 if (dst_aligned)
15540 emit_move_insn (dst, reg0);
15541 else
15542 emit_insn (gen_unaligned_storesi (dst, reg0));
15543
15544 src = next_consecutive_mem (src);
15545 dst = next_consecutive_mem (dst);
15546 len -= 4;
15547 }
15548
15549 if (len == 0)
15550 return true;
15551
15552 /* Copy the remaining bytes. */
15553 if (len >= 2)
15554 {
15555 dst = adjust_address (dst, HImode, 0);
15556 src = adjust_address (src, HImode, 0);
15557 reg0 = gen_reg_rtx (SImode);
15558 if (src_aligned)
15559 emit_insn (gen_zero_extendhisi2 (reg0, src));
15560 else
15561 emit_insn (gen_unaligned_loadhiu (reg0, src));
15562
15563 if (dst_aligned)
15564 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15565 else
15566 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15567
15568 src = next_consecutive_mem (src);
15569 dst = next_consecutive_mem (dst);
15570 if (len == 2)
15571 return true;
15572 }
15573
15574 dst = adjust_address (dst, QImode, 0);
15575 src = adjust_address (src, QImode, 0);
15576 reg0 = gen_reg_rtx (QImode);
15577 emit_move_insn (reg0, src);
15578 emit_move_insn (dst, reg0);
15579 return true;
15580 }
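/* Rough shape of the output for a 14-byte copy with both ends word-aligned
   (registers invented; the exact instructions depend on how the unaligned
   load/store patterns expand): an 8-byte double-word copy, then a word,
   then a half-word, roughly

       ldrd r2, r3, [r0]
       strd r2, r3, [r1]
       ldr  r2, [r0, #8]
       str  r2, [r1, #8]
       ldrh r2, [r0, #12]
       strh r2, [r1, #12]  */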
15581
15582 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15583 into its component 32-bit subregs. OP2 may be an immediate
15584 constant and we want to simplify it in that case. */
15585 void
15586 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15587 rtx *lo_op2, rtx *hi_op2)
15588 {
15589 *lo_op1 = gen_lowpart (SImode, op1);
15590 *hi_op1 = gen_highpart (SImode, op1);
15591 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15592 subreg_lowpart_offset (SImode, DImode));
15593 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15594 subreg_highpart_offset (SImode, DImode));
15595 }
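/* For instance, if OP2 is (const_int 0x100000004) the two subregs simplify
   to *LO_OP2 = (const_int 4) and *HI_OP2 = (const_int 1); register operands
   simply end up as their SImode low and high parts.  */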
15596
15597 /* Select a dominance comparison mode if possible for a test of the general
15598 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15599 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15600 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15601 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15602 In all cases OP will be either EQ or NE, but we don't need to know which
15603 here. If we are unable to support a dominance comparison we return
15604 CCmode. This will then fail to match for the RTL expressions that
15605 generate this call. */
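/* For example (assuming both operands are simple SImode comparisons so the
   early bail-out below does not trigger): two EQ comparisons under
   DOM_CC_X_AND_Y give CC_DEQmode, and an EQ combined with an LE under
   DOM_CC_X_OR_Y gives CC_DLEmode.  A pair such as LT and GT, where neither
   condition dominates the other, falls back to CCmode and the calling
   pattern simply fails to match.  */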
15606 machine_mode
15607 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15608 {
15609 enum rtx_code cond1, cond2;
15610 int swapped = 0;
15611
15612 /* Currently we will probably get the wrong result if the individual
15613 comparisons are not simple. This also ensures that it is safe to
15614 reverse a comparison if necessary. */
15615 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15616 != CCmode)
15617 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15618 != CCmode))
15619 return CCmode;
15620
15621 /* The if_then_else variant of this tests the second condition if the
15622 first passes, but is true if the first fails. Reverse the first
15623 condition to get a true "inclusive-or" expression. */
15624 if (cond_or == DOM_CC_NX_OR_Y)
15625 cond1 = reverse_condition (cond1);
15626
15627 /* If the comparisons are not equal, and one doesn't dominate the other,
15628 then we can't do this. */
15629 if (cond1 != cond2
15630 && !comparison_dominates_p (cond1, cond2)
15631 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15632 return CCmode;
15633
15634 if (swapped)
15635 std::swap (cond1, cond2);
15636
15637 switch (cond1)
15638 {
15639 case EQ:
15640 if (cond_or == DOM_CC_X_AND_Y)
15641 return CC_DEQmode;
15642
15643 switch (cond2)
15644 {
15645 case EQ: return CC_DEQmode;
15646 case LE: return CC_DLEmode;
15647 case LEU: return CC_DLEUmode;
15648 case GE: return CC_DGEmode;
15649 case GEU: return CC_DGEUmode;
15650 default: gcc_unreachable ();
15651 }
15652
15653 case LT:
15654 if (cond_or == DOM_CC_X_AND_Y)
15655 return CC_DLTmode;
15656
15657 switch (cond2)
15658 {
15659 case LT:
15660 return CC_DLTmode;
15661 case LE:
15662 return CC_DLEmode;
15663 case NE:
15664 return CC_DNEmode;
15665 default:
15666 gcc_unreachable ();
15667 }
15668
15669 case GT:
15670 if (cond_or == DOM_CC_X_AND_Y)
15671 return CC_DGTmode;
15672
15673 switch (cond2)
15674 {
15675 case GT:
15676 return CC_DGTmode;
15677 case GE:
15678 return CC_DGEmode;
15679 case NE:
15680 return CC_DNEmode;
15681 default:
15682 gcc_unreachable ();
15683 }
15684
15685 case LTU:
15686 if (cond_or == DOM_CC_X_AND_Y)
15687 return CC_DLTUmode;
15688
15689 switch (cond2)
15690 {
15691 case LTU:
15692 return CC_DLTUmode;
15693 case LEU:
15694 return CC_DLEUmode;
15695 case NE:
15696 return CC_DNEmode;
15697 default:
15698 gcc_unreachable ();
15699 }
15700
15701 case GTU:
15702 if (cond_or == DOM_CC_X_AND_Y)
15703 return CC_DGTUmode;
15704
15705 switch (cond2)
15706 {
15707 case GTU:
15708 return CC_DGTUmode;
15709 case GEU:
15710 return CC_DGEUmode;
15711 case NE:
15712 return CC_DNEmode;
15713 default:
15714 gcc_unreachable ();
15715 }
15716
15717 /* The remaining cases only occur when both comparisons are the
15718 same. */
15719 case NE:
15720 gcc_assert (cond1 == cond2);
15721 return CC_DNEmode;
15722
15723 case LE:
15724 gcc_assert (cond1 == cond2);
15725 return CC_DLEmode;
15726
15727 case GE:
15728 gcc_assert (cond1 == cond2);
15729 return CC_DGEmode;
15730
15731 case LEU:
15732 gcc_assert (cond1 == cond2);
15733 return CC_DLEUmode;
15734
15735 case GEU:
15736 gcc_assert (cond1 == cond2);
15737 return CC_DGEUmode;
15738
15739 default:
15740 gcc_unreachable ();
15741 }
15742 }
15743
15744 machine_mode
15745 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15746 {
15747 /* Floating point compares return CCFPEmode for the ordered LT, LE, GT and
15748 GE comparisons, and CCFPmode for all the others. */
15749 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15750 {
15751 switch (op)
15752 {
15753 case EQ:
15754 case NE:
15755 case UNORDERED:
15756 case ORDERED:
15757 case UNLT:
15758 case UNLE:
15759 case UNGT:
15760 case UNGE:
15761 case UNEQ:
15762 case LTGT:
15763 return CCFPmode;
15764
15765 case LT:
15766 case LE:
15767 case GT:
15768 case GE:
15769 return CCFPEmode;
15770
15771 default:
15772 gcc_unreachable ();
15773 }
15774 }
15775
15776 /* A compare with a shifted operand. Because of canonicalization, the
15777 comparison will have to be swapped when we emit the assembler. */
15778 if (GET_MODE (y) == SImode
15779 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15780 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15781 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15782 || GET_CODE (x) == ROTATERT))
15783 return CC_SWPmode;
15784
15785 /* A widened compare of the sum of a value plus a carry against a
15786 constant. This is a representation of RSC. We want to swap the
15787 result of the comparison at output. Not valid if the Z bit is
15788 needed. */
15789 if (GET_MODE (x) == DImode
15790 && GET_CODE (x) == PLUS
15791 && arm_borrow_operation (XEXP (x, 1), DImode)
15792 && CONST_INT_P (y)
15793 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15794 && (op == LE || op == GT))
15795 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15796 && (op == LEU || op == GTU))))
15797 return CC_SWPmode;
15798
15799 /* If X is a constant we want to use CC_RSBmode. This is
15800 non-canonical, but arm_gen_compare_reg uses this to generate the
15801 correct canonical form. */
15802 if (GET_MODE (y) == SImode
15803 && (REG_P (y) || GET_CODE (y) == SUBREG)
15804 && CONST_INT_P (x))
15805 return CC_RSBmode;
15806
15807 /* This operation is performed swapped, but since we only rely on the Z
15808 flag we don't need an additional mode. */
15809 if (GET_MODE (y) == SImode
15810 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15811 && GET_CODE (x) == NEG
15812 && (op == EQ || op == NE))
15813 return CC_Zmode;
15814
15815 /* This is a special case that is used by combine to allow a
15816 comparison of a shifted byte load to be split into a zero-extend
15817 followed by a comparison of the shifted integer (only valid for
15818 equalities and unsigned inequalities). */
15819 if (GET_MODE (x) == SImode
15820 && GET_CODE (x) == ASHIFT
15821 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15822 && GET_CODE (XEXP (x, 0)) == SUBREG
15823 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15824 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15825 && (op == EQ || op == NE
15826 || op == GEU || op == GTU || op == LTU || op == LEU)
15827 && CONST_INT_P (y))
15828 return CC_Zmode;
15829
15830 /* A construct for a conditional compare: if the false arm contains
15831 0, then both conditions must be true; otherwise either condition
15832 must be true. Not all conditions are possible, so CCmode is
15833 returned if it can't be done. */
15834 if (GET_CODE (x) == IF_THEN_ELSE
15835 && (XEXP (x, 2) == const0_rtx
15836 || XEXP (x, 2) == const1_rtx)
15837 && COMPARISON_P (XEXP (x, 0))
15838 && COMPARISON_P (XEXP (x, 1)))
15839 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15840 INTVAL (XEXP (x, 2)));
15841
15842 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15843 if (GET_CODE (x) == AND
15844 && (op == EQ || op == NE)
15845 && COMPARISON_P (XEXP (x, 0))
15846 && COMPARISON_P (XEXP (x, 1)))
15847 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15848 DOM_CC_X_AND_Y);
15849
15850 if (GET_CODE (x) == IOR
15851 && (op == EQ || op == NE)
15852 && COMPARISON_P (XEXP (x, 0))
15853 && COMPARISON_P (XEXP (x, 1)))
15854 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15855 DOM_CC_X_OR_Y);
15856
15857 /* An operation (on Thumb) where we want to test for a single bit.
15858 This is done by shifting that bit up into the top bit of a
15859 scratch register; we can then branch on the sign bit. */
15860 if (TARGET_THUMB1
15861 && GET_MODE (x) == SImode
15862 && (op == EQ || op == NE)
15863 && GET_CODE (x) == ZERO_EXTRACT
15864 && XEXP (x, 1) == const1_rtx)
15865 return CC_Nmode;
15866
15867 /* For an operation that sets the condition codes as a side-effect, the
15868 V flag is not set correctly, so we can only use comparisons where
15869 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15870 instead.) */
15871 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15872 if (GET_MODE (x) == SImode
15873 && y == const0_rtx
15874 && (op == EQ || op == NE || op == LT || op == GE)
15875 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15876 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15877 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15878 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15879 || GET_CODE (x) == LSHIFTRT
15880 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15881 || GET_CODE (x) == ROTATERT
15882 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15883 return CC_NZmode;
15884
15885 /* A comparison of ~reg with a const is really a special
15886 canonicalization of compare (~const, reg), which is a reverse
15887 subtract operation. We may not get here if CONST is 0, but that
15888 doesn't matter because ~0 isn't a valid immediate for RSB. */
15889 if (GET_MODE (x) == SImode
15890 && GET_CODE (x) == NOT
15891 && CONST_INT_P (y))
15892 return CC_RSBmode;
15893
15894 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15895 return CC_Zmode;
15896
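  /* A comparison of A + B against one of the addends, for LTU or GEU,
     is the idiom for detecting unsigned overflow of an addition:
     (A + B) < A (unsigned) holds iff the addition produced a carry,
     so only the C flag is needed.  */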
15897 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15898 && GET_CODE (x) == PLUS
15899 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15900 return CC_Cmode;
15901
15902 if (GET_MODE (x) == DImode
15903 && GET_CODE (x) == PLUS
15904 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15905 && CONST_INT_P (y)
15906 && UINTVAL (y) == 0x800000000
15907 && (op == GEU || op == LTU))
15908 return CC_ADCmode;
15909
15910 if (GET_MODE (x) == DImode
15911 && (op == GE || op == LT)
15912 && GET_CODE (x) == SIGN_EXTEND
15913 && ((GET_CODE (y) == PLUS
15914 && arm_borrow_operation (XEXP (y, 0), DImode))
15915 || arm_borrow_operation (y, DImode)))
15916 return CC_NVmode;
15917
15918 if (GET_MODE (x) == DImode
15919 && (op == GEU || op == LTU)
15920 && GET_CODE (x) == ZERO_EXTEND
15921 && ((GET_CODE (y) == PLUS
15922 && arm_borrow_operation (XEXP (y, 0), DImode))
15923 || arm_borrow_operation (y, DImode)))
15924 return CC_Bmode;
15925
15926 if (GET_MODE (x) == DImode
15927 && (op == EQ || op == NE)
15928 && (GET_CODE (x) == PLUS
15929 || GET_CODE (x) == MINUS)
15930 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15931 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15932 && GET_CODE (y) == SIGN_EXTEND
15933 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
15934 return CC_Vmode;
15935
15936 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15937 return GET_MODE (x);
15938
15939 return CCmode;
15940 }
15941
15942 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
15943 the sequence of instructions needed to generate a suitable condition
15944 code register. Return the CC register result. */
15945 static rtx
15946 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15947 {
15948 machine_mode mode;
15949 rtx cc_reg;
15950
15951 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
15952 gcc_assert (TARGET_32BIT);
15953 gcc_assert (!CONST_INT_P (x));
15954
15955 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15956 subreg_lowpart_offset (SImode, DImode));
15957 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
15958 subreg_highpart_offset (SImode, DImode));
15959 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
15960 subreg_lowpart_offset (SImode, DImode));
15961 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
15962 subreg_highpart_offset (SImode, DImode));
15963 switch (code)
15964 {
15965 case EQ:
15966 case NE:
15967 {
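	/* If either half of Y is zero, fold the non-zero half (if any)
	   into the corresponding half of X with a subtract, and then
	   test X_LO | X_HI against zero; roughly a single ORRS-style
	   compare.  This is an illustrative summary of the special
	   case handled below.  */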
15968 if (y_lo == const0_rtx || y_hi == const0_rtx)
15969 {
15970 if (y_lo != const0_rtx)
15971 {
15972 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15973
15974 gcc_assert (y_hi == const0_rtx);
15975 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
15976 if (!arm_add_operand (y_lo, SImode))
15977 y_lo = force_reg (SImode, y_lo);
15978 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
15979 x_lo = scratch2;
15980 }
15981 else if (y_hi != const0_rtx)
15982 {
15983 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15984
15985 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
15986 if (!arm_add_operand (y_hi, SImode))
15987 y_hi = force_reg (SImode, y_hi);
15988 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
15989 x_hi = scratch2;
15990 }
15991
15992 if (!scratch)
15993 {
15994 gcc_assert (!reload_completed);
15995 scratch = gen_rtx_SCRATCH (SImode);
15996 }
15997
15998 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15999 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16000
16001 rtx set
16002 = gen_rtx_SET (cc_reg,
16003 gen_rtx_COMPARE (CC_NZmode,
16004 gen_rtx_IOR (SImode, x_lo, x_hi),
16005 const0_rtx));
16006 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16007 clobber)));
16008 return cc_reg;
16009 }
16010
16011 if (!arm_add_operand (y_lo, SImode))
16012 y_lo = force_reg (SImode, y_lo);
16013
16014 if (!arm_add_operand (y_hi, SImode))
16015 y_hi = force_reg (SImode, y_hi);
16016
16017 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16018 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16019 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16020 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16021 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16022
16023 emit_insn (gen_rtx_SET (cc_reg,
16024 gen_rtx_COMPARE (mode, conjunction,
16025 const0_rtx)));
16026 return cc_reg;
16027 }
16028
16029 case LT:
16030 case GE:
16031 {
16032 if (y_lo == const0_rtx)
16033 {
16034 /* If the low word of y is 0, then this is simply a normal
16035 compare of the upper words. */
16036 if (!arm_add_operand (y_hi, SImode))
16037 y_hi = force_reg (SImode, y_hi);
16038
16039 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16040 }
16041
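	/* Otherwise both halves take part.  Roughly (an illustrative
	   sketch only; the exact insns come from the *_carryin_CC_NVout
	   patterns):
	       cmp	x_lo, y_lo
	       sbcs	scratch, x_hi, y_hi
	   with the result judged from the N and V flags.  */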
16042 if (!arm_add_operand (y_lo, SImode))
16043 y_lo = force_reg (SImode, y_lo);
16044
16045 rtx cmp1
16046 = gen_rtx_LTU (DImode,
16047 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16048 const0_rtx);
16049
16050 if (!scratch)
16051 scratch = gen_rtx_SCRATCH (SImode);
16052
16053 if (!arm_not_operand (y_hi, SImode))
16054 y_hi = force_reg (SImode, y_hi);
16055
16056 rtx_insn *insn;
16057 if (y_hi == const0_rtx)
16058 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16059 cmp1));
16060 else if (CONST_INT_P (y_hi))
16061 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16062 y_hi, cmp1));
16063 else
16064 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16065 cmp1));
16066 return SET_DEST (single_set (insn));
16067 }
16068
16069 case LE:
16070 case GT:
16071 {
	/* During expansion, we only expect to get here if Y is a
	   constant that we want to handle; otherwise we should have
	   swapped the operands already.  */
16075 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16076
16077 if (!const_ok_for_arm (INTVAL (y_lo)))
16078 y_lo = force_reg (SImode, y_lo);
16079
16080 /* Perform a reverse subtract and compare. */
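	/* Roughly (an illustrative sketch only):
	       rsbs	scratch, x_lo, y_lo
	       rscs	scratch, x_hi, y_hi
	   i.e. compute Y - X across the double word and set the flags
	   from the high-word result.  */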
16081 rtx cmp1
16082 = gen_rtx_LTU (DImode,
16083 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16084 const0_rtx);
16085 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16086 x_hi, cmp1));
16087 return SET_DEST (single_set (insn));
16088 }
16089
16090 case LTU:
16091 case GEU:
16092 {
16093 if (y_lo == const0_rtx)
16094 {
16095 /* If the low word of y is 0, then this is simply a normal
16096 compare of the upper words. */
16097 if (!arm_add_operand (y_hi, SImode))
16098 y_hi = force_reg (SImode, y_hi);
16099
16100 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16101 }
16102
16103 if (!arm_add_operand (y_lo, SImode))
16104 y_lo = force_reg (SImode, y_lo);
16105
16106 rtx cmp1
16107 = gen_rtx_LTU (DImode,
16108 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16109 const0_rtx);
16110
16111 if (!scratch)
16112 scratch = gen_rtx_SCRATCH (SImode);
16113 if (!arm_not_operand (y_hi, SImode))
16114 y_hi = force_reg (SImode, y_hi);
16115
16116 rtx_insn *insn;
16117 if (y_hi == const0_rtx)
16118 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16119 cmp1));
16120 else if (CONST_INT_P (y_hi))
16121 {
16122 /* Constant is viewed as unsigned when zero-extended. */
16123 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16124 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16125 y_hi, cmp1));
16126 }
16127 else
16128 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16129 cmp1));
16130 return SET_DEST (single_set (insn));
16131 }
16132
16133 case LEU:
16134 case GTU:
16135 {
	/* During expansion, we only expect to get here if Y is a
	   constant that we want to handle; otherwise we should have
	   swapped the operands already.  */
16139 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16140
16141 if (!const_ok_for_arm (INTVAL (y_lo)))
16142 y_lo = force_reg (SImode, y_lo);
16143
16144 /* Perform a reverse subtract and compare. */
16145 rtx cmp1
16146 = gen_rtx_LTU (DImode,
16147 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16148 const0_rtx);
16149 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16150 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16151 x_hi, cmp1));
16152 return SET_DEST (single_set (insn));
16153 }
16154
16155 default:
16156 gcc_unreachable ();
16157 }
16158 }
16159
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the CC register in the proper mode.  */
16162 rtx
16163 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16164 {
16165 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16166 return arm_gen_dicompare_reg (code, x, y, scratch);
16167
16168 machine_mode mode = SELECT_CC_MODE (code, x, y);
16169 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16170 if (mode == CC_RSBmode)
16171 {
16172 if (!scratch)
16173 scratch = gen_rtx_SCRATCH (SImode);
16174 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16175 GEN_INT (~UINTVAL (x)), y));
16176 }
16177 else
16178 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16179
16180 return cc_reg;
16181 }
16182
16183 /* Generate a sequence of insns that will generate the correct return
16184 address mask depending on the physical architecture that the program
16185 is running on. */
16186 rtx
16187 arm_gen_return_addr_mask (void)
16188 {
16189 rtx reg = gen_reg_rtx (Pmode);
16190
16191 emit_insn (gen_return_addr_mask (reg));
16192 return reg;
16193 }
16194
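/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads that are then merged into OPERANDS[0].  OPERANDS[1] is the
   half-word reference (possibly a spilled pseudo) and OPERANDS[2] is a DImode
   scratch register pair.  As an illustrative sketch only, the little-endian
   sequence is roughly:
	ldrb	scratch, [base, #offset]
	ldrb	out, [base, #offset + 1]
	orr	out, scratch, out, lsl #8
   with the roles of the two bytes swapped on big-endian targets.  */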
16195 void
16196 arm_reload_in_hi (rtx *operands)
16197 {
16198 rtx ref = operands[1];
16199 rtx base, scratch;
16200 HOST_WIDE_INT offset = 0;
16201
16202 if (GET_CODE (ref) == SUBREG)
16203 {
16204 offset = SUBREG_BYTE (ref);
16205 ref = SUBREG_REG (ref);
16206 }
16207
16208 if (REG_P (ref))
16209 {
16210 /* We have a pseudo which has been spilt onto the stack; there
16211 are two cases here: the first where there is a simple
16212 stack-slot replacement and a second where the stack-slot is
16213 out of range, or is used as a subreg. */
16214 if (reg_equiv_mem (REGNO (ref)))
16215 {
16216 ref = reg_equiv_mem (REGNO (ref));
16217 base = find_replacement (&XEXP (ref, 0));
16218 }
16219 else
16220 /* The slot is out of range, or was dressed up in a SUBREG. */
16221 base = reg_equiv_address (REGNO (ref));
16222
16223 /* PR 62554: If there is no equivalent memory location then just move
16224 the value as an SImode register move. This happens when the target
16225 architecture variant does not have an HImode register move. */
16226 if (base == NULL)
16227 {
16228 gcc_assert (REG_P (operands[0]));
16229 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16230 gen_rtx_SUBREG (SImode, ref, 0)));
16231 return;
16232 }
16233 }
16234 else
16235 base = find_replacement (&XEXP (ref, 0));
16236
16237 /* Handle the case where the address is too complex to be offset by 1. */
16238 if (GET_CODE (base) == MINUS
16239 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16240 {
16241 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16242
16243 emit_set_insn (base_plus, base);
16244 base = base_plus;
16245 }
16246 else if (GET_CODE (base) == PLUS)
16247 {
16248 /* The addend must be CONST_INT, or we would have dealt with it above. */
16249 HOST_WIDE_INT hi, lo;
16250
16251 offset += INTVAL (XEXP (base, 1));
16252 base = XEXP (base, 0);
16253
16254 /* Rework the address into a legal sequence of insns. */
16255 /* Valid range for lo is -4095 -> 4095 */
16256 lo = (offset >= 0
16257 ? (offset & 0xfff)
16258 : -((-offset) & 0xfff));
16259
      /* Corner case: if LO is the maximum offset, then we would be out of
	 range once we have added the additional 1 below, so bump the msb
	 into the pre-loading insn(s).  */
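      /* For example (illustrative arithmetic only): with offset == 4095,
	 LO would initially be 4095 and the "offset + 1" access below would
	 need 4096, which is out of range; so LO is reduced to
	 4095 & 0x7ff == 2047 and the remaining 2048 is folded into HI.  */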
16263 if (lo == 4095)
16264 lo &= 0x7ff;
16265
16266 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16267 ^ (HOST_WIDE_INT) 0x80000000)
16268 - (HOST_WIDE_INT) 0x80000000);
16269
16270 gcc_assert (hi + lo == offset);
16271
16272 if (hi != 0)
16273 {
16274 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16275
16276 /* Get the base address; addsi3 knows how to handle constants
16277 that require more than one insn. */
16278 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16279 base = base_plus;
16280 offset = lo;
16281 }
16282 }
16283
  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]); that's why we asked for a DImode reg -- so that we
     can use the half that does not overlap.  */
16287 if (REGNO (operands[2]) == REGNO (operands[0]))
16288 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16289 else
16290 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16291
16292 emit_insn (gen_zero_extendqisi2 (scratch,
16293 gen_rtx_MEM (QImode,
16294 plus_constant (Pmode, base,
16295 offset))));
16296 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16297 gen_rtx_MEM (QImode,
16298 plus_constant (Pmode, base,
16299 offset + 1))));
16300 if (!BYTES_BIG_ENDIAN)
16301 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16302 gen_rtx_IOR (SImode,
16303 gen_rtx_ASHIFT
16304 (SImode,
16305 gen_rtx_SUBREG (SImode, operands[0], 0),
16306 GEN_INT (8)),
16307 scratch));
16308 else
16309 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16310 gen_rtx_IOR (SImode,
16311 gen_rtx_ASHIFT (SImode, scratch,
16312 GEN_INT (8)),
16313 gen_rtx_SUBREG (SImode, operands[0], 0)));
16314 }
16315
/* Handle storing a half-word to memory during reload by synthesizing it as
   two byte stores.  Take care not to clobber the input values until after we
16318 have moved them somewhere safe. This code assumes that if the DImode
16319 scratch in operands[2] overlaps either the input value or output address
16320 in some way, then that value must die in this insn (we absolutely need
16321 two scratch registers for some corner cases). */
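/* As an illustrative sketch only, the little-endian sequence emitted below is
   roughly:
	strb	out, [base, #offset]
	lsr	scratch, out, #8
	strb	scratch, [base, #offset + 1]
   with the two byte addresses swapped on big-endian targets.  */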
16322 void
16323 arm_reload_out_hi (rtx *operands)
16324 {
16325 rtx ref = operands[0];
16326 rtx outval = operands[1];
16327 rtx base, scratch;
16328 HOST_WIDE_INT offset = 0;
16329
16330 if (GET_CODE (ref) == SUBREG)
16331 {
16332 offset = SUBREG_BYTE (ref);
16333 ref = SUBREG_REG (ref);
16334 }
16335
16336 if (REG_P (ref))
16337 {
16338 /* We have a pseudo which has been spilt onto the stack; there
16339 are two cases here: the first where there is a simple
16340 stack-slot replacement and a second where the stack-slot is
16341 out of range, or is used as a subreg. */
16342 if (reg_equiv_mem (REGNO (ref)))
16343 {
16344 ref = reg_equiv_mem (REGNO (ref));
16345 base = find_replacement (&XEXP (ref, 0));
16346 }
16347 else
16348 /* The slot is out of range, or was dressed up in a SUBREG. */
16349 base = reg_equiv_address (REGNO (ref));
16350
16351 /* PR 62254: If there is no equivalent memory location then just move
16352 the value as an SImode register move. This happens when the target
16353 architecture variant does not have an HImode register move. */
16354 if (base == NULL)
16355 {
16356 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16357
16358 if (REG_P (outval))
16359 {
16360 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16361 gen_rtx_SUBREG (SImode, outval, 0)));
16362 }
16363 else /* SUBREG_P (outval) */
16364 {
16365 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16366 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16367 SUBREG_REG (outval)));
16368 else
16369 /* FIXME: Handle other cases ? */
16370 gcc_unreachable ();
16371 }
16372 return;
16373 }
16374 }
16375 else
16376 base = find_replacement (&XEXP (ref, 0));
16377
16378 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16379
16380 /* Handle the case where the address is too complex to be offset by 1. */
16381 if (GET_CODE (base) == MINUS
16382 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16383 {
16384 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16385
16386 /* Be careful not to destroy OUTVAL. */
16387 if (reg_overlap_mentioned_p (base_plus, outval))
16388 {
16389 /* Updating base_plus might destroy outval, see if we can
16390 swap the scratch and base_plus. */
16391 if (!reg_overlap_mentioned_p (scratch, outval))
16392 std::swap (scratch, base_plus);
16393 else
16394 {
16395 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16396
	      /* Be conservative and copy OUTVAL into the scratch now;
		 this should only be necessary if OUTVAL is a subreg
		 of something larger than a word.  */
16400 /* XXX Might this clobber base? I can't see how it can,
16401 since scratch is known to overlap with OUTVAL, and
16402 must be wider than a word. */
16403 emit_insn (gen_movhi (scratch_hi, outval));
16404 outval = scratch_hi;
16405 }
16406 }
16407
16408 emit_set_insn (base_plus, base);
16409 base = base_plus;
16410 }
16411 else if (GET_CODE (base) == PLUS)
16412 {
16413 /* The addend must be CONST_INT, or we would have dealt with it above. */
16414 HOST_WIDE_INT hi, lo;
16415
16416 offset += INTVAL (XEXP (base, 1));
16417 base = XEXP (base, 0);
16418
16419 /* Rework the address into a legal sequence of insns. */
16420 /* Valid range for lo is -4095 -> 4095 */
16421 lo = (offset >= 0
16422 ? (offset & 0xfff)
16423 : -((-offset) & 0xfff));
16424
      /* Corner case: if LO is the maximum offset, then we would be out of
	 range once we have added the additional 1 below, so bump the msb
	 into the pre-loading insn(s).  */
16428 if (lo == 4095)
16429 lo &= 0x7ff;
16430
16431 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16432 ^ (HOST_WIDE_INT) 0x80000000)
16433 - (HOST_WIDE_INT) 0x80000000);
16434
16435 gcc_assert (hi + lo == offset);
16436
16437 if (hi != 0)
16438 {
16439 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16440
16441 /* Be careful not to destroy OUTVAL. */
16442 if (reg_overlap_mentioned_p (base_plus, outval))
16443 {
16444 /* Updating base_plus might destroy outval, see if we
16445 can swap the scratch and base_plus. */
16446 if (!reg_overlap_mentioned_p (scratch, outval))
16447 std::swap (scratch, base_plus);
16448 else
16449 {
16450 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16451
		  /* Be conservative and copy OUTVAL into SCRATCH now;
		     this should only be necessary if OUTVAL is a
		     subreg of something larger than a word.  */
16455 /* XXX Might this clobber base? I can't see how it
16456 can, since scratch is known to overlap with
16457 outval. */
16458 emit_insn (gen_movhi (scratch_hi, outval));
16459 outval = scratch_hi;
16460 }
16461 }
16462
16463 /* Get the base address; addsi3 knows how to handle constants
16464 that require more than one insn. */
16465 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16466 base = base_plus;
16467 offset = lo;
16468 }
16469 }
16470
16471 if (BYTES_BIG_ENDIAN)
16472 {
16473 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16474 plus_constant (Pmode, base,
16475 offset + 1)),
16476 gen_lowpart (QImode, outval)));
16477 emit_insn (gen_lshrsi3 (scratch,
16478 gen_rtx_SUBREG (SImode, outval, 0),
16479 GEN_INT (8)));
16480 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16481 offset)),
16482 gen_lowpart (QImode, scratch)));
16483 }
16484 else
16485 {
16486 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16487 offset)),
16488 gen_lowpart (QImode, outval)));
16489 emit_insn (gen_lshrsi3 (scratch,
16490 gen_rtx_SUBREG (SImode, outval, 0),
16491 GEN_INT (8)));
16492 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16493 plus_constant (Pmode, base,
16494 offset + 1)),
16495 gen_lowpart (QImode, scratch)));
16496 }
16497 }
16498
16499 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16500 (padded to the size of a word) should be passed in a register. */
16501
16502 static bool
16503 arm_must_pass_in_stack (const function_arg_info &arg)
16504 {
16505 if (TARGET_AAPCS_BASED)
16506 return must_pass_in_stack_var_size (arg);
16507 else
16508 return must_pass_in_stack_var_size_or_pad (arg);
16509 }
16510
16511
16512 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16513 byte of a stack argument has useful data. For legacy APCS ABIs we use
16514 the default. For AAPCS based ABIs small aggregate types are placed
16515 in the lowest memory address. */
16516
16517 static pad_direction
16518 arm_function_arg_padding (machine_mode mode, const_tree type)
16519 {
16520 if (!TARGET_AAPCS_BASED)
16521 return default_function_arg_padding (mode, type);
16522
16523 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16524 return PAD_DOWNWARD;
16525
16526 return PAD_UPWARD;
16527 }
16528
16529
16530 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16531 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16532 register has useful data, and return the opposite if the most
16533 significant byte does. */
16534
16535 bool
16536 arm_pad_reg_upward (machine_mode mode,
16537 tree type, int first ATTRIBUTE_UNUSED)
16538 {
16539 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16540 {
16541 /* For AAPCS, small aggregates, small fixed-point types,
16542 and small complex types are always padded upwards. */
16543 if (type)
16544 {
16545 if ((AGGREGATE_TYPE_P (type)
16546 || TREE_CODE (type) == COMPLEX_TYPE
16547 || FIXED_POINT_TYPE_P (type))
16548 && int_size_in_bytes (type) <= 4)
16549 return true;
16550 }
16551 else
16552 {
16553 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16554 && GET_MODE_SIZE (mode) <= 4)
16555 return true;
16556 }
16557 }
16558
16559 /* Otherwise, use default padding. */
16560 return !BYTES_BIG_ENDIAN;
16561 }
16562
16563 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16564 assuming that the address in the base register is word aligned. */
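/* (ARM-mode LDRD/STRD encodes an 8-bit byte offset, hence the +/-255 limit
   below; Thumb-2 encodes an 8-bit offset scaled by 4, hence a multiple of 4
   in the range +/-1020.)  */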
16565 bool
16566 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16567 {
16568 HOST_WIDE_INT max_offset;
16569
16570 /* Offset must be a multiple of 4 in Thumb mode. */
16571 if (TARGET_THUMB2 && ((offset & 3) != 0))
16572 return false;
16573
16574 if (TARGET_THUMB2)
16575 max_offset = 1020;
16576 else if (TARGET_ARM)
16577 max_offset = 255;
16578 else
16579 return false;
16580
16581 return ((offset <= max_offset) && (offset >= -max_offset));
16582 }
16583
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the valid range, and that the gap
   between the offsets is 4.  If reload is complete, also check that the
   registers are legal for the instruction.  WBACK indicates whether the
   address is updated.  LOAD indicates whether the memory access is a load
   or a store.  */
16591 bool
16592 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16593 bool wback, bool load)
16594 {
16595 unsigned int t, t2, n;
16596
16597 if (!reload_completed)
16598 return true;
16599
16600 if (!offset_ok_for_ldrd_strd (offset))
16601 return false;
16602
16603 t = REGNO (rt);
16604 t2 = REGNO (rt2);
16605 n = REGNO (rn);
16606
16607 if ((TARGET_THUMB2)
16608 && ((wback && (n == t || n == t2))
16609 || (t == SP_REGNUM)
16610 || (t == PC_REGNUM)
16611 || (t2 == SP_REGNUM)
16612 || (t2 == PC_REGNUM)
16613 || (!load && (n == PC_REGNUM))
16614 || (load && (t == t2))
16615 /* Triggers Cortex-M3 LDRD errata. */
16616 || (!wback && load && fix_cm3_ldrd && (n == t))))
16617 return false;
16618
16619 if ((TARGET_ARM)
16620 && ((wback && (n == t || n == t2))
16621 || (t2 == PC_REGNUM)
16622 || (t % 2 != 0) /* First destination register is not even. */
16623 || (t2 != t + 1)
	   /* PC can be used as a base register (for offset addressing
	      only), but it is deprecated.  */
16626 || (n == PC_REGNUM)))
16627 return false;
16628
16629 return true;
16630 }
16631
16632 /* Return true if a 64-bit access with alignment ALIGN and with a
16633 constant offset OFFSET from the base pointer is permitted on this
16634 architecture. */
16635 static bool
16636 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16637 {
16638 return (unaligned_access
16639 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16640 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16641 }
16642
16643 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16644 operand MEM's address contains an immediate offset from the base
16645 register and has no side effects, in which case it sets BASE,
16646 OFFSET and ALIGN accordingly. */
16647 static bool
16648 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16649 {
16650 rtx addr;
16651
16652 gcc_assert (base != NULL && offset != NULL);
16653
16654 /* TODO: Handle more general memory operand patterns, such as
16655 PRE_DEC and PRE_INC. */
16656
16657 if (side_effects_p (mem))
16658 return false;
16659
16660 /* Can't deal with subregs. */
16661 if (GET_CODE (mem) == SUBREG)
16662 return false;
16663
16664 gcc_assert (MEM_P (mem));
16665
16666 *offset = const0_rtx;
16667 *align = MEM_ALIGN (mem);
16668
16669 addr = XEXP (mem, 0);
16670
16671 /* If addr isn't valid for DImode, then we can't handle it. */
16672 if (!arm_legitimate_address_p (DImode, addr,
16673 reload_in_progress || reload_completed))
16674 return false;
16675
16676 if (REG_P (addr))
16677 {
16678 *base = addr;
16679 return true;
16680 }
16681 else if (GET_CODE (addr) == PLUS)
16682 {
16683 *base = XEXP (addr, 0);
16684 *offset = XEXP (addr, 1);
16685 return (REG_P (*base) && CONST_INT_P (*offset));
16686 }
16687
16688 return false;
16689 }
16690
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5], and assumes that the pattern
   is 4 insns long, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
16703 bool
16704 gen_operands_ldrd_strd (rtx *operands, bool load,
16705 bool const_store, bool commute)
16706 {
16707 int nops = 2;
16708 HOST_WIDE_INT offsets[2], offset, align[2];
16709 rtx base = NULL_RTX;
16710 rtx cur_base, cur_offset, tmp;
16711 int i, gap;
16712 HARD_REG_SET regset;
16713
16714 gcc_assert (!const_store || !load);
16715 /* Check that the memory references are immediate offsets from the
16716 same base register. Extract the base register, the destination
16717 registers, and the corresponding memory offsets. */
16718 for (i = 0; i < nops; i++)
16719 {
16720 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16721 &align[i]))
16722 return false;
16723
16724 if (i == 0)
16725 base = cur_base;
16726 else if (REGNO (base) != REGNO (cur_base))
16727 return false;
16728
16729 offsets[i] = INTVAL (cur_offset);
16730 if (GET_CODE (operands[i]) == SUBREG)
16731 {
16732 tmp = SUBREG_REG (operands[i]);
16733 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16734 operands[i] = tmp;
16735 }
16736 }
16737
16738 /* Make sure there is no dependency between the individual loads. */
16739 if (load && REGNO (operands[0]) == REGNO (base))
16740 return false; /* RAW */
16741
16742 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16743 return false; /* WAW */
16744
16745 /* If the same input register is used in both stores
16746 when storing different constants, try to find a free register.
16747 For example, the code
16748 mov r0, 0
16749 str r0, [r2]
16750 mov r0, 1
16751 str r0, [r2, #4]
16752 can be transformed into
16753 mov r1, 0
16754 mov r0, 1
16755 strd r1, r0, [r2]
16756 in Thumb mode assuming that r1 is free.
16757 For ARM mode do the same but only if the starting register
16758 can be made to be even. */
16759 if (const_store
16760 && REGNO (operands[0]) == REGNO (operands[1])
16761 && INTVAL (operands[4]) != INTVAL (operands[5]))
16762 {
16763 if (TARGET_THUMB2)
16764 {
16765 CLEAR_HARD_REG_SET (regset);
16766 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16767 if (tmp == NULL_RTX)
16768 return false;
16769
	      /* Use the new register in the first load to ensure that
		 if the original input register is not dead after the
		 peephole, then it will have the correct constant value.  */
16773 operands[0] = tmp;
16774 }
16775 else if (TARGET_ARM)
16776 {
16777 int regno = REGNO (operands[0]);
16778 if (!peep2_reg_dead_p (4, operands[0]))
16779 {
16780 /* When the input register is even and is not dead after the
16781 pattern, it has to hold the second constant but we cannot
16782 form a legal STRD in ARM mode with this register as the second
16783 register. */
16784 if (regno % 2 == 0)
16785 return false;
16786
16787 /* Is regno-1 free? */
16788 SET_HARD_REG_SET (regset);
16789 CLEAR_HARD_REG_BIT(regset, regno - 1);
16790 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16791 if (tmp == NULL_RTX)
16792 return false;
16793
16794 operands[0] = tmp;
16795 }
16796 else
16797 {
16798 /* Find a DImode register. */
16799 CLEAR_HARD_REG_SET (regset);
16800 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16801 if (tmp != NULL_RTX)
16802 {
16803 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16804 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16805 }
16806 else
16807 {
16808 /* Can we use the input register to form a DI register? */
16809 SET_HARD_REG_SET (regset);
16810 CLEAR_HARD_REG_BIT(regset,
16811 regno % 2 == 0 ? regno + 1 : regno - 1);
16812 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16813 if (tmp == NULL_RTX)
16814 return false;
16815 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16816 }
16817 }
16818
16819 gcc_assert (operands[0] != NULL_RTX);
16820 gcc_assert (operands[1] != NULL_RTX);
16821 gcc_assert (REGNO (operands[0]) % 2 == 0);
16822 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16823 }
16824 }
16825
16826 /* Make sure the instructions are ordered with lower memory access first. */
16827 if (offsets[0] > offsets[1])
16828 {
16829 gap = offsets[0] - offsets[1];
16830 offset = offsets[1];
16831
16832 /* Swap the instructions such that lower memory is accessed first. */
16833 std::swap (operands[0], operands[1]);
16834 std::swap (operands[2], operands[3]);
16835 std::swap (align[0], align[1]);
16836 if (const_store)
16837 std::swap (operands[4], operands[5]);
16838 }
16839 else
16840 {
16841 gap = offsets[1] - offsets[0];
16842 offset = offsets[0];
16843 }
16844
16845 /* Make sure accesses are to consecutive memory locations. */
16846 if (gap != GET_MODE_SIZE (SImode))
16847 return false;
16848
16849 if (!align_ok_ldrd_strd (align[0], offset))
16850 return false;
16851
16852 /* Make sure we generate legal instructions. */
16853 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16854 false, load))
16855 return true;
16856
  /* In Thumb state, where the registers are almost unconstrained, there
     is little hope of fixing things up by reordering the registers.  */
16859 if (TARGET_THUMB2)
16860 return false;
16861
16862 if (load && commute)
16863 {
16864 /* Try reordering registers. */
16865 std::swap (operands[0], operands[1]);
16866 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16867 false, load))
16868 return true;
16869 }
16870
16871 if (const_store)
16872 {
16873 /* If input registers are dead after this pattern, they can be
16874 reordered or replaced by other registers that are free in the
16875 current pattern. */
16876 if (!peep2_reg_dead_p (4, operands[0])
16877 || !peep2_reg_dead_p (4, operands[1]))
16878 return false;
16879
16880 /* Try to reorder the input registers. */
16881 /* For example, the code
16882 mov r0, 0
16883 mov r1, 1
16884 str r1, [r2]
16885 str r0, [r2, #4]
16886 can be transformed into
16887 mov r1, 0
16888 mov r0, 1
	 strd r0, r1, [r2]
16890 */
16891 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16892 false, false))
16893 {
16894 std::swap (operands[0], operands[1]);
16895 return true;
16896 }
16897
16898 /* Try to find a free DI register. */
16899 CLEAR_HARD_REG_SET (regset);
16900 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16901 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16902 while (true)
16903 {
16904 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16905 if (tmp == NULL_RTX)
16906 return false;
16907
16908 /* DREG must be an even-numbered register in DImode.
16909 Split it into SI registers. */
16910 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16911 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16912 gcc_assert (operands[0] != NULL_RTX);
16913 gcc_assert (operands[1] != NULL_RTX);
16914 gcc_assert (REGNO (operands[0]) % 2 == 0);
16915 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16916
16917 return (operands_ok_ldrd_strd (operands[0], operands[1],
16918 base, offset,
16919 false, load));
16920 }
16921 }
16922
16923 return false;
16924 }
16925
16926
16927 /* Return true if parallel execution of the two word-size accesses provided
16928 could be satisfied with a single LDRD/STRD instruction. Two word-size
16929 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16930 register operands and OPERANDS[2,3] are the corresponding memory operands.
16931 */
16932 bool
16933 valid_operands_ldrd_strd (rtx *operands, bool load)
16934 {
16935 int nops = 2;
16936 HOST_WIDE_INT offsets[2], offset, align[2];
16937 rtx base = NULL_RTX;
16938 rtx cur_base, cur_offset;
16939 int i, gap;
16940
16941 /* Check that the memory references are immediate offsets from the
16942 same base register. Extract the base register, the destination
16943 registers, and the corresponding memory offsets. */
16944 for (i = 0; i < nops; i++)
16945 {
16946 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16947 &align[i]))
16948 return false;
16949
16950 if (i == 0)
16951 base = cur_base;
16952 else if (REGNO (base) != REGNO (cur_base))
16953 return false;
16954
16955 offsets[i] = INTVAL (cur_offset);
16956 if (GET_CODE (operands[i]) == SUBREG)
16957 return false;
16958 }
16959
16960 if (offsets[0] > offsets[1])
16961 return false;
16962
16963 gap = offsets[1] - offsets[0];
16964 offset = offsets[0];
16965
16966 /* Make sure accesses are to consecutive memory locations. */
16967 if (gap != GET_MODE_SIZE (SImode))
16968 return false;
16969
16970 if (!align_ok_ldrd_strd (align[0], offset))
16971 return false;
16972
16973 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16974 false, load);
16975 }
16976
16977 \f
16978 /* Print a symbolic form of X to the debug file, F. */
16979 static void
16980 arm_print_value (FILE *f, rtx x)
16981 {
16982 switch (GET_CODE (x))
16983 {
16984 case CONST_INT:
16985 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16986 return;
16987
16988 case CONST_DOUBLE:
16989 {
16990 char fpstr[20];
16991 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16992 sizeof (fpstr), 0, 1);
16993 fputs (fpstr, f);
16994 }
16995 return;
16996
16997 case CONST_VECTOR:
16998 {
16999 int i;
17000
17001 fprintf (f, "<");
17002 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17003 {
17004 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17005 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17006 fputc (',', f);
17007 }
17008 fprintf (f, ">");
17009 }
17010 return;
17011
17012 case CONST_STRING:
17013 fprintf (f, "\"%s\"", XSTR (x, 0));
17014 return;
17015
17016 case SYMBOL_REF:
17017 fprintf (f, "`%s'", XSTR (x, 0));
17018 return;
17019
17020 case LABEL_REF:
17021 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17022 return;
17023
17024 case CONST:
17025 arm_print_value (f, XEXP (x, 0));
17026 return;
17027
17028 case PLUS:
17029 arm_print_value (f, XEXP (x, 0));
17030 fprintf (f, "+");
17031 arm_print_value (f, XEXP (x, 1));
17032 return;
17033
17034 case PC:
17035 fprintf (f, "pc");
17036 return;
17037
17038 default:
17039 fprintf (f, "????");
17040 return;
17041 }
17042 }
17043 \f
17044 /* Routines for manipulation of the constant pool. */
17045
17046 /* Arm instructions cannot load a large constant directly into a
17047 register; they have to come from a pc relative load. The constant
17048 must therefore be placed in the addressable range of the pc
17049 relative load. Depending on the precise pc relative load
17050 instruction the range is somewhere between 256 bytes and 4k. This
17051 means that we often have to dump a constant inside a function, and
17052 generate code to branch around it.
17053
17054 It is important to minimize this, since the branches will slow
17055 things down and make the code larger.
17056
17057 Normally we can hide the table after an existing unconditional
17058 branch so that there is no interruption of the flow, but in the
17059 worst case the code looks like this:
17060
17061 ldr rn, L1
17062 ...
17063 b L2
17064 align
17065 L1: .long value
17066 L2:
17067 ...
17068
17069 ldr rn, L3
17070 ...
17071 b L4
17072 align
17073 L3: .long value
17074 L4:
17075 ...
17076
17077 We fix this by performing a scan after scheduling, which notices
17078 which instructions need to have their operands fetched from the
17079 constant table and builds the table.
17080
17081 The algorithm starts by building a table of all the constants that
17082 need fixing up and all the natural barriers in the function (places
17083 where a constant table can be dropped without breaking the flow).
17084 For each fixup we note how far the pc-relative replacement will be
17085 able to reach and the offset of the instruction into the function.
17086
17087 Having built the table we then group the fixes together to form
17088 tables that are as large as possible (subject to addressing
17089 constraints) and emit each table of constants after the last
17090 barrier that is within range of all the instructions in the group.
17091 If a group does not contain a barrier, then we forcibly create one
17092 by inserting a jump instruction into the flow. Once the table has
17093 been inserted, the insns are then modified to reference the
17094 relevant entry in the pool.
17095
17096 Possible enhancements to the algorithm (not implemented) are:
17097
17098 1) For some processors and object formats, there may be benefit in
17099 aligning the pools to the start of cache lines; this alignment
17100 would need to be taken into account when calculating addressability
17101 of a pool. */
17102
17103 /* These typedefs are located at the start of this file, so that
17104 they can be used in the prototypes there. This comment is to
17105 remind readers of that fact so that the following structures
17106 can be understood more easily.
17107
17108 typedef struct minipool_node Mnode;
17109 typedef struct minipool_fixup Mfix; */
17110
17111 struct minipool_node
17112 {
17113 /* Doubly linked chain of entries. */
17114 Mnode * next;
17115 Mnode * prev;
  /* The maximum offset into the code at which this entry can be placed.
     While pushing fixes for forward references, all entries are sorted in
     order of increasing max_address.  */
17119 HOST_WIDE_INT max_address;
17120 /* Similarly for an entry inserted for a backwards ref. */
17121 HOST_WIDE_INT min_address;
17122 /* The number of fixes referencing this entry. This can become zero
17123 if we "unpush" an entry. In this case we ignore the entry when we
17124 come to emit the code. */
17125 int refcount;
17126 /* The offset from the start of the minipool. */
17127 HOST_WIDE_INT offset;
  /* The value in the table.  */
17129 rtx value;
17130 /* The mode of value. */
17131 machine_mode mode;
  /* The size of the value.  With iWMMXt enabled,
     sizes > 4 also imply an alignment of 8 bytes.  */
17134 int fix_size;
17135 };
17136
17137 struct minipool_fixup
17138 {
17139 Mfix * next;
17140 rtx_insn * insn;
17141 HOST_WIDE_INT address;
17142 rtx * loc;
17143 machine_mode mode;
17144 int fix_size;
17145 rtx value;
17146 Mnode * minipool;
17147 HOST_WIDE_INT forwards;
17148 HOST_WIDE_INT backwards;
17149 };
17150
17151 /* Fixes less than a word need padding out to a word boundary. */
17152 #define MINIPOOL_FIX_SIZE(mode) \
17153 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17154
17155 static Mnode * minipool_vector_head;
17156 static Mnode * minipool_vector_tail;
17157 static rtx_code_label *minipool_vector_label;
17158 static int minipool_pad;
17159
17160 /* The linked list of all minipool fixes required for this function. */
17161 Mfix * minipool_fix_head;
17162 Mfix * minipool_fix_tail;
17163 /* The fix entry for the current minipool, once it has been placed. */
17164 Mfix * minipool_barrier;
17165
17166 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17167 #define JUMP_TABLES_IN_TEXT_SECTION 0
17168 #endif
17169
17170 static HOST_WIDE_INT
17171 get_jump_table_size (rtx_jump_table_data *insn)
17172 {
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
17175 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17176 {
17177 rtx body = PATTERN (insn);
17178 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17179 HOST_WIDE_INT size;
17180 HOST_WIDE_INT modesize;
17181
17182 modesize = GET_MODE_SIZE (GET_MODE (body));
17183 size = modesize * XVECLEN (body, elt);
17184 switch (modesize)
17185 {
17186 case 1:
17187 /* Round up size of TBB table to a halfword boundary. */
17188 size = (size + 1) & ~HOST_WIDE_INT_1;
17189 break;
17190 case 2:
17191 /* No padding necessary for TBH. */
17192 break;
17193 case 4:
17194 /* Add two bytes for alignment on Thumb. */
17195 if (TARGET_THUMB)
17196 size += 2;
17197 break;
17198 default:
17199 gcc_unreachable ();
17200 }
17201 return size;
17202 }
17203
17204 return 0;
17205 }
17206
17207 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17208 function descriptor) into a register and the GOT address into the
17209 FDPIC register, returning an rtx for the register holding the
17210 function address. */
17211
17212 rtx
17213 arm_load_function_descriptor (rtx funcdesc)
17214 {
17215 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17216 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17217 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17218 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17219
17220 emit_move_insn (fnaddr_reg, fnaddr);
17221
17222 /* The ABI requires the entry point address to be loaded first, but
17223 since we cannot support lazy binding for lack of atomic load of
17224 two 32-bits values, we do not need to bother to prevent the
17225 previous load from being moved after that of the GOT address. */
17226 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17227
17228 return fnaddr_reg;
17229 }
17230
17231 /* Return the maximum amount of padding that will be inserted before
17232 label LABEL. */
17233 static HOST_WIDE_INT
17234 get_label_padding (rtx label)
17235 {
17236 HOST_WIDE_INT align, min_insn_size;
17237
17238 align = 1 << label_to_alignment (label).levels[0].log;
17239 min_insn_size = TARGET_THUMB ? 2 : 4;
17240 return align > min_insn_size ? align - min_insn_size : 0;
17241 }
17242
17243 /* Move a minipool fix MP from its current location to before MAX_MP.
17244 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17245 constraints may need updating. */
17246 static Mnode *
17247 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17248 HOST_WIDE_INT max_address)
17249 {
17250 /* The code below assumes these are different. */
17251 gcc_assert (mp != max_mp);
17252
17253 if (max_mp == NULL)
17254 {
17255 if (max_address < mp->max_address)
17256 mp->max_address = max_address;
17257 }
17258 else
17259 {
17260 if (max_address > max_mp->max_address - mp->fix_size)
17261 mp->max_address = max_mp->max_address - mp->fix_size;
17262 else
17263 mp->max_address = max_address;
17264
17265 /* Unlink MP from its current position. Since max_mp is non-null,
17266 mp->prev must be non-null. */
17267 mp->prev->next = mp->next;
17268 if (mp->next != NULL)
17269 mp->next->prev = mp->prev;
17270 else
17271 minipool_vector_tail = mp->prev;
17272
17273 /* Re-insert it before MAX_MP. */
17274 mp->next = max_mp;
17275 mp->prev = max_mp->prev;
17276 max_mp->prev = mp;
17277
17278 if (mp->prev != NULL)
17279 mp->prev->next = mp;
17280 else
17281 minipool_vector_head = mp;
17282 }
17283
17284 /* Save the new entry. */
17285 max_mp = mp;
17286
17287 /* Scan over the preceding entries and adjust their addresses as
17288 required. */
17289 while (mp->prev != NULL
17290 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17291 {
17292 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17293 mp = mp->prev;
17294 }
17295
17296 return max_mp;
17297 }
17298
17299 /* Add a constant to the minipool for a forward reference. Returns the
17300 node added or NULL if the constant will not fit in this pool. */
17301 static Mnode *
17302 add_minipool_forward_ref (Mfix *fix)
17303 {
17304 /* If set, max_mp is the first pool_entry that has a lower
17305 constraint than the one we are trying to add. */
17306 Mnode * max_mp = NULL;
17307 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17308 Mnode * mp;
17309
17310 /* If the minipool starts before the end of FIX->INSN then this FIX
17311 cannot be placed into the current pool. Furthermore, adding the
17312 new constant pool entry may cause the pool to start FIX_SIZE bytes
17313 earlier. */
17314 if (minipool_vector_head &&
17315 (fix->address + get_attr_length (fix->insn)
17316 >= minipool_vector_head->max_address - fix->fix_size))
17317 return NULL;
17318
17319 /* Scan the pool to see if a constant with the same value has
17320 already been added. While we are doing this, also note the
17321 location where we must insert the constant if it doesn't already
17322 exist. */
17323 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17324 {
17325 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17326 && fix->mode == mp->mode
17327 && (!LABEL_P (fix->value)
17328 || (CODE_LABEL_NUMBER (fix->value)
17329 == CODE_LABEL_NUMBER (mp->value)))
17330 && rtx_equal_p (fix->value, mp->value))
17331 {
17332 /* More than one fix references this entry. */
17333 mp->refcount++;
17334 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17335 }
17336
17337 /* Note the insertion point if necessary. */
17338 if (max_mp == NULL
17339 && mp->max_address > max_address)
17340 max_mp = mp;
17341
      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
17346 if (ARM_DOUBLEWORD_ALIGN
17347 && max_mp == NULL
17348 && fix->fix_size >= 8
17349 && mp->fix_size < 8)
17350 {
17351 max_mp = mp;
17352 max_address = mp->max_address;
17353 }
17354 }
17355
17356 /* The value is not currently in the minipool, so we need to create
17357 a new entry for it. If MAX_MP is NULL, the entry will be put on
17358 the end of the list since the placement is less constrained than
17359 any existing entry. Otherwise, we insert the new fix before
17360 MAX_MP and, if necessary, adjust the constraints on the other
17361 entries. */
17362 mp = XNEW (Mnode);
17363 mp->fix_size = fix->fix_size;
17364 mp->mode = fix->mode;
17365 mp->value = fix->value;
17366 mp->refcount = 1;
17367 /* Not yet required for a backwards ref. */
17368 mp->min_address = -65536;
17369
17370 if (max_mp == NULL)
17371 {
17372 mp->max_address = max_address;
17373 mp->next = NULL;
17374 mp->prev = minipool_vector_tail;
17375
17376 if (mp->prev == NULL)
17377 {
17378 minipool_vector_head = mp;
17379 minipool_vector_label = gen_label_rtx ();
17380 }
17381 else
17382 mp->prev->next = mp;
17383
17384 minipool_vector_tail = mp;
17385 }
17386 else
17387 {
17388 if (max_address > max_mp->max_address - mp->fix_size)
17389 mp->max_address = max_mp->max_address - mp->fix_size;
17390 else
17391 mp->max_address = max_address;
17392
17393 mp->next = max_mp;
17394 mp->prev = max_mp->prev;
17395 max_mp->prev = mp;
17396 if (mp->prev != NULL)
17397 mp->prev->next = mp;
17398 else
17399 minipool_vector_head = mp;
17400 }
17401
17402 /* Save the new entry. */
17403 max_mp = mp;
17404
17405 /* Scan over the preceding entries and adjust their addresses as
17406 required. */
17407 while (mp->prev != NULL
17408 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17409 {
17410 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17411 mp = mp->prev;
17412 }
17413
17414 return max_mp;
17415 }
17416
17417 static Mnode *
17418 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17419 HOST_WIDE_INT min_address)
17420 {
17421 HOST_WIDE_INT offset;
17422
17423 /* The code below assumes these are different. */
17424 gcc_assert (mp != min_mp);
17425
17426 if (min_mp == NULL)
17427 {
17428 if (min_address > mp->min_address)
17429 mp->min_address = min_address;
17430 }
17431 else
17432 {
17433 /* We will adjust this below if it is too loose. */
17434 mp->min_address = min_address;
17435
17436 /* Unlink MP from its current position. Since min_mp is non-null,
17437 mp->next must be non-null. */
17438 mp->next->prev = mp->prev;
17439 if (mp->prev != NULL)
17440 mp->prev->next = mp->next;
17441 else
17442 minipool_vector_head = mp->next;
17443
17444 /* Reinsert it after MIN_MP. */
17445 mp->prev = min_mp;
17446 mp->next = min_mp->next;
17447 min_mp->next = mp;
17448 if (mp->next != NULL)
17449 mp->next->prev = mp;
17450 else
17451 minipool_vector_tail = mp;
17452 }
17453
17454 min_mp = mp;
17455
17456 offset = 0;
17457 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17458 {
17459 mp->offset = offset;
17460 if (mp->refcount > 0)
17461 offset += mp->fix_size;
17462
17463 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17464 mp->next->min_address = mp->min_address + mp->fix_size;
17465 }
17466
17467 return min_mp;
17468 }
17469
17470 /* Add a constant to the minipool for a backward reference. Returns the
17471 node added or NULL if the constant will not fit in this pool.
17472
   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
17477 static Mnode *
17478 add_minipool_backward_ref (Mfix *fix)
17479 {
17480 /* If set, min_mp is the last pool_entry that has a lower constraint
17481 than the one we are trying to add. */
17482 Mnode *min_mp = NULL;
17483 /* This can be negative, since it is only a constraint. */
17484 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17485 Mnode *mp;
17486
17487 /* If we can't reach the current pool from this insn, or if we can't
17488 insert this entry at the end of the pool without pushing other
17489 fixes out of range, then we don't try. This ensures that we
17490 can't fail later on. */
17491 if (min_address >= minipool_barrier->address
17492 || (minipool_vector_tail->min_address + fix->fix_size
17493 >= minipool_barrier->address))
17494 return NULL;
17495
17496 /* Scan the pool to see if a constant with the same value has
17497 already been added. While we are doing this, also note the
17498 location where we must insert the constant if it doesn't already
17499 exist. */
17500 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17501 {
17502 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17503 && fix->mode == mp->mode
17504 && (!LABEL_P (fix->value)
17505 || (CODE_LABEL_NUMBER (fix->value)
17506 == CODE_LABEL_NUMBER (mp->value)))
17507 && rtx_equal_p (fix->value, mp->value)
17508 /* Check that there is enough slack to move this entry to the
17509 end of the table (this is conservative). */
17510 && (mp->max_address
17511 > (minipool_barrier->address
17512 + minipool_vector_tail->offset
17513 + minipool_vector_tail->fix_size)))
17514 {
17515 mp->refcount++;
17516 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17517 }
17518
17519 if (min_mp != NULL)
17520 mp->min_address += fix->fix_size;
17521 else
17522 {
17523 /* Note the insertion point if necessary. */
17524 if (mp->min_address < min_address)
17525 {
	      /* For now, we do not allow the insertion of nodes requiring
		 8-byte alignment anywhere but at the start of the pool.  */
17528 if (ARM_DOUBLEWORD_ALIGN
17529 && fix->fix_size >= 8 && mp->fix_size < 8)
17530 return NULL;
17531 else
17532 min_mp = mp;
17533 }
17534 else if (mp->max_address
17535 < minipool_barrier->address + mp->offset + fix->fix_size)
17536 {
17537 /* Inserting before this entry would push the fix beyond
17538 its maximum address (which can happen if we have
17539 re-located a forwards fix); force the new fix to come
17540 after it. */
17541 if (ARM_DOUBLEWORD_ALIGN
17542 && fix->fix_size >= 8 && mp->fix_size < 8)
17543 return NULL;
17544 else
17545 {
17546 min_mp = mp;
17547 min_address = mp->min_address + fix->fix_size;
17548 }
17549 }
17550 /* Do not insert a non-8-byte aligned quantity before 8-byte
17551 aligned quantities. */
17552 else if (ARM_DOUBLEWORD_ALIGN
17553 && fix->fix_size < 8
17554 && mp->fix_size >= 8)
17555 {
17556 min_mp = mp;
17557 min_address = mp->min_address + fix->fix_size;
17558 }
17559 }
17560 }
17561
17562 /* We need to create a new entry. */
17563 mp = XNEW (Mnode);
17564 mp->fix_size = fix->fix_size;
17565 mp->mode = fix->mode;
17566 mp->value = fix->value;
17567 mp->refcount = 1;
17568 mp->max_address = minipool_barrier->address + 65536;
17569
17570 mp->min_address = min_address;
17571
17572 if (min_mp == NULL)
17573 {
17574 mp->prev = NULL;
17575 mp->next = minipool_vector_head;
17576
17577 if (mp->next == NULL)
17578 {
17579 minipool_vector_tail = mp;
17580 minipool_vector_label = gen_label_rtx ();
17581 }
17582 else
17583 mp->next->prev = mp;
17584
17585 minipool_vector_head = mp;
17586 }
17587 else
17588 {
17589 mp->next = min_mp->next;
17590 mp->prev = min_mp;
17591 min_mp->next = mp;
17592
17593 if (mp->next != NULL)
17594 mp->next->prev = mp;
17595 else
17596 minipool_vector_tail = mp;
17597 }
17598
17599 /* Save the new entry. */
17600 min_mp = mp;
17601
17602 if (mp->prev)
17603 mp = mp->prev;
17604 else
17605 mp->offset = 0;
17606
17607 /* Scan over the following entries and adjust their offsets. */
17608 while (mp->next != NULL)
17609 {
17610 if (mp->next->min_address < mp->min_address + mp->fix_size)
17611 mp->next->min_address = mp->min_address + mp->fix_size;
17612
17613 if (mp->refcount)
17614 mp->next->offset = mp->offset + mp->fix_size;
17615 else
17616 mp->next->offset = mp->offset;
17617
17618 mp = mp->next;
17619 }
17620
17621 return min_mp;
17622 }
17623
17624 static void
17625 assign_minipool_offsets (Mfix *barrier)
17626 {
17627 HOST_WIDE_INT offset = 0;
17628 Mnode *mp;
17629
17630 minipool_barrier = barrier;
17631
17632 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17633 {
17634 mp->offset = offset;
17635
17636 if (mp->refcount > 0)
17637 offset += mp->fix_size;
17638 }
17639 }
17640
/* Output the literal table.  */
17642 static void
17643 dump_minipool (rtx_insn *scan)
17644 {
17645 Mnode * mp;
17646 Mnode * nmp;
17647 int align64 = 0;
17648
17649 if (ARM_DOUBLEWORD_ALIGN)
17650 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17651 if (mp->refcount > 0 && mp->fix_size >= 8)
17652 {
17653 align64 = 1;
17654 break;
17655 }
17656
17657 if (dump_file)
17658 fprintf (dump_file,
17659 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17660 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17661
17662 scan = emit_label_after (gen_label_rtx (), scan);
17663 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17664 scan = emit_label_after (minipool_vector_label, scan);
17665
17666 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17667 {
17668 if (mp->refcount > 0)
17669 {
17670 if (dump_file)
17671 {
17672 fprintf (dump_file,
17673 ";; Offset %u, min %ld, max %ld ",
17674 (unsigned) mp->offset, (unsigned long) mp->min_address,
17675 (unsigned long) mp->max_address);
17676 arm_print_value (dump_file, mp->value);
17677 fputc ('\n', dump_file);
17678 }
17679
17680 rtx val = copy_rtx (mp->value);
17681
17682 switch (GET_MODE_SIZE (mp->mode))
17683 {
17684 #ifdef HAVE_consttable_1
17685 case 1:
17686 scan = emit_insn_after (gen_consttable_1 (val), scan);
17687 break;
17688
17689 #endif
17690 #ifdef HAVE_consttable_2
17691 case 2:
17692 scan = emit_insn_after (gen_consttable_2 (val), scan);
17693 break;
17694
17695 #endif
17696 #ifdef HAVE_consttable_4
17697 case 4:
17698 scan = emit_insn_after (gen_consttable_4 (val), scan);
17699 break;
17700
17701 #endif
17702 #ifdef HAVE_consttable_8
17703 case 8:
17704 scan = emit_insn_after (gen_consttable_8 (val), scan);
17705 break;
17706
17707 #endif
17708 #ifdef HAVE_consttable_16
17709 case 16:
17710 scan = emit_insn_after (gen_consttable_16 (val), scan);
17711 break;
17712
17713 #endif
17714 default:
17715 gcc_unreachable ();
17716 }
17717 }
17718
17719 nmp = mp->next;
17720 free (mp);
17721 }
17722
17723 minipool_vector_head = minipool_vector_tail = NULL;
17724 scan = emit_insn_after (gen_consttable_end (), scan);
17725 scan = emit_barrier_after (scan);
17726 }
17727
17728 /* Return the cost of forcibly inserting a barrier after INSN. */
17729 static int
17730 arm_barrier_cost (rtx_insn *insn)
17731 {
17732 /* Basing the location of the pool on the loop depth is preferable,
17733 but at the moment, the basic block information seems to be
17734 corrupt by this stage of the compilation. */
17735 int base_cost = 50;
17736 rtx_insn *next = next_nonnote_insn (insn);
17737
17738 if (next != NULL && LABEL_P (next))
17739 base_cost -= 20;
17740
17741 switch (GET_CODE (insn))
17742 {
17743 case CODE_LABEL:
17744 /* It will always be better to place the table before the label, rather
17745 than after it. */
17746 return 50;
17747
17748 case INSN:
17749 case CALL_INSN:
17750 return base_cost;
17751
17752 case JUMP_INSN:
17753 return base_cost - 10;
17754
17755 default:
17756 return base_cost + 10;
17757 }
17758 }
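
/* Hedged worked example of the heuristic above: for a JUMP_INSN whose next
   non-note insn is a label, the cost is (50 - 20) - 10 = 20, making the point
   just after such a branch one of the cheapest places to force a pool
   barrier; a CODE_LABEL itself always costs a flat 50, since the pool should
   go before the label rather than after it.  */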
17759
17760 /* Find the best place in the insn stream in the range
17761 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17762 Create the barrier by inserting a jump and add a new fix entry for
17763 it. */
17764 static Mfix *
17765 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17766 {
17767 HOST_WIDE_INT count = 0;
17768 rtx_barrier *barrier;
17769 rtx_insn *from = fix->insn;
17770 /* The instruction after which we will insert the jump. */
17771 rtx_insn *selected = NULL;
17772 int selected_cost;
17773 /* The address at which the jump instruction will be placed. */
17774 HOST_WIDE_INT selected_address;
17775 Mfix * new_fix;
17776 HOST_WIDE_INT max_count = max_address - fix->address;
17777 rtx_code_label *label = gen_label_rtx ();
17778
17779 selected_cost = arm_barrier_cost (from);
17780 selected_address = fix->address;
17781
17782 while (from && count < max_count)
17783 {
17784 rtx_jump_table_data *tmp;
17785 int new_cost;
17786
17787 /* This code shouldn't have been called if there was a natural barrier
17788 within range. */
17789 gcc_assert (!BARRIER_P (from));
17790
17791 /* Count the length of this insn. This must stay in sync with the
17792 code that pushes minipool fixes. */
17793 if (LABEL_P (from))
17794 count += get_label_padding (from);
17795 else
17796 count += get_attr_length (from);
17797
17798 /* If there is a jump table, add its length. */
17799 if (tablejump_p (from, NULL, &tmp))
17800 {
17801 count += get_jump_table_size (tmp);
17802
17803 /* Jump tables aren't in a basic block, so base the cost on
17804 the dispatch insn. If we select this location, we will
17805 still put the pool after the table. */
17806 new_cost = arm_barrier_cost (from);
17807
17808 if (count < max_count
17809 && (!selected || new_cost <= selected_cost))
17810 {
17811 selected = tmp;
17812 selected_cost = new_cost;
17813 selected_address = fix->address + count;
17814 }
17815
17816 /* Continue after the dispatch table. */
17817 from = NEXT_INSN (tmp);
17818 continue;
17819 }
17820
17821 new_cost = arm_barrier_cost (from);
17822
17823 if (count < max_count
17824 && (!selected || new_cost <= selected_cost))
17825 {
17826 selected = from;
17827 selected_cost = new_cost;
17828 selected_address = fix->address + count;
17829 }
17830
17831 from = NEXT_INSN (from);
17832 }
17833
17834 /* Make sure that we found a place to insert the jump. */
17835 gcc_assert (selected);
17836
17837 /* Create a new JUMP_INSN that branches around a barrier. */
17838 from = emit_jump_insn_after (gen_jump (label), selected);
17839 JUMP_LABEL (from) = label;
17840 barrier = emit_barrier_after (from);
17841 emit_label_after (label, barrier);
17842
17843 /* Create a minipool barrier entry for the new barrier. */
17844 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17845 new_fix->insn = barrier;
17846 new_fix->address = selected_address;
17847 new_fix->next = fix->next;
17848 fix->next = new_fix;
17849
17850 return new_fix;
17851 }
17852
17853 /* Record that there is a natural barrier in the insn stream at
17854 ADDRESS. */
17855 static void
17856 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17857 {
17858 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17859
17860 fix->insn = insn;
17861 fix->address = address;
17862
17863 fix->next = NULL;
17864 if (minipool_fix_head != NULL)
17865 minipool_fix_tail->next = fix;
17866 else
17867 minipool_fix_head = fix;
17868
17869 minipool_fix_tail = fix;
17870 }
17871
17872 /* Record INSN, which will need fixing up to load a value from the
17873 minipool. ADDRESS is the offset of the insn from the start of the
17874 function; LOC is a pointer to the part of the insn which requires
17875 fixing; VALUE is the constant that must be loaded, which is of type
17876 MODE. */
17877 static void
17878 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17879 machine_mode mode, rtx value)
17880 {
17881 gcc_assert (!arm_disable_literal_pool);
17882 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17883
17884 fix->insn = insn;
17885 fix->address = address;
17886 fix->loc = loc;
17887 fix->mode = mode;
17888 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17889 fix->value = value;
17890 fix->forwards = get_attr_pool_range (insn);
17891 fix->backwards = get_attr_neg_pool_range (insn);
17892 fix->minipool = NULL;
17893
17894 /* If an insn doesn't have a range defined for it, then it isn't
17895 expecting to be reworked by this code. Better to stop now than
17896 to generate duff assembly code. */
17897 gcc_assert (fix->forwards || fix->backwards);
17898
17899 /* If an entry requires 8-byte alignment then assume all constant pools
17900 require 4 bytes of padding. Trying to do this later on a per-pool
17901 basis is awkward because existing pool entries have to be modified. */
17902 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17903 minipool_pad = 4;
17904
17905 if (dump_file)
17906 {
17907 fprintf (dump_file,
17908 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17909 GET_MODE_NAME (mode),
17910 INSN_UID (insn), (unsigned long) address,
17911 -1 * (long)fix->backwards, (long)fix->forwards);
17912 arm_print_value (dump_file, fix->value);
17913 fprintf (dump_file, "\n");
17914 }
17915
17916 /* Add it to the chain of fixes. */
17917 fix->next = NULL;
17918
17919 if (minipool_fix_head != NULL)
17920 minipool_fix_tail->next = fix;
17921 else
17922 minipool_fix_head = fix;
17923
17924 minipool_fix_tail = fix;
17925 }
17926
17927 /* Return the maximum cost, in insns, that we are prepared to pay to
17928 synthesize a 64-bit constant inline. Constants more expensive than
17929 this are handled by other means. */
17930 int
17931 arm_max_const_double_inline_cost ()
17932 {
17933 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17934 }
17935
17936 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17937 Returns the number of insns needed, or 99 if we don't know how to
17938 do it. */
17939 int
17940 arm_const_double_inline_cost (rtx val)
17941 {
17942 rtx lowpart, highpart;
17943 machine_mode mode;
17944
17945 mode = GET_MODE (val);
17946
17947 if (mode == VOIDmode)
17948 mode = DImode;
17949
17950 gcc_assert (GET_MODE_SIZE (mode) == 8);
17951
17952 lowpart = gen_lowpart (SImode, val);
17953 highpart = gen_highpart_mode (SImode, mode, val);
17954
17955 gcc_assert (CONST_INT_P (lowpart));
17956 gcc_assert (CONST_INT_P (highpart));
17957
17958 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17959 NULL_RTX, NULL_RTX, 0, 0)
17960 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17961 NULL_RTX, NULL_RTX, 0, 0));
17962 }
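
/* Illustrative example (a sketch, not part of the build): for the DImode
   constant 0x000000ff00000001 the low part 0x00000001 and the high part
   0x000000ff are both valid ARM immediates, so each half costs a single
   insn and the function returns 2.  */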
17963
17964 /* Cost of loading a SImode constant. */
17965 static inline int
17966 arm_const_inline_cost (enum rtx_code code, rtx val)
17967 {
17968 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17969 NULL_RTX, NULL_RTX, 1, 0);
17970 }
17971
17972 /* Return true if it is worthwhile to split a 64-bit constant into two
17973 32-bit operations. This is the case if optimizing for size, or
17974 if we have load delay slots, or if one 32-bit part can be done with
17975 a single data operation. */
17976 bool
17977 arm_const_double_by_parts (rtx val)
17978 {
17979 machine_mode mode = GET_MODE (val);
17980 rtx part;
17981
17982 if (optimize_size || arm_ld_sched)
17983 return true;
17984
17985 if (mode == VOIDmode)
17986 mode = DImode;
17987
17988 part = gen_highpart_mode (SImode, mode, val);
17989
17990 gcc_assert (CONST_INT_P (part));
17991
17992 if (const_ok_for_arm (INTVAL (part))
17993 || const_ok_for_arm (~INTVAL (part)))
17994 return true;
17995
17996 part = gen_lowpart (SImode, val);
17997
17998 gcc_assert (CONST_INT_P (part));
17999
18000 if (const_ok_for_arm (INTVAL (part))
18001 || const_ok_for_arm (~INTVAL (part)))
18002 return true;
18003
18004 return false;
18005 }
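
/* Example of the test above (illustrative only): when neither optimizing for
   size nor scheduling for load delay slots, 0xff00000012345678 is still worth
   splitting because its high part 0xff000000 is a valid ARM immediate, even
   though the low part 0x12345678 is not.  */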
18006
18007 /* Return true if it is possible to inline both the high and low parts
18008 of a 64-bit constant into 32-bit data processing instructions. */
18009 bool
18010 arm_const_double_by_immediates (rtx val)
18011 {
18012 machine_mode mode = GET_MODE (val);
18013 rtx part;
18014
18015 if (mode == VOIDmode)
18016 mode = DImode;
18017
18018 part = gen_highpart_mode (SImode, mode, val);
18019
18020 gcc_assert (CONST_INT_P (part));
18021
18022 if (!const_ok_for_arm (INTVAL (part)))
18023 return false;
18024
18025 part = gen_lowpart (SImode, val);
18026
18027 gcc_assert (CONST_INT_P (part));
18028
18029 if (!const_ok_for_arm (INTVAL (part)))
18030 return false;
18031
18032 return true;
18033 }
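
/* Illustrative example: 0x000000ff00000001 satisfies this test because both
   halves (0xff and 0x1) are valid ARM immediates, whereas 0x0000000112345678
   does not, since 0x12345678 cannot be encoded as an 8-bit value rotated by
   an even amount.  */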
18034
18035 /* Scan INSN and note any of its operands that need fixing.
18036 If DO_PUSHES is false we do not actually push any of the fixups
18037 needed. */
18038 static void
18039 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18040 {
18041 int opno;
18042
18043 extract_constrain_insn (insn);
18044
18045 if (recog_data.n_alternatives == 0)
18046 return;
18047
18048 /* Fill in recog_op_alt with information about the constraints of
18049 this insn. */
18050 preprocess_constraints (insn);
18051
18052 const operand_alternative *op_alt = which_op_alt ();
18053 for (opno = 0; opno < recog_data.n_operands; opno++)
18054 {
18055 /* Things we need to fix can only occur in inputs. */
18056 if (recog_data.operand_type[opno] != OP_IN)
18057 continue;
18058
18059 /* If this alternative is a memory reference, then any mention
18060 of constants in this alternative is really to fool reload
18061 into allowing us to accept one there. We need to fix them up
18062 now so that we output the right code. */
18063 if (op_alt[opno].memory_ok)
18064 {
18065 rtx op = recog_data.operand[opno];
18066
18067 if (CONSTANT_P (op))
18068 {
18069 if (do_pushes)
18070 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18071 recog_data.operand_mode[opno], op);
18072 }
18073 else if (MEM_P (op)
18074 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18075 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18076 {
18077 if (do_pushes)
18078 {
18079 rtx cop = avoid_constant_pool_reference (op);
18080
18081 /* Casting the address of something to a mode narrower
18082 than a word can cause avoid_constant_pool_reference()
18083 to return the pool reference itself. That's no good to
18084 us here. Let's just hope that we can use the
18085 constant pool value directly. */
18086 if (op == cop)
18087 cop = get_pool_constant (XEXP (op, 0));
18088
18089 push_minipool_fix (insn, address,
18090 recog_data.operand_loc[opno],
18091 recog_data.operand_mode[opno], cop);
18092 }
18093
18094 }
18095 }
18096 }
18097
18098 return;
18099 }
18100
18101 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18102 and unions in the context of ARMv8-M Security Extensions. It is used as a
18103 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18104 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18105 or four masks, depending on whether it is being computed for a
18106 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18107 respectively. The tree for the type of the argument or a field within an
18108 argument is passed in ARG_TYPE, the current register this argument or field
18109 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18110 argument or field starts at is passed in STARTING_BIT and the last used bit
18111 is kept in LAST_USED_BIT which is also updated accordingly. */
18112
18113 static unsigned HOST_WIDE_INT
18114 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18115 uint32_t * padding_bits_to_clear,
18116 unsigned starting_bit, int * last_used_bit)
18117
18118 {
18119 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18120
18121 if (TREE_CODE (arg_type) == RECORD_TYPE)
18122 {
18123 unsigned current_bit = starting_bit;
18124 tree field;
18125 long int offset, size;
18126
18127
18128 field = TYPE_FIELDS (arg_type);
18129 while (field)
18130 {
18131 /* The offset within a structure is always an offset from
18132 the start of that structure. Make sure we take that into account
18133 in the calculation of the register-based offset that we use here. */
18134 offset = starting_bit;
18135 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18136 offset %= 32;
18137
18138 /* This is the actual size of the field, for bitfields this is the
18139 bitfield width and not the container size. */
18140 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18141
18142 if (*last_used_bit != offset)
18143 {
18144 if (offset < *last_used_bit)
18145 {
18146 /* This field's offset is before the 'last_used_bit', that
18147 means this field goes on the next register. So we need to
18148 pad the rest of the current register and increase the
18149 register number. */
18150 uint32_t mask;
18151 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18152 mask++;
18153
18154 padding_bits_to_clear[*regno] |= mask;
18155 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18156 (*regno)++;
18157 }
18158 else
18159 {
18160 /* Otherwise we pad the bits between the last field's end and
18161 the start of the new field. */
18162 uint32_t mask;
18163
18164 mask = ((uint32_t)-1) >> (32 - offset);
18165 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18166 padding_bits_to_clear[*regno] |= mask;
18167 }
18168 current_bit = offset;
18169 }
18170
18171 /* Calculate further padding bits for inner structs/unions too. */
18172 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18173 {
18174 *last_used_bit = current_bit;
18175 not_to_clear_reg_mask
18176 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18177 padding_bits_to_clear, offset,
18178 last_used_bit);
18179 }
18180 else
18181 {
18182 /* Update 'current_bit' with this field's size. If the
18183 'current_bit' lies in a subsequent register, update 'regno' and
18184 reset 'current_bit' to point to the current bit in that new
18185 register. */
18186 current_bit += size;
18187 while (current_bit >= 32)
18188 {
18189 current_bit-=32;
18190 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18191 (*regno)++;
18192 }
18193 *last_used_bit = current_bit;
18194 }
18195
18196 field = TREE_CHAIN (field);
18197 }
18198 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18199 }
18200 else if (TREE_CODE (arg_type) == UNION_TYPE)
18201 {
18202 tree field, field_t;
18203 int i, regno_t, field_size;
18204 int max_reg = -1;
18205 int max_bit = -1;
18206 uint32_t mask;
18207 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18208 = {-1, -1, -1, -1};
18209
18210 /* To compute the padding bits in a union we only consider bits as
18211 padding bits if they are always either a padding bit or fall outside a
18212 field's size for all fields in the union. */
18213 field = TYPE_FIELDS (arg_type);
18214 while (field)
18215 {
18216 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18217 = {0U, 0U, 0U, 0U};
18218 int last_used_bit_t = *last_used_bit;
18219 regno_t = *regno;
18220 field_t = TREE_TYPE (field);
18221
18222 /* If the field's type is either a record or a union make sure to
18223 compute their padding bits too. */
18224 if (RECORD_OR_UNION_TYPE_P (field_t))
18225 not_to_clear_reg_mask
18226 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18227 &padding_bits_to_clear_t[0],
18228 starting_bit, &last_used_bit_t);
18229 else
18230 {
18231 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18232 regno_t = (field_size / 32) + *regno;
18233 last_used_bit_t = (starting_bit + field_size) % 32;
18234 }
18235
18236 for (i = *regno; i < regno_t; i++)
18237 {
18238 /* For all but the last register used by this field only keep the
18239 padding bits that were padding bits in this field. */
18240 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18241 }
18242
18243 /* For the last register, keep all padding bits that were padding
18244 bits in this field and any padding bits that are still valid
18245 as padding bits but fall outside of this field's size. */
18246 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18247 padding_bits_to_clear_res[regno_t]
18248 &= padding_bits_to_clear_t[regno_t] | mask;
18249
18250 /* Update the maximum size of the fields in terms of registers used
18251 ('max_reg') and the 'last_used_bit' in said register. */
18252 if (max_reg < regno_t)
18253 {
18254 max_reg = regno_t;
18255 max_bit = last_used_bit_t;
18256 }
18257 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18258 max_bit = last_used_bit_t;
18259
18260 field = TREE_CHAIN (field);
18261 }
18262
18263 /* Update the current padding_bits_to_clear using the intersection of the
18264 padding bits of all the fields. */
18265 for (i = *regno; i < max_reg; i++)
18266 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18267
18268 /* Do not keep trailing padding bits, we do not know yet whether this
18269 is the end of the argument. */
18270 mask = ((uint32_t) 1 << max_bit) - 1;
18271 padding_bits_to_clear[max_reg]
18272 |= padding_bits_to_clear_res[max_reg] & mask;
18273
18274 *regno = max_reg;
18275 *last_used_bit = max_bit;
18276 }
18277 else
18278 /* This function should only be used for structs and unions. */
18279 gcc_unreachable ();
18280
18281 return not_to_clear_reg_mask;
18282 }
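
/* Hedged worked example (hypothetical argument type, not part of the build):
   for a struct { uint8_t a; uint16_t b; } starting in r0 with STARTING_BIT 0,
   the walk above records 0x0000ff00 in padding_bits_to_clear[0] for the pad
   byte between the two fields and returns a mask with bits 0 and 1 set; the
   caller (compute_not_to_clear_mask) then drops bit 1 again because
   LAST_USED_BIT comes back as 0.  */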
18283
18284 /* In the context of ARMv8-M Security Extensions, this function is used for both
18285 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18286 registers are used when returning or passing arguments, which is then
18287 returned as a mask. It will also compute a mask to indicate padding/unused
18288 bits for each of these registers, and pass this back through the
18289 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18290 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18291 the starting register used to pass this argument or return value is passed
18292 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18293 for struct and union types. */
18294
18295 static unsigned HOST_WIDE_INT
18296 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18297 uint32_t * padding_bits_to_clear)
18298
18299 {
18300 int last_used_bit = 0;
18301 unsigned HOST_WIDE_INT not_to_clear_mask;
18302
18303 if (RECORD_OR_UNION_TYPE_P (arg_type))
18304 {
18305 not_to_clear_mask
18306 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18307 padding_bits_to_clear, 0,
18308 &last_used_bit);
18309
18310
18311 /* If the 'last_used_bit' is not zero, that means we are still using a
18312 part of the last 'regno'. In such cases we must clear the trailing
18313 bits. Otherwise we are not using regno at all and should mark it
18314 to be cleared. */
18315 if (last_used_bit != 0)
18316 padding_bits_to_clear[regno]
18317 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18318 else
18319 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18320 }
18321 else
18322 {
18323 not_to_clear_mask = 0;
18324 /* We are not dealing with structs nor unions. So these arguments may be
18325 passed in floating point registers too. In some cases a BLKmode is
18326 used when returning or passing arguments in multiple VFP registers. */
18327 if (GET_MODE (arg_rtx) == BLKmode)
18328 {
18329 int i, arg_regs;
18330 rtx reg;
18331
18332 /* This should really only occur when dealing with the hard-float
18333 ABI. */
18334 gcc_assert (TARGET_HARD_FLOAT_ABI);
18335
18336 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18337 {
18338 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18339 gcc_assert (REG_P (reg));
18340
18341 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18342
18343 /* If we are dealing with DF mode, make sure we don't
18344 clear either of the registers it addresses. */
18345 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18346 if (arg_regs > 1)
18347 {
18348 unsigned HOST_WIDE_INT mask;
18349 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18350 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18351 not_to_clear_mask |= mask;
18352 }
18353 }
18354 }
18355 else
18356 {
18357 /* Otherwise we can rely on the MODE to determine how many registers
18358 are being used by this argument. */
18359 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18360 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18361 if (arg_regs > 1)
18362 {
18363 unsigned HOST_WIDE_INT
18364 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18365 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18366 not_to_clear_mask |= mask;
18367 }
18368 }
18369 }
18370
18371 return not_to_clear_mask;
18372 }
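
/* Illustrative example (a sketch under the hard-float ABI): for a lone
   "double" argument passed in d0, ARG_RTX is a DFmode register whose mode
   spans two registers, so the returned mask has the bits for s0 and s1 set
   and no padding bits are recorded.  */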
18373
18374 /* Clear secret values from registers before doing a cmse_nonsecure_call or
18375 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18376 which registers are to be fully cleared, using the value in register
18377 CLEARING_REG if that is more efficient. The PADDING_BITS_LEN-entry array
18378 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18379 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18380
18381 NOTE: one of the three following conditions must hold:
18382 - SCRATCH_REG is a low register
18383 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18384 in TO_CLEAR_BITMAP)
18385 - CLEARING_REG is a low register. */
18386
18387 static void
18388 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18389 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18390 {
18391 bool saved_clearing = false;
18392 rtx saved_clearing_reg = NULL_RTX;
18393 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18394
18395 gcc_assert (arm_arch_cmse);
18396
18397 if (!bitmap_empty_p (to_clear_bitmap))
18398 {
18399 minregno = bitmap_first_set_bit (to_clear_bitmap);
18400 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18401 }
18402 clearing_regno = REGNO (clearing_reg);
18403
18404 /* Clear padding bits. */
18405 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18406 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18407 {
18408 uint64_t mask;
18409 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18410
18411 if (padding_bits_to_clear[i] == 0)
18412 continue;
18413
18414 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18415 CLEARING_REG as scratch. */
18416 if (TARGET_THUMB1
18417 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18418 {
18419 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18420 such that we can use clearing_reg to clear the unused bits in the
18421 arguments. */
18422 if ((clearing_regno > maxregno
18423 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18424 && !saved_clearing)
18425 {
18426 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18427 emit_move_insn (scratch_reg, clearing_reg);
18428 saved_clearing = true;
18429 saved_clearing_reg = scratch_reg;
18430 }
18431 scratch_reg = clearing_reg;
18432 }
18433
18434 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18435 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18436 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18437
18438 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18439 mask = (~padding_bits_to_clear[i]) >> 16;
18440 rtx16 = gen_int_mode (16, SImode);
18441 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18442 if (mask)
18443 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18444
18445 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18446 }
18447 if (saved_clearing)
18448 emit_move_insn (clearing_reg, saved_clearing_reg);
18449
18450
18451 /* Clear full registers. */
18452
18453 if (TARGET_HAVE_FPCXT_CMSE)
18454 {
18455 rtvec vunspec_vec;
18456 int i, j, k, nb_regs;
18457 rtx use_seq, par, reg, set, vunspec;
18458 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18459 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18460 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18461
18462 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18463 {
18464 /* Find next register to clear and exit if none. */
18465 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18466 if (i > maxregno)
18467 break;
18468
18469 /* Compute number of consecutive registers to clear. */
18470 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18471 j++);
18472 nb_regs = j - i;
18473
18474 /* Create VSCCLRM RTX pattern. */
18475 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18476 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18477 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18478 VUNSPEC_VSCCLRM_VPR);
18479 XVECEXP (par, 0, 0) = vunspec;
18480
18481 /* Insert VFP register clearing RTX in the pattern. */
18482 start_sequence ();
18483 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18484 {
18485 if (!bitmap_bit_p (to_clear_bitmap, j))
18486 continue;
18487
18488 reg = gen_rtx_REG (SFmode, j);
18489 set = gen_rtx_SET (reg, const0_rtx);
18490 XVECEXP (par, 0, k++) = set;
18491 emit_use (reg);
18492 }
18493 use_seq = get_insns ();
18494 end_sequence ();
18495
18496 emit_insn_after (use_seq, emit_insn (par));
18497 }
18498
18499 /* Get set of core registers to clear. */
18500 bitmap_clear (core_regs_bitmap);
18501 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18502 IP_REGNUM - R0_REGNUM + 1);
18503 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18504 core_regs_bitmap);
18505 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18506
18507 if (bitmap_empty_p (to_clear_core_bitmap))
18508 return;
18509
18510 /* Create clrm RTX pattern. */
18511 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18512 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18513
18514 /* Insert core register clearing RTX in the pattern. */
18515 start_sequence ();
18516 for (j = 0, i = minregno; j < nb_regs; i++)
18517 {
18518 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18519 continue;
18520
18521 reg = gen_rtx_REG (SImode, i);
18522 set = gen_rtx_SET (reg, const0_rtx);
18523 XVECEXP (par, 0, j++) = set;
18524 emit_use (reg);
18525 }
18526
18527 /* Insert APSR register clearing RTX in the pattern
18528 * along with clobbering CC. */
18529 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18530 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18531 VUNSPEC_CLRM_APSR);
18532
18533 XVECEXP (par, 0, j++) = vunspec;
18534
18535 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18536 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18537 XVECEXP (par, 0, j) = clobber;
18538
18539 use_seq = get_insns ();
18540 end_sequence ();
18541
18542 emit_insn_after (use_seq, emit_insn (par));
18543 }
18544 else
18545 {
18546 /* If not marked for clearing, clearing_reg already does not contain
18547 any secret. */
18548 if (clearing_regno <= maxregno
18549 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18550 {
18551 emit_move_insn (clearing_reg, const0_rtx);
18552 emit_use (clearing_reg);
18553 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18554 }
18555
18556 for (regno = minregno; regno <= maxregno; regno++)
18557 {
18558 if (!bitmap_bit_p (to_clear_bitmap, regno))
18559 continue;
18560
18561 if (IS_VFP_REGNUM (regno))
18562 {
18563 /* If regno is an even vfp register and its successor is also to
18564 be cleared, use vmov. */
18565 if (TARGET_VFP_DOUBLE
18566 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18567 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18568 {
18569 emit_move_insn (gen_rtx_REG (DFmode, regno),
18570 CONST1_RTX (DFmode));
18571 emit_use (gen_rtx_REG (DFmode, regno));
18572 regno++;
18573 }
18574 else
18575 {
18576 emit_move_insn (gen_rtx_REG (SFmode, regno),
18577 CONST1_RTX (SFmode));
18578 emit_use (gen_rtx_REG (SFmode, regno));
18579 }
18580 }
18581 else
18582 {
18583 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18584 emit_use (gen_rtx_REG (SImode, regno));
18585 }
18586 }
18587 }
18588 }
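
/* Rough sketch of the padding-bit clearing above (illustrative, not the exact
   RTL): with padding_bits_to_clear[0] == 0x0000ff00 and IP as the scratch
   register on an ARM/Thumb-2 target, the emitted sequence is approximately

       mov     ip, #0x00ff      @ low half of ~0x0000ff00
       movt    ip, #0xffff      @ high half of ~0x0000ff00
       and     r0, r0, ip

   i.e. the unused byte of r0 is zeroed while the argument bytes are kept.  */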
18589
18590 /* Clear core and caller-saved VFP registers not used to pass arguments before
18591 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18592 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18593 libgcc/config/arm/cmse_nonsecure_call.S. */
18594
18595 static void
18596 cmse_nonsecure_call_inline_register_clear (void)
18597 {
18598 basic_block bb;
18599
18600 FOR_EACH_BB_FN (bb, cfun)
18601 {
18602 rtx_insn *insn;
18603
18604 FOR_BB_INSNS (bb, insn)
18605 {
18606 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18607 /* frame = VFP regs + FPSCR + VPR. */
18608 unsigned lazy_store_stack_frame_size
18609 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18610 unsigned long callee_saved_mask
18611 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18612 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18613 unsigned address_regnum, regno;
18614 unsigned max_int_regno
18615 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18616 unsigned max_fp_regno
18617 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18618 unsigned maxregno
18619 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18620 auto_sbitmap to_clear_bitmap (maxregno + 1);
18621 rtx_insn *seq;
18622 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18623 rtx address;
18624 CUMULATIVE_ARGS args_so_far_v;
18625 cumulative_args_t args_so_far;
18626 tree arg_type, fntype;
18627 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18628 function_args_iterator args_iter;
18629 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18630
18631 if (!NONDEBUG_INSN_P (insn))
18632 continue;
18633
18634 if (!CALL_P (insn))
18635 continue;
18636
18637 pat = PATTERN (insn);
18638 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18639 call = XVECEXP (pat, 0, 0);
18640
18641 /* Get the real call RTX if the insn sets a value, ie. returns. */
18642 if (GET_CODE (call) == SET)
18643 call = SET_SRC (call);
18644
18645 /* Check if it is a cmse_nonsecure_call. */
18646 unspec = XEXP (call, 0);
18647 if (GET_CODE (unspec) != UNSPEC
18648 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18649 continue;
18650
18651 /* Mark registers that need to be cleared. Those that hold a
18652 parameter are removed from the set further below. */
18653 bitmap_clear (to_clear_bitmap);
18654 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18655 max_int_regno - R0_REGNUM + 1);
18656
18657 /* Only look at the caller-saved floating point registers in case of
18658 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18659 lazy store and loads which clear both caller- and callee-saved
18660 registers. */
18661 if (!lazy_fpclear)
18662 {
18663 auto_sbitmap float_bitmap (maxregno + 1);
18664
18665 bitmap_clear (float_bitmap);
18666 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18667 max_fp_regno - FIRST_VFP_REGNUM + 1);
18668 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18669 }
18670
18671 /* Make sure the register used to hold the function address is not
18672 cleared. */
18673 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18674 gcc_assert (MEM_P (address));
18675 gcc_assert (REG_P (XEXP (address, 0)));
18676 address_regnum = REGNO (XEXP (address, 0));
18677 if (address_regnum <= max_int_regno)
18678 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18679
18680 /* Set basic block of call insn so that df rescan is performed on
18681 insns inserted here. */
18682 set_block_for_insn (insn, bb);
18683 df_set_flags (DF_DEFER_INSN_RESCAN);
18684 start_sequence ();
18685
18686 /* Make sure the scheduler doesn't schedule other insns beyond
18687 here. */
18688 emit_insn (gen_blockage ());
18689
18690 /* Walk through all arguments and clear registers
18691 appropriately. */
18692 fntype = TREE_TYPE (MEM_EXPR (address));
18693 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18694 NULL_TREE);
18695 args_so_far = pack_cumulative_args (&args_so_far_v);
18696 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18697 {
18698 rtx arg_rtx;
18699 uint64_t to_clear_args_mask;
18700
18701 if (VOID_TYPE_P (arg_type))
18702 continue;
18703
18704 function_arg_info arg (arg_type, /*named=*/true);
18705 if (!first_param)
18706 /* ??? We should advance after processing the argument and pass
18707 the argument we're advancing past. */
18708 arm_function_arg_advance (args_so_far, arg);
18709
18710 arg_rtx = arm_function_arg (args_so_far, arg);
18711 gcc_assert (REG_P (arg_rtx));
18712 to_clear_args_mask
18713 = compute_not_to_clear_mask (arg_type, arg_rtx,
18714 REGNO (arg_rtx),
18715 &padding_bits_to_clear[0]);
18716 if (to_clear_args_mask)
18717 {
18718 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18719 {
18720 if (to_clear_args_mask & (1ULL << regno))
18721 bitmap_clear_bit (to_clear_bitmap, regno);
18722 }
18723 }
18724
18725 first_param = false;
18726 }
18727
18728 /* We use right shift and left shift to clear the LSB of the address
18729 we jump to instead of using bic, to avoid having to use an extra
18730 register on Thumb-1. */
18731 clearing_reg = XEXP (address, 0);
18732 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18733 emit_insn (gen_rtx_SET (clearing_reg, shift));
18734 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18735 emit_insn (gen_rtx_SET (clearing_reg, shift));
18736
18737 if (clear_callee_saved)
18738 {
18739 rtx push_insn =
18740 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18741 /* Disable frame debug info in push because it needs to be
18742 disabled for pop (see below). */
18743 RTX_FRAME_RELATED_P (push_insn) = 0;
18744
18745 /* Lazy store multiple. */
18746 if (lazy_fpclear)
18747 {
18748 rtx imm;
18749 rtx_insn *add_insn;
18750
18751 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
18752 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18753 stack_pointer_rtx, imm));
18754 arm_add_cfa_adjust_cfa_note (add_insn,
18755 - lazy_store_stack_frame_size,
18756 stack_pointer_rtx,
18757 stack_pointer_rtx);
18758 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
18759 }
18760 /* Save VFP callee-saved registers. */
18761 else
18762 {
18763 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
18764 (max_fp_regno - D7_VFP_REGNUM) / 2);
18765 /* Disable frame debug info in push because it needs to be
18766 disabled for vpop (see below). */
18767 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18768 }
18769 }
18770
18771 /* Clear caller-saved registers that leak before doing a non-secure
18772 call. */
18773 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18774 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18775 NUM_ARG_REGS, ip_reg, clearing_reg);
18776
18777 seq = get_insns ();
18778 end_sequence ();
18779 emit_insn_before (seq, insn);
18780
18781 if (TARGET_HAVE_FPCXT_CMSE)
18782 {
18783 rtx_insn *last, *pop_insn, *after = insn;
18784
18785 start_sequence ();
18786
18787 /* Lazy load multiple done as part of libcall in Armv8-M. */
18788 if (lazy_fpclear)
18789 {
18790 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
18791 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
18792 rtx_insn *add_insn =
18793 emit_insn (gen_addsi3 (stack_pointer_rtx,
18794 stack_pointer_rtx, imm));
18795 arm_add_cfa_adjust_cfa_note (add_insn,
18796 lazy_store_stack_frame_size,
18797 stack_pointer_rtx,
18798 stack_pointer_rtx);
18799 }
18800 /* Restore VFP callee-saved registers. */
18801 else
18802 {
18803 int nb_callee_saved_vfp_regs =
18804 (max_fp_regno - D7_VFP_REGNUM) / 2;
18805 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
18806 nb_callee_saved_vfp_regs,
18807 stack_pointer_rtx);
18808 /* Disable frame debug info in vpop because the SP adjustment
18809 is made using a CFA adjustment note while CFA used is
18810 sometimes R7. This then causes an assert failure in the
18811 CFI note creation code. */
18812 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18813 }
18814
18815 arm_emit_multi_reg_pop (callee_saved_mask);
18816 pop_insn = get_last_insn ();
18817
18818 /* Disable frame debug info in pop because its CFI notes reset the state
18819 of popped registers to what it was at the beginning of the
18820 function, before the prologue. This leads to incorrect state
18821 when doing the pop after the nonsecure call for registers that
18822 are pushed both in prologue and before the nonsecure call.
18823
18824 It also occasionally triggers an assert failure in CFI note
18825 creation code when there are two codepaths to the epilogue,
18826 one of which does not go through the nonsecure call.
18827 Obviously this means that debugging between the push and pop is
18828 not reliable. */
18829 RTX_FRAME_RELATED_P (pop_insn) = 0;
18830
18831 seq = get_insns ();
18832 last = get_last_insn ();
18833 end_sequence ();
18834
18835 emit_insn_after (seq, after);
18836
18837 /* Skip the pop we have just inserted after the nonsecure call; we
18838 know it does not contain a nonsecure call. */
18839 insn = last;
18840 }
18841 }
18842 }
18843 }
18844
18845 /* Rewrite move insn into subtract of 0 if the condition codes will
18846 be useful in the next conditional jump insn. */
18847
18848 static void
18849 thumb1_reorg (void)
18850 {
18851 basic_block bb;
18852
18853 FOR_EACH_BB_FN (bb, cfun)
18854 {
18855 rtx dest, src;
18856 rtx cmp, op0, op1, set = NULL;
18857 rtx_insn *prev, *insn = BB_END (bb);
18858 bool insn_clobbered = false;
18859
18860 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18861 insn = PREV_INSN (insn);
18862
18863 /* Find the last cbranchsi4_insn in basic block BB. */
18864 if (insn == BB_HEAD (bb)
18865 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18866 continue;
18867
18868 /* Get the register with which we are comparing. */
18869 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18870 op0 = XEXP (cmp, 0);
18871 op1 = XEXP (cmp, 1);
18872
18873 /* Check that comparison is against ZERO. */
18874 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18875 continue;
18876
18877 /* Find the first flag setting insn before INSN in basic block BB. */
18878 gcc_assert (insn != BB_HEAD (bb));
18879 for (prev = PREV_INSN (insn);
18880 (!insn_clobbered
18881 && prev != BB_HEAD (bb)
18882 && (NOTE_P (prev)
18883 || DEBUG_INSN_P (prev)
18884 || ((set = single_set (prev)) != NULL
18885 && get_attr_conds (prev) == CONDS_NOCOND)));
18886 prev = PREV_INSN (prev))
18887 {
18888 if (reg_set_p (op0, prev))
18889 insn_clobbered = true;
18890 }
18891
18892 /* Skip if op0 is clobbered by insn other than prev. */
18893 if (insn_clobbered)
18894 continue;
18895
18896 if (!set)
18897 continue;
18898
18899 dest = SET_DEST (set);
18900 src = SET_SRC (set);
18901 if (!low_register_operand (dest, SImode)
18902 || !low_register_operand (src, SImode))
18903 continue;
18904
18905 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18906 in INSN. Both src and dest of the move insn are checked. */
18907 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18908 {
18909 dest = copy_rtx (dest);
18910 src = copy_rtx (src);
18911 src = gen_rtx_MINUS (SImode, src, const0_rtx);
18912 PATTERN (prev) = gen_rtx_SET (dest, src);
18913 INSN_CODE (prev) = -1;
18914 /* Set test register in INSN to dest. */
18915 XEXP (cmp, 0) = copy_rtx (dest);
18916 INSN_CODE (insn) = -1;
18917 }
18918 }
18919 }
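
/* Illustrative before/after sketch (schematic RTL): given

       (set (reg r3) (reg r2))
       ...
       cbranchsi4_insn comparing (reg r2) with 0

   the move is rewritten as (set (reg r3) (minus (reg r2) (const_int 0))) and
   the comparison operand is redirected to r3, so the flag-setting SUBS can
   feed the conditional branch and the separate compare can be dropped when
   the branch is output.  */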
18920
18921 /* Convert instructions to their cc-clobbering variant if possible, since
18922 that allows us to use smaller encodings. */
18923
18924 static void
18925 thumb2_reorg (void)
18926 {
18927 basic_block bb;
18928 regset_head live;
18929
18930 INIT_REG_SET (&live);
18931
18932 /* We are freeing block_for_insn in the toplev to keep compatibility
18933 with old MDEP_REORGS that are not CFG based. Recompute it now. */
18934 compute_bb_for_insn ();
18935 df_analyze ();
18936
18937 enum Convert_Action {SKIP, CONV, SWAP_CONV};
18938
18939 FOR_EACH_BB_FN (bb, cfun)
18940 {
18941 if ((current_tune->disparage_flag_setting_t16_encodings
18942 == tune_params::DISPARAGE_FLAGS_ALL)
18943 && optimize_bb_for_speed_p (bb))
18944 continue;
18945
18946 rtx_insn *insn;
18947 Convert_Action action = SKIP;
18948 Convert_Action action_for_partial_flag_setting
18949 = ((current_tune->disparage_flag_setting_t16_encodings
18950 != tune_params::DISPARAGE_FLAGS_NEITHER)
18951 && optimize_bb_for_speed_p (bb))
18952 ? SKIP : CONV;
18953
18954 COPY_REG_SET (&live, DF_LR_OUT (bb));
18955 df_simulate_initialize_backwards (bb, &live);
18956 FOR_BB_INSNS_REVERSE (bb, insn)
18957 {
18958 if (NONJUMP_INSN_P (insn)
18959 && !REGNO_REG_SET_P (&live, CC_REGNUM)
18960 && GET_CODE (PATTERN (insn)) == SET)
18961 {
18962 action = SKIP;
18963 rtx pat = PATTERN (insn);
18964 rtx dst = XEXP (pat, 0);
18965 rtx src = XEXP (pat, 1);
18966 rtx op0 = NULL_RTX, op1 = NULL_RTX;
18967
18968 if (UNARY_P (src) || BINARY_P (src))
18969 op0 = XEXP (src, 0);
18970
18971 if (BINARY_P (src))
18972 op1 = XEXP (src, 1);
18973
18974 if (low_register_operand (dst, SImode))
18975 {
18976 switch (GET_CODE (src))
18977 {
18978 case PLUS:
18979 /* Adding two registers and storing the result
18980 in the first source is already a 16-bit
18981 operation. */
18982 if (rtx_equal_p (dst, op0)
18983 && register_operand (op1, SImode))
18984 break;
18985
18986 if (low_register_operand (op0, SImode))
18987 {
18988 /* ADDS <Rd>,<Rn>,<Rm> */
18989 if (low_register_operand (op1, SImode))
18990 action = CONV;
18991 /* ADDS <Rdn>,#<imm8> */
18992 /* SUBS <Rdn>,#<imm8> */
18993 else if (rtx_equal_p (dst, op0)
18994 && CONST_INT_P (op1)
18995 && IN_RANGE (INTVAL (op1), -255, 255))
18996 action = CONV;
18997 /* ADDS <Rd>,<Rn>,#<imm3> */
18998 /* SUBS <Rd>,<Rn>,#<imm3> */
18999 else if (CONST_INT_P (op1)
19000 && IN_RANGE (INTVAL (op1), -7, 7))
19001 action = CONV;
19002 }
19003 /* ADCS <Rd>, <Rn> */
19004 else if (GET_CODE (XEXP (src, 0)) == PLUS
19005 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19006 && low_register_operand (XEXP (XEXP (src, 0), 1),
19007 SImode)
19008 && COMPARISON_P (op1)
19009 && cc_register (XEXP (op1, 0), VOIDmode)
19010 && maybe_get_arm_condition_code (op1) == ARM_CS
19011 && XEXP (op1, 1) == const0_rtx)
19012 action = CONV;
19013 break;
19014
19015 case MINUS:
19016 /* RSBS <Rd>,<Rn>,#0
19017 Not handled here: see NEG below. */
19018 /* SUBS <Rd>,<Rn>,#<imm3>
19019 SUBS <Rdn>,#<imm8>
19020 Not handled here: see PLUS above. */
19021 /* SUBS <Rd>,<Rn>,<Rm> */
19022 if (low_register_operand (op0, SImode)
19023 && low_register_operand (op1, SImode))
19024 action = CONV;
19025 break;
19026
19027 case MULT:
19028 /* MULS <Rdm>,<Rn>,<Rdm>
19029 As an exception to the rule, this is only used
19030 when optimizing for size since MULS is slow on all
19031 known implementations. We do not even want to use
19032 MULS in cold code, if optimizing for speed, so we
19033 test the global flag here. */
19034 if (!optimize_size)
19035 break;
19036 /* Fall through. */
19037 case AND:
19038 case IOR:
19039 case XOR:
19040 /* ANDS <Rdn>,<Rm> */
19041 if (rtx_equal_p (dst, op0)
19042 && low_register_operand (op1, SImode))
19043 action = action_for_partial_flag_setting;
19044 else if (rtx_equal_p (dst, op1)
19045 && low_register_operand (op0, SImode))
19046 action = action_for_partial_flag_setting == SKIP
19047 ? SKIP : SWAP_CONV;
19048 break;
19049
19050 case ASHIFTRT:
19051 case ASHIFT:
19052 case LSHIFTRT:
19053 /* ASRS <Rdn>,<Rm> */
19054 /* LSRS <Rdn>,<Rm> */
19055 /* LSLS <Rdn>,<Rm> */
19056 if (rtx_equal_p (dst, op0)
19057 && low_register_operand (op1, SImode))
19058 action = action_for_partial_flag_setting;
19059 /* ASRS <Rd>,<Rm>,#<imm5> */
19060 /* LSRS <Rd>,<Rm>,#<imm5> */
19061 /* LSLS <Rd>,<Rm>,#<imm5> */
19062 else if (low_register_operand (op0, SImode)
19063 && CONST_INT_P (op1)
19064 && IN_RANGE (INTVAL (op1), 0, 31))
19065 action = action_for_partial_flag_setting;
19066 break;
19067
19068 case ROTATERT:
19069 /* RORS <Rdn>,<Rm> */
19070 if (rtx_equal_p (dst, op0)
19071 && low_register_operand (op1, SImode))
19072 action = action_for_partial_flag_setting;
19073 break;
19074
19075 case NOT:
19076 /* MVNS <Rd>,<Rm> */
19077 if (low_register_operand (op0, SImode))
19078 action = action_for_partial_flag_setting;
19079 break;
19080
19081 case NEG:
19082 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19083 if (low_register_operand (op0, SImode))
19084 action = CONV;
19085 break;
19086
19087 case CONST_INT:
19088 /* MOVS <Rd>,#<imm8> */
19089 if (CONST_INT_P (src)
19090 && IN_RANGE (INTVAL (src), 0, 255))
19091 action = action_for_partial_flag_setting;
19092 break;
19093
19094 case REG:
19095 /* MOVS and MOV<c> with registers have different
19096 encodings, so are not relevant here. */
19097 break;
19098
19099 default:
19100 break;
19101 }
19102 }
19103
19104 if (action != SKIP)
19105 {
19106 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19107 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19108 rtvec vec;
19109
19110 if (action == SWAP_CONV)
19111 {
19112 src = copy_rtx (src);
19113 XEXP (src, 0) = op1;
19114 XEXP (src, 1) = op0;
19115 pat = gen_rtx_SET (dst, src);
19116 vec = gen_rtvec (2, pat, clobber);
19117 }
19118 else /* action == CONV */
19119 vec = gen_rtvec (2, pat, clobber);
19120
19121 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19122 INSN_CODE (insn) = -1;
19123 }
19124 }
19125
19126 if (NONDEBUG_INSN_P (insn))
19127 df_simulate_one_insn_backwards (bb, insn, &live);
19128 }
19129 }
19130
19131 CLEAR_REG_SET (&live);
19132 }
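
/* Illustrative example: in a block where the condition codes are dead, a set
   such as (set (reg r0) (plus (reg r1) (reg r2))) is wrapped in a PARALLEL
   with (clobber (reg CC)), which allows the 16-bit flag-setting encoding
   "adds r0, r1, r2" to be used instead of the 32-bit "add.w r0, r1, r2".  */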
19133
19134 /* GCC puts the pool in the wrong place for ARM, since we can only
19135 load addresses a limited distance around the pc. We do some
19136 special munging to move the constant pool values to the correct
19137 point in the code. */
19138 static void
19139 arm_reorg (void)
19140 {
19141 rtx_insn *insn;
19142 HOST_WIDE_INT address = 0;
19143 Mfix * fix;
19144
19145 if (use_cmse)
19146 cmse_nonsecure_call_inline_register_clear ();
19147
19148 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19149 if (cfun->is_thunk)
19150 ;
19151 else if (TARGET_THUMB1)
19152 thumb1_reorg ();
19153 else if (TARGET_THUMB2)
19154 thumb2_reorg ();
19155
19156 /* Ensure all insns that must be split have been split at this point.
19157 Otherwise, the pool placement code below may compute incorrect
19158 insn lengths. Note that when optimizing, all insns have already
19159 been split at this point. */
19160 if (!optimize)
19161 split_all_insns_noflow ();
19162
19163 /* When the literal pool is disabled there should be nothing left to fix
19164 up; make sure we never attempt to create one. */
19165 if (arm_disable_literal_pool)
19166 return;
19167
19168 minipool_fix_head = minipool_fix_tail = NULL;
19169
19170 /* The first insn must always be a note, or the code below won't
19171 scan it properly. */
19172 insn = get_insns ();
19173 gcc_assert (NOTE_P (insn));
19174 minipool_pad = 0;
19175
19176 /* Scan all the insns and record the operands that will need fixing. */
19177 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19178 {
19179 if (BARRIER_P (insn))
19180 push_minipool_barrier (insn, address);
19181 else if (INSN_P (insn))
19182 {
19183 rtx_jump_table_data *table;
19184
19185 note_invalid_constants (insn, address, true);
19186 address += get_attr_length (insn);
19187
19188 /* If the insn is a vector jump, add the size of the table
19189 and skip the table. */
19190 if (tablejump_p (insn, NULL, &table))
19191 {
19192 address += get_jump_table_size (table);
19193 insn = table;
19194 }
19195 }
19196 else if (LABEL_P (insn))
19197 /* Add the worst-case padding due to alignment. We don't add
19198 the _current_ padding because the minipool insertions
19199 themselves might change it. */
19200 address += get_label_padding (insn);
19201 }
19202
19203 fix = minipool_fix_head;
19204
19205 /* Now scan the fixups and perform the required changes. */
19206 while (fix)
19207 {
19208 Mfix * ftmp;
19209 Mfix * fdel;
19210 Mfix * last_added_fix;
19211 Mfix * last_barrier = NULL;
19212 Mfix * this_fix;
19213
19214 /* Skip any further barriers before the next fix. */
19215 while (fix && BARRIER_P (fix->insn))
19216 fix = fix->next;
19217
19218 /* No more fixes. */
19219 if (fix == NULL)
19220 break;
19221
19222 last_added_fix = NULL;
19223
19224 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19225 {
19226 if (BARRIER_P (ftmp->insn))
19227 {
19228 if (ftmp->address >= minipool_vector_head->max_address)
19229 break;
19230
19231 last_barrier = ftmp;
19232 }
19233 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19234 break;
19235
19236 last_added_fix = ftmp; /* Keep track of the last fix added. */
19237 }
19238
19239 /* If we found a barrier, drop back to that; any fixes that we
19240 could have reached but come after the barrier will now go in
19241 the next mini-pool. */
19242 if (last_barrier != NULL)
19243 {
19244 /* Reduce the refcount for those fixes that won't go into this
19245 pool after all. */
19246 for (fdel = last_barrier->next;
19247 fdel && fdel != ftmp;
19248 fdel = fdel->next)
19249 {
19250 fdel->minipool->refcount--;
19251 fdel->minipool = NULL;
19252 }
19253
19254 ftmp = last_barrier;
19255 }
19256 else
19257 {
19258 /* ftmp is the first fix that we can't fit into this pool and
19259 there are no natural barriers that we could use. Insert a
19260 new barrier in the code somewhere between the previous
19261 fix and this one, and arrange to jump around it. */
19262 HOST_WIDE_INT max_address;
19263
19264 /* The last item on the list of fixes must be a barrier, so
19265 we can never run off the end of the list of fixes without
19266 last_barrier being set. */
19267 gcc_assert (ftmp);
19268
19269 max_address = minipool_vector_head->max_address;
19270 /* Check that there isn't another fix that is in range that
19271 we couldn't fit into this pool because the pool was
19272 already too large: we need to put the pool before such an
19273 instruction. The pool itself may come just after the
19274 fix because create_fix_barrier also allows space for a
19275 jump instruction. */
19276 if (ftmp->address < max_address)
19277 max_address = ftmp->address + 1;
19278
19279 last_barrier = create_fix_barrier (last_added_fix, max_address);
19280 }
19281
19282 assign_minipool_offsets (last_barrier);
19283
19284 while (ftmp)
19285 {
19286 if (!BARRIER_P (ftmp->insn)
19287 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19288 == NULL))
19289 break;
19290
19291 ftmp = ftmp->next;
19292 }
19293
19294 /* Scan over the fixes we have identified for this pool, fixing them
19295 up and adding the constants to the pool itself. */
19296 for (this_fix = fix; this_fix && ftmp != this_fix;
19297 this_fix = this_fix->next)
19298 if (!BARRIER_P (this_fix->insn))
19299 {
19300 rtx addr
19301 = plus_constant (Pmode,
19302 gen_rtx_LABEL_REF (VOIDmode,
19303 minipool_vector_label),
19304 this_fix->minipool->offset);
19305 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19306 }
19307
19308 dump_minipool (last_barrier->insn);
19309 fix = ftmp;
19310 }
19311
19312 /* From now on we must synthesize any constants that we can't handle
19313 directly. This can happen if the RTL gets split during final
19314 instruction generation. */
19315 cfun->machine->after_arm_reorg = 1;
19316
19317 /* Free the minipool memory. */
19318 obstack_free (&minipool_obstack, minipool_startobj);
19319 }
19320 \f
19321 /* Routines to output assembly language. */
19322
19323 /* Return string representation of passed in real value. */
19324 static const char *
19325 fp_const_from_val (REAL_VALUE_TYPE *r)
19326 {
19327 if (!fp_consts_inited)
19328 init_fp_table ();
19329
19330 gcc_assert (real_equal (r, &value_fp0));
19331 return "0";
19332 }
19333
19334 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19335 OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
19336 is in the list, and UPDATE is true iff the list contains an explicit
19337 update of the base register. */
19338 void
19339 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19340 bool update)
19341 {
19342 int i;
19343 char pattern[100];
19344 int offset;
19345 const char *conditional;
19346 int num_saves = XVECLEN (operands[0], 0);
19347 unsigned int regno;
19348 unsigned int regno_base = REGNO (operands[1]);
19349 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19350
19351 offset = 0;
19352 offset += update ? 1 : 0;
19353 offset += return_pc ? 1 : 0;
19354
19355 /* Is the base register in the list? */
19356 for (i = offset; i < num_saves; i++)
19357 {
19358 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19359 /* If SP is in the list, then the base register must be SP. */
19360 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19361 /* If base register is in the list, there must be no explicit update. */
19362 if (regno == regno_base)
19363 gcc_assert (!update);
19364 }
19365
19366 conditional = reverse ? "%?%D0" : "%?%d0";
19367 /* Can't use POP if returning from an interrupt. */
19368 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19369 sprintf (pattern, "pop%s\t{", conditional);
19370 else
19371 {
19372 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19373 It's just a convention, their semantics are identical. */
19374 if (regno_base == SP_REGNUM)
19375 sprintf (pattern, "ldmfd%s\t", conditional);
19376 else if (update)
19377 sprintf (pattern, "ldmia%s\t", conditional);
19378 else
19379 sprintf (pattern, "ldm%s\t", conditional);
19380
19381 strcat (pattern, reg_names[regno_base]);
19382 if (update)
19383 strcat (pattern, "!, {");
19384 else
19385 strcat (pattern, ", {");
19386 }
19387
19388 /* Output the first destination register. */
19389 strcat (pattern,
19390 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19391
19392 /* Output the rest of the destination registers. */
19393 for (i = offset + 1; i < num_saves; i++)
19394 {
19395 strcat (pattern, ", ");
19396 strcat (pattern,
19397 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19398 }
19399
19400 strcat (pattern, "}");
19401
19402 if (interrupt_p && return_pc)
19403 strcat (pattern, "^");
19404
19405 output_asm_insn (pattern, &cond);
19406 }
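
/* Example outputs (illustrative): with SP as the base register, writeback and
   no interrupt return this emits e.g. "pop {r4, r5, pc}"; when returning from
   an interrupt handler the same registers come out as
   "ldmfd sp!, {r4, r5, pc}^".  */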
19407
19408
19409 /* Output the assembly for a store multiple. */
19410
19411 const char *
19412 vfp_output_vstmd (rtx * operands)
19413 {
19414 char pattern[100];
19415 int p;
19416 int base;
19417 int i;
19418 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19419 ? XEXP (operands[0], 0)
19420 : XEXP (XEXP (operands[0], 0), 0);
19421 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19422
19423 if (push_p)
19424 strcpy (pattern, "vpush%?.64\t{%P1");
19425 else
19426 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19427
19428 p = strlen (pattern);
19429
19430 gcc_assert (REG_P (operands[1]));
19431
19432 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19433 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19434 {
19435 p += sprintf (&pattern[p], ", d%d", base + i);
19436 }
19437 strcpy (&pattern[p], "}");
19438
19439 output_asm_insn (pattern, operands);
19440 return "";
19441 }
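
/* Example (illustrative): storing d8 and d9 with SP as the address register
   produces "vpush.64 {d8, d9}"; with any other base register the same
   operands produce "vstmdb.64 rN!, {d8, d9}".  */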
19442
19443
19444 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19445 number of bytes pushed. */
19446
19447 static int
19448 vfp_emit_fstmd (int base_reg, int count)
19449 {
19450 rtx par;
19451 rtx dwarf;
19452 rtx tmp, reg;
19453 int i;
19454
19455 /* Work around an ARM10 VFPr1 bug: data corruption can occur when exactly two
19456 register pairs are stored by a store multiple insn. We avoid this
19457 by pushing an extra pair. */
19458 if (count == 2 && !arm_arch6)
19459 {
19460 if (base_reg == LAST_VFP_REGNUM - 3)
19461 base_reg -= 2;
19462 count++;
19463 }
19464
19465 /* FSTMD may not store more than 16 doubleword registers at once. Split
19466 larger stores into multiple parts (up to a maximum of two, in
19467 practice). */
19468 if (count > 16)
19469 {
19470 int saved;
19471 /* NOTE: base_reg is an internal register number, so each D register
19472 counts as 2. */
19473 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19474 saved += vfp_emit_fstmd (base_reg, 16);
19475 return saved;
19476 }
19477
19478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19479 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19480
19481 reg = gen_rtx_REG (DFmode, base_reg);
19482 base_reg += 2;
19483
19484 XVECEXP (par, 0, 0)
19485 = gen_rtx_SET (gen_frame_mem
19486 (BLKmode,
19487 gen_rtx_PRE_MODIFY (Pmode,
19488 stack_pointer_rtx,
19489 plus_constant
19490 (Pmode, stack_pointer_rtx,
19491 - (count * 8)))
19492 ),
19493 gen_rtx_UNSPEC (BLKmode,
19494 gen_rtvec (1, reg),
19495 UNSPEC_PUSH_MULT));
19496
19497 tmp = gen_rtx_SET (stack_pointer_rtx,
19498 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19499 RTX_FRAME_RELATED_P (tmp) = 1;
19500 XVECEXP (dwarf, 0, 0) = tmp;
19501
19502 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19503 RTX_FRAME_RELATED_P (tmp) = 1;
19504 XVECEXP (dwarf, 0, 1) = tmp;
19505
19506 for (i = 1; i < count; i++)
19507 {
19508 reg = gen_rtx_REG (DFmode, base_reg);
19509 base_reg += 2;
19510 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19511
19512 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19513 plus_constant (Pmode,
19514 stack_pointer_rtx,
19515 i * 8)),
19516 reg);
19517 RTX_FRAME_RELATED_P (tmp) = 1;
19518 XVECEXP (dwarf, 0, i + 1) = tmp;
19519 }
19520
19521 par = emit_insn (par);
19522 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19523 RTX_FRAME_RELATED_P (par) = 1;
19524
19525 return count * 8;
19526 }
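
/* Illustrative effect of the above: on a pre-Armv6 core a call such as
   vfp_emit_fstmd (base, 2) actually pushes three D registers (24 bytes)
   because of the erratum workaround, and the REG_FRAME_RELATED_EXPR note
   records the SP adjustment and each store individually, so the unwinder
   sees every saved D register at its correct offset from the new SP.  */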
19527
19528 /* Return TRUE if -mcmse has been passed and the function pointed to by ADDR
19529 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
19530
19531 bool
19532 detect_cmse_nonsecure_call (tree addr)
19533 {
19534 if (!addr)
19535 return FALSE;
19536
19537 tree fntype = TREE_TYPE (addr);
19538 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19539 TYPE_ATTRIBUTES (fntype)))
19540 return TRUE;
19541 return FALSE;
19542 }
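
/* Illustrative source-level usage (assumed, not taken from this file):
   with -mcmse, a declaration along the lines of

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
       ns_fn_t *fp;

   puts the attribute on the function type, so calls made through FP are
   the kind of call this predicate is intended to recognise.  */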
19543
19544
19545 /* Emit a call instruction with pattern PAT. ADDR is the address of
19546 the call target. */
19547
19548 void
19549 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19550 {
19551 rtx insn;
19552
19553 insn = emit_call_insn (pat);
19554
19555 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19556 If the call might use such an entry, add a use of the PIC register
19557 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19558 if (TARGET_VXWORKS_RTP
19559 && flag_pic
19560 && !sibcall
19561 && GET_CODE (addr) == SYMBOL_REF
19562 && (SYMBOL_REF_DECL (addr)
19563 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19564 : !SYMBOL_REF_LOCAL_P (addr)))
19565 {
19566 require_pic_register (NULL_RTX, false /*compute_now*/);
19567 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19568 }
19569
19570 if (TARGET_FDPIC)
19571 {
19572 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19573 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19574 }
19575
19576 if (TARGET_AAPCS_BASED)
19577 {
19578 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19579 linker. We need to add an IP clobber to allow setting
19580 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19581 is not needed since it's a fixed register. */
19582 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19583 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19584 }
19585 }
19586
19587 /* Output a 'call' insn. */
19588 const char *
19589 output_call (rtx *operands)
19590 {
19591 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19592
19593 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19594 if (REGNO (operands[0]) == LR_REGNUM)
19595 {
19596 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19597 output_asm_insn ("mov%?\t%0, %|lr", operands);
19598 }
19599
19600 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19601
19602 if (TARGET_INTERWORK || arm_arch4t)
19603 output_asm_insn ("bx%?\t%0", operands);
19604 else
19605 output_asm_insn ("mov%?\t%|pc, %0", operands);
19606
19607 return "";
19608 }
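
/* Illustrative output for a call through r2 on a target without BLX:

       mov     lr, pc
       bx      r2              @ with interworking or ARMv4T

   or, on still older cores,

       mov     lr, pc
       mov     pc, r2

   Calls through LR are first copied into IP, as handled above.  */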
19609
19610 /* Output a move from arm registers to arm registers of a long double
19611 OPERANDS[0] is the destination.
19612 OPERANDS[1] is the source. */
19613 const char *
19614 output_mov_long_double_arm_from_arm (rtx *operands)
19615 {
19616 /* We have to be careful here because the two might overlap. */
19617 int dest_start = REGNO (operands[0]);
19618 int src_start = REGNO (operands[1]);
19619 rtx ops[2];
19620 int i;
19621
19622 if (dest_start < src_start)
19623 {
19624 for (i = 0; i < 3; i++)
19625 {
19626 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19627 ops[1] = gen_rtx_REG (SImode, src_start + i);
19628 output_asm_insn ("mov%?\t%0, %1", ops);
19629 }
19630 }
19631 else
19632 {
19633 for (i = 2; i >= 0; i--)
19634 {
19635 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19636 ops[1] = gen_rtx_REG (SImode, src_start + i);
19637 output_asm_insn ("mov%?\t%0, %1", ops);
19638 }
19639 }
19640
19641 return "";
19642 }
19643
19644 void
19645 arm_emit_movpair (rtx dest, rtx src)
19646 {
19647 /* If the src is an immediate, simplify it. */
19648 if (CONST_INT_P (src))
19649 {
19650 HOST_WIDE_INT val = INTVAL (src);
19651 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19652 if ((val >> 16) & 0x0000ffff)
19653 {
19654 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19655 GEN_INT (16)),
19656 GEN_INT ((val >> 16) & 0x0000ffff));
19657 rtx_insn *insn = get_last_insn ();
19658 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19659 }
19660 return;
19661 }
19662 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19663 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19664 rtx_insn *insn = get_last_insn ();
19665 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19666 }
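
/* Illustrative expansion (register choice is arbitrary): for a constant
   source of 0x12345678 the two sets emitted above are typically output
   as a movw/movt pair,

       movw    r0, #0x5678
       movt    r0, #0x1234

   and only the first insn is needed when the upper halfword is zero.  */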
19667
19668 /* Output a move between double words. It must be REG<-MEM
19669 or MEM<-REG. */
19670 const char *
19671 output_move_double (rtx *operands, bool emit, int *count)
19672 {
19673 enum rtx_code code0 = GET_CODE (operands[0]);
19674 enum rtx_code code1 = GET_CODE (operands[1]);
19675 rtx otherops[3];
19676 if (count)
19677 *count = 1;
19678
19679 /* The only case when this might happen is when
19680 you are looking at the length of a DImode instruction
19681 that has an invalid constant in it. */
19682 if (code0 == REG && code1 != MEM)
19683 {
19684 gcc_assert (!emit);
19685 *count = 2;
19686 return "";
19687 }
19688
19689 if (code0 == REG)
19690 {
19691 unsigned int reg0 = REGNO (operands[0]);
19692 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19693
19694 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19695
19696 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19697
19698 switch (GET_CODE (XEXP (operands[1], 0)))
19699 {
19700 case REG:
19701
19702 if (emit)
19703 {
19704 if (can_ldrd
19705 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19706 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19707 else
19708 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19709 }
19710 break;
19711
19712 case PRE_INC:
19713 gcc_assert (can_ldrd);
19714 if (emit)
19715 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19716 break;
19717
19718 case PRE_DEC:
19719 if (emit)
19720 {
19721 if (can_ldrd)
19722 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19723 else
19724 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19725 }
19726 break;
19727
19728 case POST_INC:
19729 if (emit)
19730 {
19731 if (can_ldrd)
19732 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19733 else
19734 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19735 }
19736 break;
19737
19738 case POST_DEC:
19739 gcc_assert (can_ldrd);
19740 if (emit)
19741 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19742 break;
19743
19744 case PRE_MODIFY:
19745 case POST_MODIFY:
19746 /* Autoincrement addressing modes should never have overlapping
19747 base and destination registers, and overlapping index registers
19748 are already prohibited, so this doesn't need to worry about
19749 fix_cm3_ldrd. */
19750 otherops[0] = operands[0];
19751 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19752 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19753
19754 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19755 {
19756 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19757 {
19758 /* Registers overlap so split out the increment. */
19759 if (emit)
19760 {
19761 gcc_assert (can_ldrd);
19762 output_asm_insn ("add%?\t%1, %1, %2", otherops);
19763 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19764 }
19765 if (count)
19766 *count = 2;
19767 }
19768 else
19769 {
19770 /* Use a single insn if we can.
19771 FIXME: IWMMXT allows offsets larger than ldrd can
19772 handle, fix these up with a pair of ldr. */
19773 if (can_ldrd
19774 && (TARGET_THUMB2
19775 || !CONST_INT_P (otherops[2])
19776 || (INTVAL (otherops[2]) > -256
19777 && INTVAL (otherops[2]) < 256)))
19778 {
19779 if (emit)
19780 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19781 }
19782 else
19783 {
19784 if (emit)
19785 {
19786 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19787 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19788 }
19789 if (count)
19790 *count = 2;
19791
19792 }
19793 }
19794 }
19795 else
19796 {
19797 /* Use a single insn if we can.
19798 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19799 fix these up with a pair of ldr. */
19800 if (can_ldrd
19801 && (TARGET_THUMB2
19802 || !CONST_INT_P (otherops[2])
19803 || (INTVAL (otherops[2]) > -256
19804 && INTVAL (otherops[2]) < 256)))
19805 {
19806 if (emit)
19807 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19808 }
19809 else
19810 {
19811 if (emit)
19812 {
19813 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19814 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19815 }
19816 if (count)
19817 *count = 2;
19818 }
19819 }
19820 break;
19821
19822 case LABEL_REF:
19823 case CONST:
19824 /* We might be able to use ldrd %0, %1 here. However the range is
19825 different to ldr/adr, and it is broken on some ARMv7-M
19826 implementations. */
19827 /* Use the second register of the pair to avoid problematic
19828 overlap. */
19829 otherops[1] = operands[1];
19830 if (emit)
19831 output_asm_insn ("adr%?\t%0, %1", otherops);
19832 operands[1] = otherops[0];
19833 if (emit)
19834 {
19835 if (can_ldrd)
19836 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19837 else
19838 output_asm_insn ("ldmia%?\t%1, %M0", operands);
19839 }
19840
19841 if (count)
19842 *count = 2;
19843 break;
19844
19845 /* ??? This needs checking for thumb2. */
19846 default:
19847 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19848 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19849 {
19850 otherops[0] = operands[0];
19851 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19852 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19853
19854 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19855 {
19856 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19857 {
19858 switch ((int) INTVAL (otherops[2]))
19859 {
19860 case -8:
19861 if (emit)
19862 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19863 return "";
19864 case -4:
19865 if (TARGET_THUMB2)
19866 break;
19867 if (emit)
19868 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19869 return "";
19870 case 4:
19871 if (TARGET_THUMB2)
19872 break;
19873 if (emit)
19874 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19875 return "";
19876 }
19877 }
19878 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
19879 operands[1] = otherops[0];
19880 if (can_ldrd
19881 && (REG_P (otherops[2])
19882 || TARGET_THUMB2
19883 || (CONST_INT_P (otherops[2])
19884 && INTVAL (otherops[2]) > -256
19885 && INTVAL (otherops[2]) < 256)))
19886 {
19887 if (reg_overlap_mentioned_p (operands[0],
19888 otherops[2]))
19889 {
19890 /* Swap base and index registers over to
19891 avoid a conflict. */
19892 std::swap (otherops[1], otherops[2]);
19893 }
19894 /* If both registers conflict, it will usually
19895 have been fixed by a splitter. */
19896 if (reg_overlap_mentioned_p (operands[0], otherops[2])
19897 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19898 {
19899 if (emit)
19900 {
19901 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19902 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19903 }
19904 if (count)
19905 *count = 2;
19906 }
19907 else
19908 {
19909 otherops[0] = operands[0];
19910 if (emit)
19911 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19912 }
19913 return "";
19914 }
19915
19916 if (CONST_INT_P (otherops[2]))
19917 {
19918 if (emit)
19919 {
19920 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19921 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19922 else
19923 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19924 }
19925 }
19926 else
19927 {
19928 if (emit)
19929 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19930 }
19931 }
19932 else
19933 {
19934 if (emit)
19935 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19936 }
19937
19938 if (count)
19939 *count = 2;
19940
19941 if (can_ldrd)
19942 return "ldrd%?\t%0, [%1]";
19943
19944 return "ldmia%?\t%1, %M0";
19945 }
19946 else
19947 {
19948 otherops[1] = adjust_address (operands[1], SImode, 4);
19949 /* Take care of overlapping base/data reg. */
19950 if (reg_mentioned_p (operands[0], operands[1]))
19951 {
19952 if (emit)
19953 {
19954 output_asm_insn ("ldr%?\t%0, %1", otherops);
19955 output_asm_insn ("ldr%?\t%0, %1", operands);
19956 }
19957 if (count)
19958 *count = 2;
19959
19960 }
19961 else
19962 {
19963 if (emit)
19964 {
19965 output_asm_insn ("ldr%?\t%0, %1", operands);
19966 output_asm_insn ("ldr%?\t%0, %1", otherops);
19967 }
19968 if (count)
19969 *count = 2;
19970 }
19971 }
19972 }
19973 }
19974 else
19975 {
19976 /* Constraints should ensure this. */
19977 gcc_assert (code0 == MEM && code1 == REG);
19978 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
19979 || (TARGET_ARM && TARGET_LDRD));
19980
19981 /* For TARGET_ARM the first source register of an STRD
19982 must be even. This is usually the case for double-word
19983 values but user assembly constraints can force an odd
19984 starting register. */
19985 bool allow_strd = TARGET_LDRD
19986 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
19987 switch (GET_CODE (XEXP (operands[0], 0)))
19988 {
19989 case REG:
19990 if (emit)
19991 {
19992 if (allow_strd)
19993 output_asm_insn ("strd%?\t%1, [%m0]", operands);
19994 else
19995 output_asm_insn ("stm%?\t%m0, %M1", operands);
19996 }
19997 break;
19998
19999 case PRE_INC:
20000 gcc_assert (allow_strd);
20001 if (emit)
20002 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20003 break;
20004
20005 case PRE_DEC:
20006 if (emit)
20007 {
20008 if (allow_strd)
20009 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20010 else
20011 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20012 }
20013 break;
20014
20015 case POST_INC:
20016 if (emit)
20017 {
20018 if (allow_strd)
20019 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20020 else
20021 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20022 }
20023 break;
20024
20025 case POST_DEC:
20026 gcc_assert (allow_strd);
20027 if (emit)
20028 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20029 break;
20030
20031 case PRE_MODIFY:
20032 case POST_MODIFY:
20033 otherops[0] = operands[1];
20034 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20035 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20036
20037 /* IWMMXT allows offsets larger than strd can handle,
20038 fix these up with a pair of str. */
20039 if (!TARGET_THUMB2
20040 && CONST_INT_P (otherops[2])
20041 && (INTVAL(otherops[2]) <= -256
20042 || INTVAL(otherops[2]) >= 256))
20043 {
20044 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20045 {
20046 if (emit)
20047 {
20048 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20049 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20050 }
20051 if (count)
20052 *count = 2;
20053 }
20054 else
20055 {
20056 if (emit)
20057 {
20058 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20059 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20060 }
20061 if (count)
20062 *count = 2;
20063 }
20064 }
20065 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20066 {
20067 if (emit)
20068 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20069 }
20070 else
20071 {
20072 if (emit)
20073 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20074 }
20075 break;
20076
20077 case PLUS:
20078 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20079 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20080 {
20081 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20082 {
20083 case -8:
20084 if (emit)
20085 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20086 return "";
20087
20088 case -4:
20089 if (TARGET_THUMB2)
20090 break;
20091 if (emit)
20092 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20093 return "";
20094
20095 case 4:
20096 if (TARGET_THUMB2)
20097 break;
20098 if (emit)
20099 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20100 return "";
20101 }
20102 }
20103 if (allow_strd
20104 && (REG_P (otherops[2])
20105 || TARGET_THUMB2
20106 || (CONST_INT_P (otherops[2])
20107 && INTVAL (otherops[2]) > -256
20108 && INTVAL (otherops[2]) < 256)))
20109 {
20110 otherops[0] = operands[1];
20111 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20112 if (emit)
20113 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20114 return "";
20115 }
20116 /* Fall through */
20117
20118 default:
20119 otherops[0] = adjust_address (operands[0], SImode, 4);
20120 otherops[1] = operands[1];
20121 if (emit)
20122 {
20123 output_asm_insn ("str%?\t%1, %0", operands);
20124 output_asm_insn ("str%?\t%H1, %0", otherops);
20125 }
20126 if (count)
20127 *count = 2;
20128 }
20129 }
20130
20131 return "";
20132 }
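
/* Illustrative examples of the simplest cases above: a DImode load from
   a plain register address is emitted as either

       ldrd    r0, [r2]                @ when LDRD is usable
       ldmia   r2, {r0, r1}            @ otherwise

   while the pre/post-modify and large-offset cases fall back to a pair
   of LDR or STR instructions, with *COUNT set to 2.  */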
20133
20134 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20135 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20136
20137 const char *
20138 output_move_quad (rtx *operands)
20139 {
20140 if (REG_P (operands[0]))
20141 {
20142 /* Load, or reg->reg move. */
20143
20144 if (MEM_P (operands[1]))
20145 {
20146 switch (GET_CODE (XEXP (operands[1], 0)))
20147 {
20148 case REG:
20149 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20150 break;
20151
20152 case LABEL_REF:
20153 case CONST:
20154 output_asm_insn ("adr%?\t%0, %1", operands);
20155 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20156 break;
20157
20158 default:
20159 gcc_unreachable ();
20160 }
20161 }
20162 else
20163 {
20164 rtx ops[2];
20165 int dest, src, i;
20166
20167 gcc_assert (REG_P (operands[1]));
20168
20169 dest = REGNO (operands[0]);
20170 src = REGNO (operands[1]);
20171
20172 /* This seems pretty dumb, but hopefully GCC won't try to do it
20173 very often. */
20174 if (dest < src)
20175 for (i = 0; i < 4; i++)
20176 {
20177 ops[0] = gen_rtx_REG (SImode, dest + i);
20178 ops[1] = gen_rtx_REG (SImode, src + i);
20179 output_asm_insn ("mov%?\t%0, %1", ops);
20180 }
20181 else
20182 for (i = 3; i >= 0; i--)
20183 {
20184 ops[0] = gen_rtx_REG (SImode, dest + i);
20185 ops[1] = gen_rtx_REG (SImode, src + i);
20186 output_asm_insn ("mov%?\t%0, %1", ops);
20187 }
20188 }
20189 }
20190 else
20191 {
20192 gcc_assert (MEM_P (operands[0]));
20193 gcc_assert (REG_P (operands[1]));
20194 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20195
20196 switch (GET_CODE (XEXP (operands[0], 0)))
20197 {
20198 case REG:
20199 output_asm_insn ("stm%?\t%m0, %M1", operands);
20200 break;
20201
20202 default:
20203 gcc_unreachable ();
20204 }
20205 }
20206
20207 return "";
20208 }
20209
20210 /* Output a VFP load or store instruction. */
20211
20212 const char *
20213 output_move_vfp (rtx *operands)
20214 {
20215 rtx reg, mem, addr, ops[2];
20216 int load = REG_P (operands[0]);
20217 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20218 int sp = (!TARGET_VFP_FP16INST
20219 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20220 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20221 const char *templ;
20222 char buff[50];
20223 machine_mode mode;
20224
20225 reg = operands[!load];
20226 mem = operands[load];
20227
20228 mode = GET_MODE (reg);
20229
20230 gcc_assert (REG_P (reg));
20231 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20232 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20233 || mode == SFmode
20234 || mode == DFmode
20235 || mode == HImode
20236 || mode == SImode
20237 || mode == DImode
20238 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20239 gcc_assert (MEM_P (mem));
20240
20241 addr = XEXP (mem, 0);
20242
20243 switch (GET_CODE (addr))
20244 {
20245 case PRE_DEC:
20246 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20247 ops[0] = XEXP (addr, 0);
20248 ops[1] = reg;
20249 break;
20250
20251 case POST_INC:
20252 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20253 ops[0] = XEXP (addr, 0);
20254 ops[1] = reg;
20255 break;
20256
20257 default:
20258 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20259 ops[0] = reg;
20260 ops[1] = mem;
20261 break;
20262 }
20263
20264 sprintf (buff, templ,
20265 load ? "ld" : "st",
20266 dp ? "64" : sp ? "32" : "16",
20267 dp ? "P" : "",
20268 integer_p ? "\t%@ int" : "");
20269 output_asm_insn (buff, ops);
20270
20271 return "";
20272 }
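
/* Sketch of how the template above expands (operands are illustrative):
   a double-precision load from [r1] becomes

       vldr.64 d0, [r1]

   and a single-precision store with post-increment becomes

       vstmia.32       r1!, {s0}

   The "%@ int" comment suffix is appended for integer modes only.  */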
20273
20274 /* Output a Neon double-word or quad-word load or store, or a load
20275 or store for larger structure modes.
20276
20277 WARNING: The ordering of elements is weird in big-endian mode,
20278 because the EABI requires that vectors stored in memory appear
20279 as though they were stored by a VSTM instruction.
20280 GCC RTL defines element ordering based on in-memory order.
20281 This can be different from the architectural ordering of elements
20282 within a NEON register. The intrinsics defined in arm_neon.h use the
20283 NEON register element ordering, not the GCC RTL element ordering.
20284
20285 For example, the in-memory ordering of a big-endian quadword
20286 vector with 16-bit elements when stored from register pair {d0,d1}
20287 will be (lowest address first, d0[N] is NEON register element N):
20288
20289 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20290
20291 When necessary, quadword registers (dN, dN+1) are moved to ARM
20292 registers from rN in the order:
20293
20294 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20295
20296 So that STM/LDM can be used on vectors in ARM registers, and the
20297 same memory layout will result as if VSTM/VLDM were used.
20298
20299 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20300 possible, which allows use of appropriate alignment tags.
20301 Note that the choice of "64" is independent of the actual vector
20302 element size; this size simply ensures that the behavior is
20303 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20304
20305 Due to limitations of those instructions, use of VST1.64/VLD1.64
20306 is not possible if:
20307 - the address contains PRE_DEC, or
20308 - the mode refers to more than 4 double-word registers
20309
20310 In those cases, it would be possible to replace VSTM/VLDM by a
20311 sequence of instructions; this is not currently implemented since
20312 this is not certain to actually improve performance. */
20313
20314 const char *
20315 output_move_neon (rtx *operands)
20316 {
20317 rtx reg, mem, addr, ops[2];
20318 int regno, nregs, load = REG_P (operands[0]);
20319 const char *templ;
20320 char buff[50];
20321 machine_mode mode;
20322
20323 reg = operands[!load];
20324 mem = operands[load];
20325
20326 mode = GET_MODE (reg);
20327
20328 gcc_assert (REG_P (reg));
20329 regno = REGNO (reg);
20330 nregs = REG_NREGS (reg) / 2;
20331 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20332 || NEON_REGNO_OK_FOR_QUAD (regno));
20333 gcc_assert (VALID_NEON_DREG_MODE (mode)
20334 || VALID_NEON_QREG_MODE (mode)
20335 || VALID_NEON_STRUCT_MODE (mode));
20336 gcc_assert (MEM_P (mem));
20337
20338 addr = XEXP (mem, 0);
20339
20340 /* Strip off const from addresses like (const (plus (...))). */
20341 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20342 addr = XEXP (addr, 0);
20343
20344 switch (GET_CODE (addr))
20345 {
20346 case POST_INC:
20347 /* We have to use vldm / vstm for too-large modes. */
20348 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20349 {
20350 templ = "v%smia%%?\t%%0!, %%h1";
20351 ops[0] = XEXP (addr, 0);
20352 }
20353 else
20354 {
20355 templ = "v%s1.64\t%%h1, %%A0";
20356 ops[0] = mem;
20357 }
20358 ops[1] = reg;
20359 break;
20360
20361 case PRE_DEC:
20362 /* We have to use vldm / vstm in this case, since there is no
20363 pre-decrement form of the vld1 / vst1 instructions. */
20364 templ = "v%smdb%%?\t%%0!, %%h1";
20365 ops[0] = XEXP (addr, 0);
20366 ops[1] = reg;
20367 break;
20368
20369 case POST_MODIFY:
20370 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20371 gcc_unreachable ();
20372
20373 case REG:
20374 /* We have to use vldm / vstm for too-large modes. */
20375 if (nregs > 1)
20376 {
20377 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20378 templ = "v%smia%%?\t%%m0, %%h1";
20379 else
20380 templ = "v%s1.64\t%%h1, %%A0";
20381
20382 ops[0] = mem;
20383 ops[1] = reg;
20384 break;
20385 }
20386 /* Fall through. */
20387 case PLUS:
20388 if (GET_CODE (addr) == PLUS)
20389 addr = XEXP (addr, 0);
20390 /* Fall through. */
20391 case LABEL_REF:
20392 {
20393 int i;
20394 int overlap = -1;
20395 for (i = 0; i < nregs; i++)
20396 {
20397 /* We're only using DImode here because it's a convenient
20398 size. */
20399 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20400 ops[1] = adjust_address (mem, DImode, 8 * i);
20401 if (reg_overlap_mentioned_p (ops[0], mem))
20402 {
20403 gcc_assert (overlap == -1);
20404 overlap = i;
20405 }
20406 else
20407 {
20408 if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20409 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20410 else
20411 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20412 output_asm_insn (buff, ops);
20413 }
20414 }
20415 if (overlap != -1)
20416 {
20417 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20418 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20419 if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20420 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20421 else
20422 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20423 output_asm_insn (buff, ops);
20424 }
20425
20426 return "";
20427 }
20428
20429 default:
20430 gcc_unreachable ();
20431 }
20432
20433 sprintf (buff, templ, load ? "ld" : "st");
20434 output_asm_insn (buff, ops);
20435
20436 return "";
20437 }
20438
20439 /* Compute and return the length of neon_mov<mode>, where <mode> is
20440 one of VSTRUCT modes: EI, OI, CI or XI. */
20441 int
20442 arm_attr_length_move_neon (rtx_insn *insn)
20443 {
20444 rtx reg, mem, addr;
20445 int load;
20446 machine_mode mode;
20447
20448 extract_insn_cached (insn);
20449
20450 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20451 {
20452 mode = GET_MODE (recog_data.operand[0]);
20453 switch (mode)
20454 {
20455 case E_EImode:
20456 case E_OImode:
20457 return 8;
20458 case E_CImode:
20459 return 12;
20460 case E_XImode:
20461 return 16;
20462 default:
20463 gcc_unreachable ();
20464 }
20465 }
20466
20467 load = REG_P (recog_data.operand[0]);
20468 reg = recog_data.operand[!load];
20469 mem = recog_data.operand[load];
20470
20471 gcc_assert (MEM_P (mem));
20472
20473 addr = XEXP (mem, 0);
20474
20475 /* Strip off const from addresses like (const (plus (...))). */
20476 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20477 addr = XEXP (addr, 0);
20478
20479 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
20480 {
20481 int insns = REG_NREGS (reg) / 2;
20482 return insns * 4;
20483 }
20484 else
20485 return 4;
20486 }
20487
20488 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20489 return zero. */
20490
20491 int
20492 arm_address_offset_is_imm (rtx_insn *insn)
20493 {
20494 rtx mem, addr;
20495
20496 extract_insn_cached (insn);
20497
20498 if (REG_P (recog_data.operand[0]))
20499 return 0;
20500
20501 mem = recog_data.operand[0];
20502
20503 gcc_assert (MEM_P (mem));
20504
20505 addr = XEXP (mem, 0);
20506
20507 if (REG_P (addr)
20508 || (GET_CODE (addr) == PLUS
20509 && REG_P (XEXP (addr, 0))
20510 && CONST_INT_P (XEXP (addr, 1))))
20511 return 1;
20512 else
20513 return 0;
20514 }
20515
20516 /* Output an ADD r, s, #n where n may be too big for one instruction.
20517 If adding zero to one register, output nothing. */
20518 const char *
20519 output_add_immediate (rtx *operands)
20520 {
20521 HOST_WIDE_INT n = INTVAL (operands[2]);
20522
20523 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20524 {
20525 if (n < 0)
20526 output_multi_immediate (operands,
20527 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20528 -n);
20529 else
20530 output_multi_immediate (operands,
20531 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20532 n);
20533 }
20534
20535 return "";
20536 }
20537
20538 /* Output a multiple immediate operation.
20539 OPERANDS is the vector of operands referred to in the output patterns.
20540 INSTR1 is the output pattern to use for the first constant.
20541 INSTR2 is the output pattern to use for subsequent constants.
20542 IMMED_OP is the index of the constant slot in OPERANDS.
20543 N is the constant value. */
20544 static const char *
20545 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20546 int immed_op, HOST_WIDE_INT n)
20547 {
20548 #if HOST_BITS_PER_WIDE_INT > 32
20549 n &= 0xffffffff;
20550 #endif
20551
20552 if (n == 0)
20553 {
20554 /* Quick and easy output. */
20555 operands[immed_op] = const0_rtx;
20556 output_asm_insn (instr1, operands);
20557 }
20558 else
20559 {
20560 int i;
20561 const char * instr = instr1;
20562
20563 /* Note that n is never zero here (which would give no output). */
20564 for (i = 0; i < 32; i += 2)
20565 {
20566 if (n & (3 << i))
20567 {
20568 operands[immed_op] = GEN_INT (n & (255 << i));
20569 output_asm_insn (instr, operands);
20570 instr = instr2;
20571 i += 6;
20572 }
20573 }
20574 }
20575
20576 return "";
20577 }
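
/* Worked example of the splitting loop (constant chosen for
   illustration): for N = 0x10004 an add is emitted as

       add     r0, r1, #4
       add     r0, r0, #65536

   because 0x10004 is not a valid single rotated 8-bit immediate, but
   each 8-bit chunk selected by the scan above is.  */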
20578
20579 /* Return the name of a shifter operation. */
20580 static const char *
20581 arm_shift_nmem(enum rtx_code code)
20582 {
20583 switch (code)
20584 {
20585 case ASHIFT:
20586 return ARM_LSL_NAME;
20587
20588 case ASHIFTRT:
20589 return "asr";
20590
20591 case LSHIFTRT:
20592 return "lsr";
20593
20594 case ROTATERT:
20595 return "ror";
20596
20597 default:
20598 abort();
20599 }
20600 }
20601
20602 /* Return the appropriate ARM instruction for the operation code.
20603 The returned result should not be overwritten. OP is the rtx of the
20604 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20605 was shifted. */
20606 const char *
20607 arithmetic_instr (rtx op, int shift_first_arg)
20608 {
20609 switch (GET_CODE (op))
20610 {
20611 case PLUS:
20612 return "add";
20613
20614 case MINUS:
20615 return shift_first_arg ? "rsb" : "sub";
20616
20617 case IOR:
20618 return "orr";
20619
20620 case XOR:
20621 return "eor";
20622
20623 case AND:
20624 return "and";
20625
20626 case ASHIFT:
20627 case ASHIFTRT:
20628 case LSHIFTRT:
20629 case ROTATERT:
20630 return arm_shift_nmem(GET_CODE(op));
20631
20632 default:
20633 gcc_unreachable ();
20634 }
20635 }
20636
20637 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20638 for the operation code. The returned result should not be overwritten.
20639 OP is the rtx code of the shift.
20640 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
20641 will be the constant shift amount. */
20642 static const char *
20643 shift_op (rtx op, HOST_WIDE_INT *amountp)
20644 {
20645 const char * mnem;
20646 enum rtx_code code = GET_CODE (op);
20647
20648 switch (code)
20649 {
20650 case ROTATE:
20651 if (!CONST_INT_P (XEXP (op, 1)))
20652 {
20653 output_operand_lossage ("invalid shift operand");
20654 return NULL;
20655 }
20656
20657 code = ROTATERT;
20658 *amountp = 32 - INTVAL (XEXP (op, 1));
20659 mnem = "ror";
20660 break;
20661
20662 case ASHIFT:
20663 case ASHIFTRT:
20664 case LSHIFTRT:
20665 case ROTATERT:
20666 mnem = arm_shift_nmem(code);
20667 if (CONST_INT_P (XEXP (op, 1)))
20668 {
20669 *amountp = INTVAL (XEXP (op, 1));
20670 }
20671 else if (REG_P (XEXP (op, 1)))
20672 {
20673 *amountp = -1;
20674 return mnem;
20675 }
20676 else
20677 {
20678 output_operand_lossage ("invalid shift operand");
20679 return NULL;
20680 }
20681 break;
20682
20683 case MULT:
20684 /* We never have to worry about the amount being other than a
20685 power of 2, since this case can never be reloaded from a reg. */
20686 if (!CONST_INT_P (XEXP (op, 1)))
20687 {
20688 output_operand_lossage ("invalid shift operand");
20689 return NULL;
20690 }
20691
20692 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20693
20694 /* Amount must be a power of two. */
20695 if (*amountp & (*amountp - 1))
20696 {
20697 output_operand_lossage ("invalid shift operand");
20698 return NULL;
20699 }
20700
20701 *amountp = exact_log2 (*amountp);
20702 gcc_assert (IN_RANGE (*amountp, 0, 31));
20703 return ARM_LSL_NAME;
20704
20705 default:
20706 output_operand_lossage ("invalid shift operand");
20707 return NULL;
20708 }
20709
20710 /* This is not 100% correct, but follows from the desire to merge
20711 multiplication by a power of 2 with the recognizer for a
20712 shift. >=32 is not a valid shift for "lsl", so we must try and
20713 output a shift that produces the correct arithmetical result.
20714 Using lsr #32 is identical except for the fact that the carry bit
20715 is not set correctly if we set the flags; but we never use the
20716 carry bit from such an operation, so we can ignore that. */
20717 if (code == ROTATERT)
20718 /* Rotate is just modulo 32. */
20719 *amountp &= 31;
20720 else if (*amountp != (*amountp & 31))
20721 {
20722 if (code == ASHIFT)
20723 mnem = "lsr";
20724 *amountp = 32;
20725 }
20726
20727 /* Shifts of 0 are no-ops. */
20728 if (*amountp == 0)
20729 return NULL;
20730
20731 return mnem;
20732 }
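
/* Illustrative mapping performed by shift_op: the RTL for x * 8, i.e.
   (mult (reg) (const_int 8)), is printed as a left shift by 3, so a
   combined operation can be output as, for example,

       add     r0, r0, r1, lsl #3

   (the register allocation shown is arbitrary).  */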
20733
20734 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20735 because /bin/as is horribly restrictive. The judgement about
20736 whether or not each character is 'printable' (and can be output as
20737 is) or not (and must be printed with an octal escape) must be made
20738 with reference to the *host* character set -- the situation is
20739 similar to that discussed in the comments above pp_c_char in
20740 c-pretty-print.c. */
20741
20742 #define MAX_ASCII_LEN 51
20743
20744 void
20745 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20746 {
20747 int i;
20748 int len_so_far = 0;
20749
20750 fputs ("\t.ascii\t\"", stream);
20751
20752 for (i = 0; i < len; i++)
20753 {
20754 int c = p[i];
20755
20756 if (len_so_far >= MAX_ASCII_LEN)
20757 {
20758 fputs ("\"\n\t.ascii\t\"", stream);
20759 len_so_far = 0;
20760 }
20761
20762 if (ISPRINT (c))
20763 {
20764 if (c == '\\' || c == '\"')
20765 {
20766 putc ('\\', stream);
20767 len_so_far++;
20768 }
20769 putc (c, stream);
20770 len_so_far++;
20771 }
20772 else
20773 {
20774 fprintf (stream, "\\%03o", c);
20775 len_so_far += 4;
20776 }
20777 }
20778
20779 fputs ("\"\n", stream);
20780 }
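
/* Example of the escaping above (input bytes chosen for illustration):
   the characters 'H', 'i', '"' and a newline are emitted as

       .ascii  "Hi\"\012"

   and a fresh .ascii directive is started once MAX_ASCII_LEN characters
   have been written on the current line.  */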
20781 \f
20782
20783 /* Compute the register save mask for registers 0 through 12
20784 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
20785
20786 static unsigned long
20787 arm_compute_save_reg0_reg12_mask (void)
20788 {
20789 unsigned long func_type = arm_current_func_type ();
20790 unsigned long save_reg_mask = 0;
20791 unsigned int reg;
20792
20793 if (IS_INTERRUPT (func_type))
20794 {
20795 unsigned int max_reg;
20796 /* Interrupt functions must not corrupt any registers,
20797 even call clobbered ones. If this is a leaf function
20798 we can just examine the registers used by the RTL, but
20799 otherwise we have to assume that whatever function is
20800 called might clobber anything, and so we have to save
20801 all the call-clobbered registers as well. */
20802 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20803 /* FIQ handlers have registers r8 - r12 banked, so
20804 we only need to check r0 - r7. Normal ISRs only
20805 bank r14 and r15, so we must check up to r12.
20806 r13 is the stack pointer which is always preserved,
20807 so we do not need to consider it here. */
20808 max_reg = 7;
20809 else
20810 max_reg = 12;
20811
20812 for (reg = 0; reg <= max_reg; reg++)
20813 if (reg_needs_saving_p (reg))
20814 save_reg_mask |= (1 << reg);
20815
20816 /* Also save the pic base register if necessary. */
20817 if (PIC_REGISTER_MAY_NEED_SAVING
20818 && crtl->uses_pic_offset_table)
20819 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20820 }
20821 else if (IS_VOLATILE(func_type))
20822 {
20823 /* For noreturn functions we historically omitted register saves
20824 altogether. However this really messes up debugging. As a
20825 compromise save just the frame pointers. Combined with the link
20826 register saved elsewhere this should be sufficient to get
20827 a backtrace. */
20828 if (frame_pointer_needed)
20829 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20830 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20831 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20832 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20833 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20834 }
20835 else
20836 {
20837 /* In the normal case we only need to save those registers
20838 which are call saved and which are used by this function. */
20839 for (reg = 0; reg <= 11; reg++)
20840 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20841 save_reg_mask |= (1 << reg);
20842
20843 /* Handle the frame pointer as a special case. */
20844 if (frame_pointer_needed)
20845 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20846
20847 /* If we aren't loading the PIC register,
20848 don't stack it even though it may be live. */
20849 if (PIC_REGISTER_MAY_NEED_SAVING
20850 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20851 || crtl->uses_pic_offset_table))
20852 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20853
20854 /* The prologue will copy SP into R0, so save it. */
20855 if (IS_STACKALIGN (func_type))
20856 save_reg_mask |= 1;
20857 }
20858
20859 /* Save registers so the exception handler can modify them. */
20860 if (crtl->calls_eh_return)
20861 {
20862 unsigned int i;
20863
20864 for (i = 0; ; i++)
20865 {
20866 reg = EH_RETURN_DATA_REGNO (i);
20867 if (reg == INVALID_REGNUM)
20868 break;
20869 save_reg_mask |= 1 << reg;
20870 }
20871 }
20872
20873 return save_reg_mask;
20874 }
20875
20876 /* Return true if r3 is live at the start of the function. */
20877
20878 static bool
20879 arm_r3_live_at_start_p (void)
20880 {
20881 /* Just look at cfg info, which is still close enough to correct at this
20882 point. This gives false positives for broken functions that might use
20883 uninitialized data that happens to be allocated in r3, but who cares? */
20884 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20885 }
20886
20887 /* Compute the number of bytes used to store the static chain register on the
20888 stack, above the stack frame. We need to know this accurately to get the
20889 alignment of the rest of the stack frame correct. */
20890
20891 static int
20892 arm_compute_static_chain_stack_bytes (void)
20893 {
20894 /* Once the value is updated from the init value of -1, do not
20895 re-compute. */
20896 if (cfun->machine->static_chain_stack_bytes != -1)
20897 return cfun->machine->static_chain_stack_bytes;
20898
20899 /* See the defining assertion in arm_expand_prologue. */
20900 if (IS_NESTED (arm_current_func_type ())
20901 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20902 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20903 || flag_stack_clash_protection)
20904 && !df_regs_ever_live_p (LR_REGNUM)))
20905 && arm_r3_live_at_start_p ()
20906 && crtl->args.pretend_args_size == 0)
20907 return 4;
20908
20909 return 0;
20910 }
20911
20912 /* Compute a bit mask of which core registers need to be
20913 saved on the stack for the current function.
20914 This is used by arm_compute_frame_layout, which may add extra registers. */
20915
20916 static unsigned long
20917 arm_compute_save_core_reg_mask (void)
20918 {
20919 unsigned int save_reg_mask = 0;
20920 unsigned long func_type = arm_current_func_type ();
20921 unsigned int reg;
20922
20923 if (IS_NAKED (func_type))
20924 /* This should never really happen. */
20925 return 0;
20926
20927 /* If we are creating a stack frame, then we must save the frame pointer,
20928 IP (which will hold the old stack pointer), LR and the PC. */
20929 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20930 save_reg_mask |=
20931 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20932 | (1 << IP_REGNUM)
20933 | (1 << LR_REGNUM)
20934 | (1 << PC_REGNUM);
20935
20936 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20937
20938 /* Decide if we need to save the link register.
20939 Interrupt routines have their own banked link register,
20940 so they never need to save it.
20941 Otherwise if we do not use the link register we do not need to save
20942 it. If we are pushing other registers onto the stack however, we
20943 can save an instruction in the epilogue by pushing the link register
20944 now and then popping it back into the PC. This incurs extra memory
20945 accesses though, so we only do it when optimizing for size, and only
20946 if we know that we will not need a fancy return sequence. */
20947 if (df_regs_ever_live_p (LR_REGNUM)
20948 || (save_reg_mask
20949 && optimize_size
20950 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20951 && !crtl->tail_call_emit
20952 && !crtl->calls_eh_return))
20953 save_reg_mask |= 1 << LR_REGNUM;
20954
20955 if (cfun->machine->lr_save_eliminated)
20956 save_reg_mask &= ~ (1 << LR_REGNUM);
20957
20958 if (TARGET_REALLY_IWMMXT
20959 && ((bit_count (save_reg_mask)
20960 + ARM_NUM_INTS (crtl->args.pretend_args_size +
20961 arm_compute_static_chain_stack_bytes())
20962 ) % 2) != 0)
20963 {
20964 /* The total number of registers that are going to be pushed
20965 onto the stack is odd. We need to ensure that the stack
20966 is 64-bit aligned before we start to save iWMMXt registers,
20967 and also before we start to create locals. (A local variable
20968 might be a double or long long which we will load/store using
20969 an iWMMXt instruction). Therefore we need to push another
20970 ARM register, so that the stack will be 64-bit aligned. We
20971 try to avoid using the arg registers (r0 - r3) as they might be
20972 used to pass values in a tail call. */
20973 for (reg = 4; reg <= 12; reg++)
20974 if ((save_reg_mask & (1 << reg)) == 0)
20975 break;
20976
20977 if (reg <= 12)
20978 save_reg_mask |= (1 << reg);
20979 else
20980 {
20981 cfun->machine->sibcall_blocked = 1;
20982 save_reg_mask |= (1 << 3);
20983 }
20984 }
20985
20986 /* We may need to push an additional register for use initializing the
20987 PIC base register. */
20988 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
20989 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
20990 {
20991 reg = thumb_find_work_register (1 << 4);
20992 if (!call_used_or_fixed_reg_p (reg))
20993 save_reg_mask |= (1 << reg);
20994 }
20995
20996 return save_reg_mask;
20997 }
20998
20999 /* Compute a bit mask of which core registers need to be
21000 saved on the stack for the current function. */
21001 static unsigned long
21002 thumb1_compute_save_core_reg_mask (void)
21003 {
21004 unsigned long mask;
21005 unsigned reg;
21006
21007 mask = 0;
21008 for (reg = 0; reg < 12; reg ++)
21009 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21010 mask |= 1 << reg;
21011
21012 /* Handle the frame pointer as a special case. */
21013 if (frame_pointer_needed)
21014 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21015
21016 if (flag_pic
21017 && !TARGET_SINGLE_PIC_BASE
21018 && arm_pic_register != INVALID_REGNUM
21019 && crtl->uses_pic_offset_table)
21020 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21021
21022 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21023 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21024 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21025
21026 /* LR will also be pushed if any lo regs are pushed. */
21027 if (mask & 0xff || thumb_force_lr_save ())
21028 mask |= (1 << LR_REGNUM);
21029
21030 bool call_clobbered_scratch
21031 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21032 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21033
21034 /* Make sure we have a low work register if we need one. We will
21035 need one if we are going to push a high register, but we are not
21036 currently intending to push a low register. However if both the
21037 prologue and epilogue have a spare call-clobbered low register,
21038 then we won't need to find an additional work register. It does
21039 not need to be the same register in the prologue and
21040 epilogue. */
21041 if ((mask & 0xff) == 0
21042 && !call_clobbered_scratch
21043 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21044 {
21045 /* Use thumb_find_work_register to choose which register
21046 we will use. If the register is live then we will
21047 have to push it. Use LAST_LO_REGNUM as our fallback
21048 choice for the register to select. */
21049 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21050 /* Make sure the register returned by thumb_find_work_register is
21051 not part of the return value. */
21052 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21053 reg = LAST_LO_REGNUM;
21054
21055 if (callee_saved_reg_p (reg))
21056 mask |= 1 << reg;
21057 }
21058
21059 /* The 504 below is 8 bytes less than 512 because there are two possible
21060 alignment words. We can't tell here if they will be present or not, so we
21061 have to play it safe and assume that they are. */
21062 if ((CALLER_INTERWORKING_SLOT_SIZE +
21063 ROUND_UP_WORD (get_frame_size ()) +
21064 crtl->outgoing_args_size) >= 504)
21065 {
21066 /* This is the same as the code in thumb1_expand_prologue() which
21067 determines which register to use for stack decrement. */
21068 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21069 if (mask & (1 << reg))
21070 break;
21071
21072 if (reg > LAST_LO_REGNUM)
21073 {
21074 /* Make sure we have a register available for stack decrement. */
21075 mask |= 1 << LAST_LO_REGNUM;
21076 }
21077 }
21078
21079 return mask;
21080 }
21081
21082 /* Return the number of bytes required to save VFP registers. */
21083 static int
21084 arm_get_vfp_saved_size (void)
21085 {
21086 unsigned int regno;
21087 int count;
21088 int saved;
21089
21090 saved = 0;
21091 /* Space for saved VFP registers. */
21092 if (TARGET_VFP_BASE)
21093 {
21094 count = 0;
21095 for (regno = FIRST_VFP_REGNUM;
21096 regno < LAST_VFP_REGNUM;
21097 regno += 2)
21098 {
21099 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21100 {
21101 if (count > 0)
21102 {
21103 /* Work around the ARM10 VFPr1 bug. */
21104 if (count == 2 && !arm_arch6)
21105 count++;
21106 saved += count * 8;
21107 }
21108 count = 0;
21109 }
21110 else
21111 count++;
21112 }
21113 if (count > 0)
21114 {
21115 if (count == 2 && !arm_arch6)
21116 count++;
21117 saved += count * 8;
21118 }
21119 }
21120 return saved;
21121 }
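
/* Worked example (illustrative): if d8-d10 are the only VFP registers
   that need saving, COUNT reaches 3 and 24 bytes are returned.  On a
   pre-Armv6 core the ARM10 VFPr1 workaround bumps a run of exactly two
   registers to three, so saving only d8-d9 would also return 24.  */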
21122
21123
21124 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21125 everything bar the final return instruction. If SIMPLE_RETURN is true,
21126 then do not output the epilogue, because it has already been emitted in RTL.
21127
21128 Note: do not forget to update length attribute of corresponding insn pattern
21129 when changing assembly output (e.g. the length attribute of
21130 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21131 register clearing sequences). */
21132 const char *
21133 output_return_instruction (rtx operand, bool really_return, bool reverse,
21134 bool simple_return)
21135 {
21136 char conditional[10];
21137 char instr[100];
21138 unsigned reg;
21139 unsigned long live_regs_mask;
21140 unsigned long func_type;
21141 arm_stack_offsets *offsets;
21142
21143 func_type = arm_current_func_type ();
21144
21145 if (IS_NAKED (func_type))
21146 return "";
21147
21148 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21149 {
21150 /* If this function was declared non-returning, and we have
21151 found a tail call, then we have to trust that the called
21152 function won't return. */
21153 if (really_return)
21154 {
21155 rtx ops[2];
21156
21157 /* Otherwise, trap an attempted return by aborting. */
21158 ops[0] = operand;
21159 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21160 : "abort");
21161 assemble_external_libcall (ops[1]);
21162 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21163 }
21164
21165 return "";
21166 }
21167
21168 gcc_assert (!cfun->calls_alloca || really_return);
21169
21170 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21171
21172 cfun->machine->return_used_this_function = 1;
21173
21174 offsets = arm_get_frame_offsets ();
21175 live_regs_mask = offsets->saved_regs_mask;
21176
21177 if (!simple_return && live_regs_mask)
21178 {
21179 const char * return_reg;
21180
21181 /* If we do not have any special requirements for function exit
21182 (e.g. interworking) then we can load the return address
21183 directly into the PC. Otherwise we must load it into LR. */
21184 if (really_return
21185 && !IS_CMSE_ENTRY (func_type)
21186 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21187 return_reg = reg_names[PC_REGNUM];
21188 else
21189 return_reg = reg_names[LR_REGNUM];
21190
21191 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21192 {
21193 /* There are three possible reasons for the IP register
21194 being saved. 1) a stack frame was created, in which case
21195 IP contains the old stack pointer, or 2) an ISR routine
21196 corrupted it, or 3) it was saved to align the stack on
21197 iWMMXt. In case 1, restore IP into SP, otherwise just
21198 restore IP. */
21199 if (frame_pointer_needed)
21200 {
21201 live_regs_mask &= ~ (1 << IP_REGNUM);
21202 live_regs_mask |= (1 << SP_REGNUM);
21203 }
21204 else
21205 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21206 }
21207
21208 /* On some ARM architectures it is faster to use LDR rather than
21209 LDM to load a single register. On other architectures, the
21210 cost is the same. In 26 bit mode, or for exception handlers,
21211 we have to use LDM to load the PC so that the CPSR is also
21212 restored. */
21213 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21214 if (live_regs_mask == (1U << reg))
21215 break;
21216
21217 if (reg <= LAST_ARM_REGNUM
21218 && (reg != LR_REGNUM
21219 || ! really_return
21220 || ! IS_INTERRUPT (func_type)))
21221 {
21222 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21223 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21224 }
21225 else
21226 {
21227 char *p;
21228 int first = 1;
21229
21230 /* Generate the load multiple instruction to restore the
21231 registers. Note we can get here, even if
21232 frame_pointer_needed is true, but only if sp already
21233 points to the base of the saved core registers. */
21234 if (live_regs_mask & (1 << SP_REGNUM))
21235 {
21236 unsigned HOST_WIDE_INT stack_adjust;
21237
21238 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21239 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21240
21241 if (stack_adjust && arm_arch5t && TARGET_ARM)
21242 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21243 else
21244 {
21245 /* If we can't use ldmib (SA110 bug),
21246 then try to pop r3 instead. */
21247 if (stack_adjust)
21248 live_regs_mask |= 1 << 3;
21249
21250 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21251 }
21252 }
21253 /* For interrupt returns we have to use an LDM rather than
21254 a POP so that we can use the exception return variant. */
21255 else if (IS_INTERRUPT (func_type))
21256 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21257 else
21258 sprintf (instr, "pop%s\t{", conditional);
21259
21260 p = instr + strlen (instr);
21261
21262 for (reg = 0; reg <= SP_REGNUM; reg++)
21263 if (live_regs_mask & (1 << reg))
21264 {
21265 int l = strlen (reg_names[reg]);
21266
21267 if (first)
21268 first = 0;
21269 else
21270 {
21271 memcpy (p, ", ", 2);
21272 p += 2;
21273 }
21274
21275 memcpy (p, "%|", 2);
21276 memcpy (p + 2, reg_names[reg], l);
21277 p += l + 2;
21278 }
21279
21280 if (live_regs_mask & (1 << LR_REGNUM))
21281 {
21282 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21283 /* If returning from an interrupt, restore the CPSR. */
21284 if (IS_INTERRUPT (func_type))
21285 strcat (p, "^");
21286 }
21287 else
21288 strcpy (p, "}");
21289 }
21290
21291 output_asm_insn (instr, & operand);
21292
21293 /* See if we need to generate an extra instruction to
21294 perform the actual function return. */
21295 if (really_return
21296 && func_type != ARM_FT_INTERWORKED
21297 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21298 {
21299 /* The return has already been handled
21300 by loading the LR into the PC. */
21301 return "";
21302 }
21303 }
21304
21305 if (really_return)
21306 {
21307 switch ((int) ARM_FUNC_TYPE (func_type))
21308 {
21309 case ARM_FT_ISR:
21310 case ARM_FT_FIQ:
21311 /* ??? This is wrong for unified assembly syntax. */
21312 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21313 break;
21314
21315 case ARM_FT_INTERWORKED:
21316 gcc_assert (arm_arch5t || arm_arch4t);
21317 sprintf (instr, "bx%s\t%%|lr", conditional);
21318 break;
21319
21320 case ARM_FT_EXCEPTION:
21321 /* ??? This is wrong for unified assembly syntax. */
21322 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21323 break;
21324
21325 default:
21326 if (IS_CMSE_ENTRY (func_type))
21327 {
21328 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21329 emitted by cmse_nonsecure_entry_clear_before_return () and the
21330 VSTR/VLDR instructions in the prologue and epilogue. */
21331 if (!TARGET_HAVE_FPCXT_CMSE)
21332 {
21333 /* Check if we have to clear the 'GE bits' which is only used if
21334 parallel add and subtraction instructions are available. */
21335 if (TARGET_INT_SIMD)
21336 snprintf (instr, sizeof (instr),
21337 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21338 else
21339 snprintf (instr, sizeof (instr),
21340 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21341
21342 output_asm_insn (instr, & operand);
21343 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21344 care of it. */
21345 if (TARGET_HARD_FLOAT)
21346 {
21347 /* Clear the cumulative exception-status bits (0-4,7) and
21348 the condition code bits (28-31) of the FPSCR. We need
21349 to remember to clear the first scratch register used
21350 (IP) and save and restore the second (r4).
21351
21352 Important note: the length of the
21353 thumb2_cmse_entry_return insn pattern must account for
21354 the size of the below instructions. */
21355 output_asm_insn ("push\t{%|r4}", & operand);
21356 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21357 output_asm_insn ("movw\t%|r4, #65376", & operand);
21358 output_asm_insn ("movt\t%|r4, #4095", & operand);
21359 output_asm_insn ("and\t%|ip, %|r4", & operand);
21360 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21361 output_asm_insn ("pop\t{%|r4}", & operand);
21362 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21363 }
21364 }
21365 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21366 }
21367 /* Use bx if it's available. */
21368 else if (arm_arch5t || arm_arch4t)
21369 sprintf (instr, "bx%s\t%%|lr", conditional);
21370 else
21371 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21372 break;
21373 }
21374
21375 output_asm_insn (instr, & operand);
21376 }
21377
21378 return "";
21379 }
21380
21381 /* Output in FILE asm statements needed to declare the NAME of the function
21382 defined by its DECL node. */
21383
21384 void
21385 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21386 {
21387 size_t cmse_name_len;
21388 char *cmse_name = 0;
21389 char cmse_prefix[] = "__acle_se_";
21390
21391 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21392 extra function label for each function with the 'cmse_nonsecure_entry'
21393 attribute. This extra function label should be prepended with
21394 '__acle_se_', telling the linker that it needs to create secure gateway
21395 veneers for this function. */
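/* For illustration (assuming a hypothetical function named 'foo'): the code
   below makes '__acle_se_foo' global, declares it with a function type
   directive, and later emits its label at the same address as the ordinary
   'foo' label, so the linker can build the secure gateway veneer. */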
21396 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21397 DECL_ATTRIBUTES (decl)))
21398 {
21399 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21400 cmse_name = XALLOCAVEC (char, cmse_name_len);
21401 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21402 targetm.asm_out.globalize_label (file, cmse_name);
21403
21404 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21405 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21406 }
21407
21408 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21409 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21410 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21411 ASM_OUTPUT_LABEL (file, name);
21412
21413 if (cmse_name)
21414 ASM_OUTPUT_LABEL (file, cmse_name);
21415
21416 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21417 }
21418
21419 /* Write the function name into the code section, directly preceding
21420 the function prologue.
21421
21422 Code will be output similar to this:
21423 t0
21424 .ascii "arm_poke_function_name", 0
21425 .align
21426 t1
21427 .word 0xff000000 + (t1 - t0)
21428 arm_poke_function_name
21429 mov ip, sp
21430 stmfd sp!, {fp, ip, lr, pc}
21431 sub fp, ip, #4
21432
21433 When performing a stack backtrace, code can inspect the value
21434 of 'pc' stored at 'fp' + 0. If the trace function then looks
21435 at location pc - 12 and the top 8 bits are set, then we know
21436 that there is a function name embedded immediately preceding this
21437 location, and that its length is (pc[-3] & ~0xff000000).
21438
21439 We assume that pc is declared as a pointer to an unsigned long.
21440
21441 It is of no benefit to output the function name if we are assembling
21442 a leaf function. These function types will not contain a stack
21443 backtrace structure, so it is not possible to determine the
21444 function name. */
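/* A worked example of the marker word (the name is arbitrary): for
   NAME == "poke", LENGTH is strlen ("poke") + 1 == 5, ROUND_UP_WORD rounds
   that to 8, and the word emitted after the padded string is
   0xff000000 + 8 == 0xff000008; a backtracer recovers the distance back to
   the start of the string from the low 24 bits of that word. */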
21445 void
21446 arm_poke_function_name (FILE *stream, const char *name)
21447 {
21448 unsigned long alignlength;
21449 unsigned long length;
21450 rtx x;
21451
21452 length = strlen (name) + 1;
21453 alignlength = ROUND_UP_WORD (length);
21454
21455 ASM_OUTPUT_ASCII (stream, name, length);
21456 ASM_OUTPUT_ALIGN (stream, 2);
21457 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21458 assemble_aligned_integer (UNITS_PER_WORD, x);
21459 }
21460
21461 /* Place some comments into the assembler stream
21462 describing the current function. */
21463 static void
21464 arm_output_function_prologue (FILE *f)
21465 {
21466 unsigned long func_type;
21467
21468 /* Sanity check. */
21469 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21470
21471 func_type = arm_current_func_type ();
21472
21473 switch ((int) ARM_FUNC_TYPE (func_type))
21474 {
21475 default:
21476 case ARM_FT_NORMAL:
21477 break;
21478 case ARM_FT_INTERWORKED:
21479 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21480 break;
21481 case ARM_FT_ISR:
21482 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21483 break;
21484 case ARM_FT_FIQ:
21485 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21486 break;
21487 case ARM_FT_EXCEPTION:
21488 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21489 break;
21490 }
21491
21492 if (IS_NAKED (func_type))
21493 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21494
21495 if (IS_VOLATILE (func_type))
21496 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21497
21498 if (IS_NESTED (func_type))
21499 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21500 if (IS_STACKALIGN (func_type))
21501 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21502 if (IS_CMSE_ENTRY (func_type))
21503 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21504
21505 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21506 (HOST_WIDE_INT) crtl->args.size,
21507 crtl->args.pretend_args_size,
21508 (HOST_WIDE_INT) get_frame_size ());
21509
21510 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21511 frame_pointer_needed,
21512 cfun->machine->uses_anonymous_args);
21513
21514 if (cfun->machine->lr_save_eliminated)
21515 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21516
21517 if (crtl->calls_eh_return)
21518 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21519
21520 }
21521
21522 static void
21523 arm_output_function_epilogue (FILE *)
21524 {
21525 arm_stack_offsets *offsets;
21526
21527 if (TARGET_THUMB1)
21528 {
21529 int regno;
21530
21531 /* Emit any call-via-reg trampolines that are needed for v4t support
21532 of call_reg and call_value_reg type insns. */
21533 for (regno = 0; regno < LR_REGNUM; regno++)
21534 {
21535 rtx label = cfun->machine->call_via[regno];
21536
21537 if (label != NULL)
21538 {
21539 switch_to_section (function_section (current_function_decl));
21540 targetm.asm_out.internal_label (asm_out_file, "L",
21541 CODE_LABEL_NUMBER (label));
21542 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21543 }
21544 }
21545
21546 /* ??? Probably not safe to set this here, since it assumes that a
21547 function will be emitted as assembly immediately after we generate
21548 RTL for it. This does not happen for inline functions. */
21549 cfun->machine->return_used_this_function = 0;
21550 }
21551 else /* TARGET_32BIT */
21552 {
21553 /* We need to take into account any stack-frame rounding. */
21554 offsets = arm_get_frame_offsets ();
21555
21556 gcc_assert (!use_return_insn (FALSE, NULL)
21557 || (cfun->machine->return_used_this_function != 0)
21558 || offsets->saved_regs == offsets->outgoing_args
21559 || frame_pointer_needed);
21560 }
21561 }
21562
21563 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21564 STR and STRD. If an even number of registers is being pushed, an
21565 STRD pattern is created for each register pair. If an
21566 odd number of registers is pushed, emit an initial STR followed by
21567 as many STRD instructions as are needed. This works best when the
21568 stack is initially 64-bit aligned (the normal case), since it
21569 ensures that each STRD is also 64-bit aligned. */
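/* For example (a sketch of the resulting assembly; the register choice is
   arbitrary): with SAVED_REGS_MASK covering {r4, r5, r6}, the odd count
   yields an initial writeback store followed by a single STRD, roughly:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */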
21570 static void
21571 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21572 {
21573 int num_regs = 0;
21574 int i;
21575 int regno;
21576 rtx par = NULL_RTX;
21577 rtx dwarf = NULL_RTX;
21578 rtx tmp;
21579 bool first = true;
21580
21581 num_regs = bit_count (saved_regs_mask);
21582
21583 /* Must be at least one register to save, and can't save SP or PC. */
21584 gcc_assert (num_regs > 0 && num_regs <= 14);
21585 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21586 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21587
21588 /* Create sequence for DWARF info. All the frame-related data for
21589 debugging is held in this wrapper. */
21590 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21591
21592 /* Describe the stack adjustment. */
21593 tmp = gen_rtx_SET (stack_pointer_rtx,
21594 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21595 RTX_FRAME_RELATED_P (tmp) = 1;
21596 XVECEXP (dwarf, 0, 0) = tmp;
21597
21598 /* Find the first register. */
21599 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21600 ;
21601
21602 i = 0;
21603
21604 /* If there's an odd number of registers to push, start off by
21605 pushing a single register. This ensures that subsequent STRD
21606 operations are doubleword aligned (assuming that SP was originally
21607 64-bit aligned). */
21608 if ((num_regs & 1) != 0)
21609 {
21610 rtx reg, mem, insn;
21611
21612 reg = gen_rtx_REG (SImode, regno);
21613 if (num_regs == 1)
21614 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21615 stack_pointer_rtx));
21616 else
21617 mem = gen_frame_mem (Pmode,
21618 gen_rtx_PRE_MODIFY
21619 (Pmode, stack_pointer_rtx,
21620 plus_constant (Pmode, stack_pointer_rtx,
21621 -4 * num_regs)));
21622
21623 tmp = gen_rtx_SET (mem, reg);
21624 RTX_FRAME_RELATED_P (tmp) = 1;
21625 insn = emit_insn (tmp);
21626 RTX_FRAME_RELATED_P (insn) = 1;
21627 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21628 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21629 RTX_FRAME_RELATED_P (tmp) = 1;
21630 i++;
21631 regno++;
21632 XVECEXP (dwarf, 0, i) = tmp;
21633 first = false;
21634 }
21635
21636 while (i < num_regs)
21637 if (saved_regs_mask & (1 << regno))
21638 {
21639 rtx reg1, reg2, mem1, mem2;
21640 rtx tmp0, tmp1, tmp2;
21641 int regno2;
21642
21643 /* Find the register to pair with this one. */
21644 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21645 regno2++)
21646 ;
21647
21648 reg1 = gen_rtx_REG (SImode, regno);
21649 reg2 = gen_rtx_REG (SImode, regno2);
21650
21651 if (first)
21652 {
21653 rtx insn;
21654
21655 first = false;
21656 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21657 stack_pointer_rtx,
21658 -4 * num_regs));
21659 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21660 stack_pointer_rtx,
21661 -4 * (num_regs - 1)));
21662 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21663 plus_constant (Pmode, stack_pointer_rtx,
21664 -4 * (num_regs)));
21665 tmp1 = gen_rtx_SET (mem1, reg1);
21666 tmp2 = gen_rtx_SET (mem2, reg2);
21667 RTX_FRAME_RELATED_P (tmp0) = 1;
21668 RTX_FRAME_RELATED_P (tmp1) = 1;
21669 RTX_FRAME_RELATED_P (tmp2) = 1;
21670 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21671 XVECEXP (par, 0, 0) = tmp0;
21672 XVECEXP (par, 0, 1) = tmp1;
21673 XVECEXP (par, 0, 2) = tmp2;
21674 insn = emit_insn (par);
21675 RTX_FRAME_RELATED_P (insn) = 1;
21676 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21677 }
21678 else
21679 {
21680 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21681 stack_pointer_rtx,
21682 4 * i));
21683 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21684 stack_pointer_rtx,
21685 4 * (i + 1)));
21686 tmp1 = gen_rtx_SET (mem1, reg1);
21687 tmp2 = gen_rtx_SET (mem2, reg2);
21688 RTX_FRAME_RELATED_P (tmp1) = 1;
21689 RTX_FRAME_RELATED_P (tmp2) = 1;
21690 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21691 XVECEXP (par, 0, 0) = tmp1;
21692 XVECEXP (par, 0, 1) = tmp2;
21693 emit_insn (par);
21694 }
21695
21696 /* Create unwind information. This is an approximation. */
21697 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21698 plus_constant (Pmode,
21699 stack_pointer_rtx,
21700 4 * i)),
21701 reg1);
21702 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21703 plus_constant (Pmode,
21704 stack_pointer_rtx,
21705 4 * (i + 1))),
21706 reg2);
21707
21708 RTX_FRAME_RELATED_P (tmp1) = 1;
21709 RTX_FRAME_RELATED_P (tmp2) = 1;
21710 XVECEXP (dwarf, 0, i + 1) = tmp1;
21711 XVECEXP (dwarf, 0, i + 2) = tmp2;
21712 i += 2;
21713 regno = regno2 + 1;
21714 }
21715 else
21716 regno++;
21717
21718 return;
21719 }
21720
21721 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21722 whenever possible, otherwise it emits single-word stores. The first store
21723 also allocates stack space for all saved registers, using writeback with
21724 pre-indexed addressing. All other stores use offset addressing. If no STRD
21725 can be emitted, this function emits a sequence of single-word stores,
21726 and not an STM as before, because single-word stores give the scheduler
21727 more freedom and can be turned into an STM by peephole optimizations. */
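/* For example (a sketch; the register choice is arbitrary): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7}, the first STRD also allocates
   the whole area via writeback and the second uses offset addressing,
   roughly:

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]

   A register without a suitable partner would instead be stored with a
   single-word STR at its offset. */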
21728 static void
21729 arm_emit_strd_push (unsigned long saved_regs_mask)
21730 {
21731 int num_regs = 0;
21732 int i, j, dwarf_index = 0;
21733 int offset = 0;
21734 rtx dwarf = NULL_RTX;
21735 rtx insn = NULL_RTX;
21736 rtx tmp, mem;
21737
21738 /* TODO: More efficient code could be emitted by changing the
21739 layout, e.g., first push all pairs that can use STRD to keep the
21740 stack aligned, and then push all other registers. */
21741 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21742 if (saved_regs_mask & (1 << i))
21743 num_regs++;
21744
21745 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21746 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21747 gcc_assert (num_regs > 0);
21748
21749 /* Create sequence for DWARF info. */
21750 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21751
21752 /* For dwarf info, we generate explicit stack update. */
21753 tmp = gen_rtx_SET (stack_pointer_rtx,
21754 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21755 RTX_FRAME_RELATED_P (tmp) = 1;
21756 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21757
21758 /* Save registers. */
21759 offset = - 4 * num_regs;
21760 j = 0;
21761 while (j <= LAST_ARM_REGNUM)
21762 if (saved_regs_mask & (1 << j))
21763 {
21764 if ((j % 2 == 0)
21765 && (saved_regs_mask & (1 << (j + 1))))
21766 {
21767 /* The current register and the next register form a register pair for
21768 which STRD can be generated. */
21769 if (offset < 0)
21770 {
21771 /* Allocate stack space for all saved registers. */
21772 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21773 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21774 mem = gen_frame_mem (DImode, tmp);
21775 offset = 0;
21776 }
21777 else if (offset > 0)
21778 mem = gen_frame_mem (DImode,
21779 plus_constant (Pmode,
21780 stack_pointer_rtx,
21781 offset));
21782 else
21783 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21784
21785 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21786 RTX_FRAME_RELATED_P (tmp) = 1;
21787 tmp = emit_insn (tmp);
21788
21789 /* Record the first store insn. */
21790 if (dwarf_index == 1)
21791 insn = tmp;
21792
21793 /* Generate dwarf info. */
21794 mem = gen_frame_mem (SImode,
21795 plus_constant (Pmode,
21796 stack_pointer_rtx,
21797 offset));
21798 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21799 RTX_FRAME_RELATED_P (tmp) = 1;
21800 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21801
21802 mem = gen_frame_mem (SImode,
21803 plus_constant (Pmode,
21804 stack_pointer_rtx,
21805 offset + 4));
21806 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21807 RTX_FRAME_RELATED_P (tmp) = 1;
21808 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21809
21810 offset += 8;
21811 j += 2;
21812 }
21813 else
21814 {
21815 /* Emit a single word store. */
21816 if (offset < 0)
21817 {
21818 /* Allocate stack space for all saved registers. */
21819 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21820 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21821 mem = gen_frame_mem (SImode, tmp);
21822 offset = 0;
21823 }
21824 else if (offset > 0)
21825 mem = gen_frame_mem (SImode,
21826 plus_constant (Pmode,
21827 stack_pointer_rtx,
21828 offset));
21829 else
21830 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21831
21832 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21833 RTX_FRAME_RELATED_P (tmp) = 1;
21834 tmp = emit_insn (tmp);
21835
21836 /* Record the first store insn. */
21837 if (dwarf_index == 1)
21838 insn = tmp;
21839
21840 /* Generate dwarf info. */
21841 mem = gen_frame_mem (SImode,
21842 plus_constant(Pmode,
21843 stack_pointer_rtx,
21844 offset));
21845 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21846 RTX_FRAME_RELATED_P (tmp) = 1;
21847 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21848
21849 offset += 4;
21850 j += 1;
21851 }
21852 }
21853 else
21854 j++;
21855
21856 /* Attach dwarf info to the first insn we generate. */
21857 gcc_assert (insn != NULL_RTX);
21858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21859 RTX_FRAME_RELATED_P (insn) = 1;
21860 }
21861
21862 /* Generate and emit an insn that we will recognize as a push_multi.
21863 Unfortunately, since this insn does not reflect very well the actual
21864 semantics of the operation, we need to annotate the insn for the benefit
21865 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
21866 MASK for registers that should be annotated for DWARF2 frame unwind
21867 information. */
21868 static rtx
21869 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21870 {
21871 int num_regs = 0;
21872 int num_dwarf_regs = 0;
21873 int i, j;
21874 rtx par;
21875 rtx dwarf;
21876 int dwarf_par_index;
21877 rtx tmp, reg;
21878
21879 /* We don't record the PC in the dwarf frame information. */
21880 dwarf_regs_mask &= ~(1 << PC_REGNUM);
21881
21882 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21883 {
21884 if (mask & (1 << i))
21885 num_regs++;
21886 if (dwarf_regs_mask & (1 << i))
21887 num_dwarf_regs++;
21888 }
21889
21890 gcc_assert (num_regs && num_regs <= 16);
21891 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21892
21893 /* For the body of the insn we are going to generate an UNSPEC in
21894 parallel with several USEs. This allows the insn to be recognized
21895 by the push_multi pattern in the arm.md file.
21896
21897 The body of the insn looks something like this:
21898
21899 (parallel [
21900 (set (mem:BLK (pre_modify:SI (reg:SI sp)
21901 (const_int:SI <num>)))
21902 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21903 (use (reg:SI XX))
21904 (use (reg:SI YY))
21905 ...
21906 ])
21907
21908 For the frame note however, we try to be more explicit and actually
21909 show each register being stored into the stack frame, plus a (single)
21910 decrement of the stack pointer. We do it this way in order to be
21911 friendly to the stack unwinding code, which only wants to see a single
21912 stack decrement per instruction. The RTL we generate for the note looks
21913 something like this:
21914
21915 (sequence [
21916 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21917 (set (mem:SI (reg:SI sp)) (reg:SI r4))
21918 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21919 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21920 ...
21921 ])
21922
21923 FIXME:: In an ideal world the PRE_MODIFY would not exist and
21924 instead we'd have a parallel expression detailing all
21925 the stores to the various memory addresses so that debug
21926 information is more up-to-date. Remember however while writing
21927 this to take care of the constraints with the push instruction.
21928
21929 Note also that this has to be taken care of for the VFP registers.
21930
21931 For more see PR43399. */
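/* As a small concrete illustration (the register choice is arbitrary):
   for MASK == DWARF_REGS_MASK == {r4, r5, lr}, the insn built below is
   recognized as push_multi and prints roughly as "push {r4, r5, lr}",
   while the attached note describes sp = sp - 12 followed by stores of
   r4, r5 and lr at sp, sp + 4 and sp + 8. */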
21932
21933 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21934 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21935 dwarf_par_index = 1;
21936
21937 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21938 {
21939 if (mask & (1 << i))
21940 {
21941 reg = gen_rtx_REG (SImode, i);
21942
21943 XVECEXP (par, 0, 0)
21944 = gen_rtx_SET (gen_frame_mem
21945 (BLKmode,
21946 gen_rtx_PRE_MODIFY (Pmode,
21947 stack_pointer_rtx,
21948 plus_constant
21949 (Pmode, stack_pointer_rtx,
21950 -4 * num_regs))
21951 ),
21952 gen_rtx_UNSPEC (BLKmode,
21953 gen_rtvec (1, reg),
21954 UNSPEC_PUSH_MULT));
21955
21956 if (dwarf_regs_mask & (1 << i))
21957 {
21958 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
21959 reg);
21960 RTX_FRAME_RELATED_P (tmp) = 1;
21961 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21962 }
21963
21964 break;
21965 }
21966 }
21967
21968 for (j = 1, i++; j < num_regs; i++)
21969 {
21970 if (mask & (1 << i))
21971 {
21972 reg = gen_rtx_REG (SImode, i);
21973
21974 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
21975
21976 if (dwarf_regs_mask & (1 << i))
21977 {
21978 tmp
21979 = gen_rtx_SET (gen_frame_mem
21980 (SImode,
21981 plus_constant (Pmode, stack_pointer_rtx,
21982 4 * j)),
21983 reg);
21984 RTX_FRAME_RELATED_P (tmp) = 1;
21985 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21986 }
21987
21988 j++;
21989 }
21990 }
21991
21992 par = emit_insn (par);
21993
21994 tmp = gen_rtx_SET (stack_pointer_rtx,
21995 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21996 RTX_FRAME_RELATED_P (tmp) = 1;
21997 XVECEXP (dwarf, 0, 0) = tmp;
21998
21999 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22000
22001 return par;
22002 }
22003
22004 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22005 SIZE is the offset to be adjusted.
22006 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22007 static void
22008 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22009 {
22010 rtx dwarf;
22011
22012 RTX_FRAME_RELATED_P (insn) = 1;
22013 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22014 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22015 }
22016
22017 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22018 SAVED_REGS_MASK shows which registers need to be restored.
22019
22020 Unfortunately, since this insn does not reflect very well the actual
22021 semantics of the operation, we need to annotate the insn for the benefit
22022 of DWARF2 frame unwind information. */
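/* As a small concrete illustration (the register choice is arbitrary): for
   SAVED_REGS_MASK == {r4, r5, pc} the emitted jump insn is recognized as a
   pop_multi that returns via PC, roughly "pop {r4, r5, pc}"; its PARALLEL
   holds the return, the sp = sp + 12 update and one load per register, and
   REG_CFA_RESTORE notes are attached for r4 and r5 but not for PC. */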
22023 static void
22024 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22025 {
22026 int num_regs = 0;
22027 int i, j;
22028 rtx par;
22029 rtx dwarf = NULL_RTX;
22030 rtx tmp, reg;
22031 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22032 int offset_adj;
22033 int emit_update;
22034
22035 offset_adj = return_in_pc ? 1 : 0;
22036 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22037 if (saved_regs_mask & (1 << i))
22038 num_regs++;
22039
22040 gcc_assert (num_regs && num_regs <= 16);
22041
22042 /* If SP is in the register list, then we don't emit the SP update insn. */
22043 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22044
22045 /* The parallel needs to hold num_regs SETs
22046 and one SET for the stack update. */
22047 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22048
22049 if (return_in_pc)
22050 XVECEXP (par, 0, 0) = ret_rtx;
22051
22052 if (emit_update)
22053 {
22054 /* Increment the stack pointer, based on there being
22055 num_regs 4-byte registers to restore. */
22056 tmp = gen_rtx_SET (stack_pointer_rtx,
22057 plus_constant (Pmode,
22058 stack_pointer_rtx,
22059 4 * num_regs));
22060 RTX_FRAME_RELATED_P (tmp) = 1;
22061 XVECEXP (par, 0, offset_adj) = tmp;
22062 }
22063
22064 /* Now restore every reg, which may include PC. */
22065 for (j = 0, i = 0; j < num_regs; i++)
22066 if (saved_regs_mask & (1 << i))
22067 {
22068 reg = gen_rtx_REG (SImode, i);
22069 if ((num_regs == 1) && emit_update && !return_in_pc)
22070 {
22071 /* Emit single load with writeback. */
22072 tmp = gen_frame_mem (SImode,
22073 gen_rtx_POST_INC (Pmode,
22074 stack_pointer_rtx));
22075 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22076 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22077 return;
22078 }
22079
22080 tmp = gen_rtx_SET (reg,
22081 gen_frame_mem
22082 (SImode,
22083 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22084 RTX_FRAME_RELATED_P (tmp) = 1;
22085 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22086
22087 /* We need to maintain a sequence for DWARF info too. As the DWARF
22088 info should not include PC, skip it. */
22089 if (i != PC_REGNUM)
22090 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22091
22092 j++;
22093 }
22094
22095 if (return_in_pc)
22096 par = emit_jump_insn (par);
22097 else
22098 par = emit_insn (par);
22099
22100 REG_NOTES (par) = dwarf;
22101 if (!return_in_pc)
22102 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22103 stack_pointer_rtx, stack_pointer_rtx);
22104 }
22105
22106 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22107 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22108
22109 Unfortunately, since this insn does not reflect very well the actual
22110 semantics of the operation, we need to annotate the insn for the benefit
22111 of DWARF2 frame unwind information. */
22112 static void
22113 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22114 {
22115 int i, j;
22116 rtx par;
22117 rtx dwarf = NULL_RTX;
22118 rtx tmp, reg;
22119
22120 gcc_assert (num_regs && num_regs <= 32);
22121
22122 /* Work around the ARM10 VFPr1 bug. */
22123 if (num_regs == 2 && !arm_arch6)
22124 {
22125 if (first_reg == 15)
22126 first_reg--;
22127
22128 num_regs++;
22129 }
22130
22131 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22132 there could be up to 32 D-registers to restore.
22133 If there are more than 16 D-registers, make two recursive calls,
22134 each of which emits one pop_multi instruction. */
22135 if (num_regs > 16)
22136 {
22137 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22138 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22139 return;
22140 }
22141
22142 /* The parallel needs to hold num_regs SETs
22143 and one SET for the stack update. */
22144 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22145
22146 /* Increment the stack pointer, based on there being
22147 num_regs 8-byte registers to restore. */
22148 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22149 RTX_FRAME_RELATED_P (tmp) = 1;
22150 XVECEXP (par, 0, 0) = tmp;
22151
22152 /* Now show every reg that will be restored, using a SET for each. */
22153 for (j = 0, i=first_reg; j < num_regs; i += 2)
22154 {
22155 reg = gen_rtx_REG (DFmode, i);
22156
22157 tmp = gen_rtx_SET (reg,
22158 gen_frame_mem
22159 (DFmode,
22160 plus_constant (Pmode, base_reg, 8 * j)));
22161 RTX_FRAME_RELATED_P (tmp) = 1;
22162 XVECEXP (par, 0, j + 1) = tmp;
22163
22164 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22165
22166 j++;
22167 }
22168
22169 par = emit_insn (par);
22170 REG_NOTES (par) = dwarf;
22171
22172 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22173 if (REGNO (base_reg) == IP_REGNUM)
22174 {
22175 RTX_FRAME_RELATED_P (par) = 1;
22176 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22177 }
22178 else
22179 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22180 base_reg, base_reg);
22181 }
22182
22183 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22184 an even number of registers is being popped, multiple LDRD patterns are
22185 created, one for each register pair. If an odd number of registers is
22186 popped, the last register is loaded using an LDR pattern. */
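/* For example (a sketch; the register choice is arbitrary): with
   SAVED_REGS_MASK covering {r4, r5, r6, pc}, PC reduces the pair count to
   one, so a single LDRD is emitted with offset addressing, the stack is
   adjusted past it, and the remaining {r6, pc} are handled by
   arm_emit_multi_reg_pop, roughly:

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}  */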
22187 static void
22188 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22189 {
22190 int num_regs = 0;
22191 int i, j;
22192 rtx par = NULL_RTX;
22193 rtx dwarf = NULL_RTX;
22194 rtx tmp, reg, tmp1;
22195 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22196
22197 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22198 if (saved_regs_mask & (1 << i))
22199 num_regs++;
22200
22201 gcc_assert (num_regs && num_regs <= 16);
22202
22203 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22204 to be popped. So, if num_regs is even, now it will become odd,
22205 and we can generate pop with PC. If num_regs is odd, it will be
22206 even now, and ldr with return can be generated for PC. */
22207 if (return_in_pc)
22208 num_regs--;
22209
22210 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22211
22212 /* Var j iterates over the registers in saved_regs_mask, while var i
22213 gives the index of a saved register within the stack frame.
22214 A PARALLEL RTX of a register pair is created here, so that the pattern
22215 for LDRD can be matched. As PC is always the last register to be
22216 popped, and we have already decremented num_regs if PC is set, we
22217 don't have to worry about PC in this loop. */
22218 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22219 if (saved_regs_mask & (1 << j))
22220 {
22221 /* Create RTX for memory load. */
22222 reg = gen_rtx_REG (SImode, j);
22223 tmp = gen_rtx_SET (reg,
22224 gen_frame_mem (SImode,
22225 plus_constant (Pmode,
22226 stack_pointer_rtx, 4 * i)));
22227 RTX_FRAME_RELATED_P (tmp) = 1;
22228
22229 if (i % 2 == 0)
22230 {
22231 /* When saved-register index (i) is even, the RTX to be emitted is
22232 yet to be created. Hence create it first. The LDRD pattern we
22233 are generating is :
22234 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22235 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22236 where target registers need not be consecutive. */
22237 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22238 dwarf = NULL_RTX;
22239 }
22240
22241 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i
22242 is added as the 0th element, and if i is odd, it is added as the 1st
22243 element of the LDRD pattern shown above. */
22244 XVECEXP (par, 0, (i % 2)) = tmp;
22245 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22246
22247 if ((i % 2) == 1)
22248 {
22249 /* When saved-register index (i) is odd, RTXs for both the registers
22250 to be loaded are generated in above given LDRD pattern, and the
22251 pattern can be emitted now. */
22252 par = emit_insn (par);
22253 REG_NOTES (par) = dwarf;
22254 RTX_FRAME_RELATED_P (par) = 1;
22255 }
22256
22257 i++;
22258 }
22259
22260 /* If the number of registers popped is odd and return_in_pc is false, or
22261 the number of registers is even and return_in_pc is true, the last
22262 register is popped using LDR. It can be PC as well. Hence, adjust the
22263 stack first and then use LDR with post-increment. */
22264
22265 /* Increment the stack pointer, based on there being
22266 num_regs 4-byte registers to restore. */
22267 tmp = gen_rtx_SET (stack_pointer_rtx,
22268 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22269 RTX_FRAME_RELATED_P (tmp) = 1;
22270 tmp = emit_insn (tmp);
22271 if (!return_in_pc)
22272 {
22273 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22274 stack_pointer_rtx, stack_pointer_rtx);
22275 }
22276
22277 dwarf = NULL_RTX;
22278
22279 if (((num_regs % 2) == 1 && !return_in_pc)
22280 || ((num_regs % 2) == 0 && return_in_pc))
22281 {
22282 /* Scan for the single register to be popped. Skip until the saved
22283 register is found. */
22284 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22285
22286 /* Gen LDR with post increment here. */
22287 tmp1 = gen_rtx_MEM (SImode,
22288 gen_rtx_POST_INC (SImode,
22289 stack_pointer_rtx));
22290 set_mem_alias_set (tmp1, get_frame_alias_set ());
22291
22292 reg = gen_rtx_REG (SImode, j);
22293 tmp = gen_rtx_SET (reg, tmp1);
22294 RTX_FRAME_RELATED_P (tmp) = 1;
22295 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22296
22297 if (return_in_pc)
22298 {
22299 /* If return_in_pc, j must be PC_REGNUM. */
22300 gcc_assert (j == PC_REGNUM);
22301 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22302 XVECEXP (par, 0, 0) = ret_rtx;
22303 XVECEXP (par, 0, 1) = tmp;
22304 par = emit_jump_insn (par);
22305 }
22306 else
22307 {
22308 par = emit_insn (tmp);
22309 REG_NOTES (par) = dwarf;
22310 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22311 stack_pointer_rtx, stack_pointer_rtx);
22312 }
22313
22314 }
22315 else if ((num_regs % 2) == 1 && return_in_pc)
22316 {
22317 /* There are 2 registers to be popped. So, generate the pattern
22318 pop_multiple_with_stack_update_and_return to pop in PC. */
22319 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22320 }
22321
22322 return;
22323 }
22324
22325 /* LDRD in ARM mode needs consecutive registers as operands. This function
22326 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22327 offset addressing and then generates one separate stack update. This provides
22328 more scheduling freedom, compared to writeback on every load. However,
22329 if the function returns using load into PC directly
22330 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22331 before the last load. TODO: Add a peephole optimization to recognize
22332 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22333 peephole optimization to merge the load at stack-offset zero
22334 with the stack update instruction using load with writeback
22335 in post-index addressing mode. */
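/* For example (a sketch; the register choice is arbitrary): with
   SAVED_REGS_MASK covering {r4, r5, r6} and no PC, every load uses offset
   addressing and a single stack update follows, roughly:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12  */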
22336 static void
22337 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22338 {
22339 int j = 0;
22340 int offset = 0;
22341 rtx par = NULL_RTX;
22342 rtx dwarf = NULL_RTX;
22343 rtx tmp, mem;
22344
22345 /* Restore saved registers. */
22346 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22347 j = 0;
22348 while (j <= LAST_ARM_REGNUM)
22349 if (saved_regs_mask & (1 << j))
22350 {
22351 if ((j % 2) == 0
22352 && (saved_regs_mask & (1 << (j + 1)))
22353 && (j + 1) != PC_REGNUM)
22354 {
22355 /* Current register and next register form register pair for which
22356 LDRD can be generated. PC is always the last register popped, and
22357 we handle it separately. */
22358 if (offset > 0)
22359 mem = gen_frame_mem (DImode,
22360 plus_constant (Pmode,
22361 stack_pointer_rtx,
22362 offset));
22363 else
22364 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22365
22366 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22367 tmp = emit_insn (tmp);
22368 RTX_FRAME_RELATED_P (tmp) = 1;
22369
22370 /* Generate dwarf info. */
22371
22372 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22373 gen_rtx_REG (SImode, j),
22374 NULL_RTX);
22375 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22376 gen_rtx_REG (SImode, j + 1),
22377 dwarf);
22378
22379 REG_NOTES (tmp) = dwarf;
22380
22381 offset += 8;
22382 j += 2;
22383 }
22384 else if (j != PC_REGNUM)
22385 {
22386 /* Emit a single word load. */
22387 if (offset > 0)
22388 mem = gen_frame_mem (SImode,
22389 plus_constant (Pmode,
22390 stack_pointer_rtx,
22391 offset));
22392 else
22393 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22394
22395 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22396 tmp = emit_insn (tmp);
22397 RTX_FRAME_RELATED_P (tmp) = 1;
22398
22399 /* Generate dwarf info. */
22400 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22401 gen_rtx_REG (SImode, j),
22402 NULL_RTX);
22403
22404 offset += 4;
22405 j += 1;
22406 }
22407 else /* j == PC_REGNUM */
22408 j++;
22409 }
22410 else
22411 j++;
22412
22413 /* Update the stack. */
22414 if (offset > 0)
22415 {
22416 tmp = gen_rtx_SET (stack_pointer_rtx,
22417 plus_constant (Pmode,
22418 stack_pointer_rtx,
22419 offset));
22420 tmp = emit_insn (tmp);
22421 arm_add_cfa_adjust_cfa_note (tmp, offset,
22422 stack_pointer_rtx, stack_pointer_rtx);
22423 offset = 0;
22424 }
22425
22426 if (saved_regs_mask & (1 << PC_REGNUM))
22427 {
22428 /* Only PC is to be popped. */
22429 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22430 XVECEXP (par, 0, 0) = ret_rtx;
22431 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22432 gen_frame_mem (SImode,
22433 gen_rtx_POST_INC (SImode,
22434 stack_pointer_rtx)));
22435 RTX_FRAME_RELATED_P (tmp) = 1;
22436 XVECEXP (par, 0, 1) = tmp;
22437 par = emit_jump_insn (par);
22438
22439 /* Generate dwarf info. */
22440 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22441 gen_rtx_REG (SImode, PC_REGNUM),
22442 NULL_RTX);
22443 REG_NOTES (par) = dwarf;
22444 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22445 stack_pointer_rtx, stack_pointer_rtx);
22446 }
22447 }
22448
22449 /* Calculate the size of the return value that is passed in registers. */
22450 static unsigned
22451 arm_size_return_regs (void)
22452 {
22453 machine_mode mode;
22454
22455 if (crtl->return_rtx != 0)
22456 mode = GET_MODE (crtl->return_rtx);
22457 else
22458 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22459
22460 return GET_MODE_SIZE (mode);
22461 }
22462
22463 /* Return true if the current function needs to save/restore LR. */
22464 static bool
22465 thumb_force_lr_save (void)
22466 {
22467 return !cfun->machine->lr_save_eliminated
22468 && (!crtl->is_leaf
22469 || thumb_far_jump_used_p ()
22470 || df_regs_ever_live_p (LR_REGNUM));
22471 }
22472
22473 /* We do not know if r3 will be available, because
22474 we have an indirect tail call happening in this
22475 particular case. */
22476 static bool
22477 is_indirect_tailcall_p (rtx call)
22478 {
22479 rtx pat = PATTERN (call);
22480
22481 /* Indirect tail call. */
22482 pat = XVECEXP (pat, 0, 0);
22483 if (GET_CODE (pat) == SET)
22484 pat = SET_SRC (pat);
22485
22486 pat = XEXP (XEXP (pat, 0), 0);
22487 return REG_P (pat);
22488 }
22489
22490 /* Return true if r3 is used by any of the tail call insns in the
22491 current function. */
22492 static bool
22493 any_sibcall_could_use_r3 (void)
22494 {
22495 edge_iterator ei;
22496 edge e;
22497
22498 if (!crtl->tail_call_emit)
22499 return false;
22500 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22501 if (e->flags & EDGE_SIBCALL)
22502 {
22503 rtx_insn *call = BB_END (e->src);
22504 if (!CALL_P (call))
22505 call = prev_nonnote_nondebug_insn (call);
22506 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22507 if (find_regno_fusage (call, USE, 3)
22508 || is_indirect_tailcall_p (call))
22509 return true;
22510 }
22511 return false;
22512 }
22513
22514
22515 /* Compute the distance from register FROM to register TO.
22516 These can be the arg pointer (26), the soft frame pointer (25),
22517 the stack pointer (13) or the hard frame pointer (11).
22518 In thumb mode r7 is used as the soft frame pointer, if needed.
22519 Typical stack layout looks like this:
22520
22521 old stack pointer -> | |
22522 ----
22523 | | \
22524 | | saved arguments for
22525 | | vararg functions
22526 | | /
22527 --
22528 hard FP & arg pointer -> | | \
22529 | | stack
22530 | | frame
22531 | | /
22532 --
22533 | | \
22534 | | call saved
22535 | | registers
22536 soft frame pointer -> | | /
22537 --
22538 | | \
22539 | | local
22540 | | variables
22541 locals base pointer -> | | /
22542 --
22543 | | \
22544 | | outgoing
22545 | | arguments
22546 current stack pointer -> | | /
22547 --
22548
22549 For a given function some or all of these stack components
22550 may not be needed, giving rise to the possibility of
22551 eliminating some of the registers.
22552
22553 The values returned by this function must reflect the behavior
22554 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22555
22556 The sign of the number returned reflects the direction of stack
22557 growth, so the values are positive for all eliminations except
22558 from the soft frame pointer to the hard frame pointer.
22559
22560 SFP may point just inside the local variables block to ensure correct
22561 alignment. */
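/* For instance, eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM
   below yields offsets->outgoing_args - (offsets->saved_args + 4), which is
   -4 for a function that pushes nothing onto the stack at all. */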
22562
22563
22564 /* Return cached stack offsets. */
22565
22566 static arm_stack_offsets *
22567 arm_get_frame_offsets (void)
22568 {
22569 struct arm_stack_offsets *offsets;
22570
22571 offsets = &cfun->machine->stack_offsets;
22572
22573 return offsets;
22574 }
22575
22576
22577 /* Calculate stack offsets. These are used to calculate register elimination
22578 offsets and in prologue/epilogue code. Also calculates which registers
22579 should be saved. */
22580
22581 static void
22582 arm_compute_frame_layout (void)
22583 {
22584 struct arm_stack_offsets *offsets;
22585 unsigned long func_type;
22586 int saved;
22587 int core_saved;
22588 HOST_WIDE_INT frame_size;
22589 int i;
22590
22591 offsets = &cfun->machine->stack_offsets;
22592
22593 /* Initially this is the size of the local variables. It will be translated
22594 into an offset once we have determined the size of the preceding data. */
22595 frame_size = ROUND_UP_WORD (get_frame_size ());
22596
22597 /* Space for variadic functions. */
22598 offsets->saved_args = crtl->args.pretend_args_size;
22599
22600 /* In Thumb mode this is incorrect, but never used. */
22601 offsets->frame
22602 = (offsets->saved_args
22603 + arm_compute_static_chain_stack_bytes ()
22604 + (frame_pointer_needed ? 4 : 0));
22605
22606 if (TARGET_32BIT)
22607 {
22608 unsigned int regno;
22609
22610 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22611 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22612 saved = core_saved;
22613
22614 /* We know that SP will be doubleword aligned on entry, and we must
22615 preserve that condition at any subroutine call. We also require the
22616 soft frame pointer to be doubleword aligned. */
22617
22618 if (TARGET_REALLY_IWMMXT)
22619 {
22620 /* Check for the call-saved iWMMXt registers. */
22621 for (regno = FIRST_IWMMXT_REGNUM;
22622 regno <= LAST_IWMMXT_REGNUM;
22623 regno++)
22624 if (reg_needs_saving_p (regno))
22625 saved += 8;
22626 }
22627
22628 func_type = arm_current_func_type ();
22629 /* Space for saved VFP registers. */
22630 if (! IS_VOLATILE (func_type)
22631 && TARGET_VFP_BASE)
22632 saved += arm_get_vfp_saved_size ();
22633
22634 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22635 nonsecure entry functions with VSTR/VLDR. */
22636 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22637 saved += 4;
22638 }
22639 else /* TARGET_THUMB1 */
22640 {
22641 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22642 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22643 saved = core_saved;
22644 if (TARGET_BACKTRACE)
22645 saved += 16;
22646 }
22647
22648 /* Saved registers include the stack frame. */
22649 offsets->saved_regs
22650 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22651 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22652
22653 /* A leaf function does not need any stack alignment if it has nothing
22654 on the stack. */
22655 if (crtl->is_leaf && frame_size == 0
22656 /* However if it calls alloca(), we have a dynamically allocated
22657 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22658 && ! cfun->calls_alloca)
22659 {
22660 offsets->outgoing_args = offsets->soft_frame;
22661 offsets->locals_base = offsets->soft_frame;
22662 return;
22663 }
22664
22665 /* Ensure SFP has the correct alignment. */
22666 if (ARM_DOUBLEWORD_ALIGN
22667 && (offsets->soft_frame & 7))
22668 {
22669 offsets->soft_frame += 4;
22670 /* Try to align stack by pushing an extra reg. Don't bother doing this
22671 when there is a stack frame as the alignment will be rolled into
22672 the normal stack adjustment. */
22673 if (frame_size + crtl->outgoing_args_size == 0)
22674 {
22675 int reg = -1;
22676
22677 /* Register r3 is caller-saved. Normally it does not need to be
22678 saved on entry by the prologue. However if we choose to save
22679 it for padding then we may confuse the compiler into thinking
22680 a prologue sequence is required when in fact it is not. This
22681 will occur when shrink-wrapping if r3 is used as a scratch
22682 register and there are no other callee-saved writes.
22683
22684 This situation can be avoided when other callee-saved registers
22685 are available and r3 is not mandatory if we choose a callee-saved
22686 register for padding. */
22687 bool prefer_callee_reg_p = false;
22688
22689 /* If it is safe to use r3, then do so. This sometimes
22690 generates better code on Thumb-2 by avoiding the need to
22691 use 32-bit push/pop instructions. */
22692 if (! any_sibcall_could_use_r3 ()
22693 && arm_size_return_regs () <= 12
22694 && (offsets->saved_regs_mask & (1 << 3)) == 0
22695 && (TARGET_THUMB2
22696 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22697 {
22698 reg = 3;
22699 if (!TARGET_THUMB2)
22700 prefer_callee_reg_p = true;
22701 }
22702 if (reg == -1
22703 || prefer_callee_reg_p)
22704 {
22705 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22706 {
22707 /* Avoid fixed registers; they may be changed at
22708 arbitrary times so it's unsafe to restore them
22709 during the epilogue. */
22710 if (!fixed_regs[i]
22711 && (offsets->saved_regs_mask & (1 << i)) == 0)
22712 {
22713 reg = i;
22714 break;
22715 }
22716 }
22717 }
22718
22719 if (reg != -1)
22720 {
22721 offsets->saved_regs += 4;
22722 offsets->saved_regs_mask |= (1 << reg);
22723 }
22724 }
22725 }
22726
22727 offsets->locals_base = offsets->soft_frame + frame_size;
22728 offsets->outgoing_args = (offsets->locals_base
22729 + crtl->outgoing_args_size);
22730
22731 if (ARM_DOUBLEWORD_ALIGN)
22732 {
22733 /* Ensure SP remains doubleword aligned. */
22734 if (offsets->outgoing_args & 7)
22735 offsets->outgoing_args += 4;
22736 gcc_assert (!(offsets->outgoing_args & 7));
22737 }
22738 }
22739
22740
22741 /* Calculate the relative offsets for the different stack pointers. Positive
22742 offsets are in the direction of stack growth. */
22743
22744 HOST_WIDE_INT
22745 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22746 {
22747 arm_stack_offsets *offsets;
22748
22749 offsets = arm_get_frame_offsets ();
22750
22751 /* OK, now we have enough information to compute the distances.
22752 There must be an entry in these switch tables for each pair
22753 of registers in ELIMINABLE_REGS, even if some of the entries
22754 seem to be redundant or useless. */
22755 switch (from)
22756 {
22757 case ARG_POINTER_REGNUM:
22758 switch (to)
22759 {
22760 case THUMB_HARD_FRAME_POINTER_REGNUM:
22761 return 0;
22762
22763 case FRAME_POINTER_REGNUM:
22764 /* This is the reverse of the soft frame pointer
22765 to hard frame pointer elimination below. */
22766 return offsets->soft_frame - offsets->saved_args;
22767
22768 case ARM_HARD_FRAME_POINTER_REGNUM:
22769 /* This is only non-zero in the case where the static chain register
22770 is stored above the frame. */
22771 return offsets->frame - offsets->saved_args - 4;
22772
22773 case STACK_POINTER_REGNUM:
22774 /* If nothing has been pushed on the stack at all
22775 then this will return -4. This *is* correct! */
22776 return offsets->outgoing_args - (offsets->saved_args + 4);
22777
22778 default:
22779 gcc_unreachable ();
22780 }
22781 gcc_unreachable ();
22782
22783 case FRAME_POINTER_REGNUM:
22784 switch (to)
22785 {
22786 case THUMB_HARD_FRAME_POINTER_REGNUM:
22787 return 0;
22788
22789 case ARM_HARD_FRAME_POINTER_REGNUM:
22790 /* The hard frame pointer points to the top entry in the
22791 stack frame. The soft frame pointer points to the bottom
22792 entry in the stack frame. If there is no stack frame at
22793 all, then they are identical. */
22794
22795 return offsets->frame - offsets->soft_frame;
22796
22797 case STACK_POINTER_REGNUM:
22798 return offsets->outgoing_args - offsets->soft_frame;
22799
22800 default:
22801 gcc_unreachable ();
22802 }
22803 gcc_unreachable ();
22804
22805 default:
22806 /* You cannot eliminate from the stack pointer.
22807 In theory you could eliminate from the hard frame
22808 pointer to the stack pointer, but this will never
22809 happen, since if a stack frame is not needed the
22810 hard frame pointer will never be used. */
22811 gcc_unreachable ();
22812 }
22813 }
22814
22815 /* Given FROM and TO register numbers, say whether this elimination is
22816 allowed. Frame pointer elimination is automatically handled.
22817
22818 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
22819 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
22820 pointer, we must eliminate FRAME_POINTER_REGNUM into
22821 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22822 ARG_POINTER_REGNUM. */
22823
22824 bool
22825 arm_can_eliminate (const int from, const int to)
22826 {
22827 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22828 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22829 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
22830 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
22831 true);
22832 }
22833
22834 /* Emit RTL to save coprocessor registers on function entry. Returns the
22835 number of bytes pushed. */
22836
22837 static int
22838 arm_save_coproc_regs(void)
22839 {
22840 int saved_size = 0;
22841 unsigned reg;
22842 unsigned start_reg;
22843 rtx insn;
22844
22845 if (TARGET_REALLY_IWMMXT)
22846 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22847 if (reg_needs_saving_p (reg))
22848 {
22849 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22850 insn = gen_rtx_MEM (V2SImode, insn);
22851 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22852 RTX_FRAME_RELATED_P (insn) = 1;
22853 saved_size += 8;
22854 }
22855
22856 if (TARGET_VFP_BASE)
22857 {
22858 start_reg = FIRST_VFP_REGNUM;
22859
22860 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22861 {
22862 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
22863 {
22864 if (start_reg != reg)
22865 saved_size += vfp_emit_fstmd (start_reg,
22866 (reg - start_reg) / 2);
22867 start_reg = reg + 2;
22868 }
22869 }
22870 if (start_reg != reg)
22871 saved_size += vfp_emit_fstmd (start_reg,
22872 (reg - start_reg) / 2);
22873 }
22874 return saved_size;
22875 }
22876
22877
22878 /* Set the Thumb frame pointer from the stack pointer. */
22879
22880 static void
22881 thumb_set_frame_pointer (arm_stack_offsets *offsets)
22882 {
22883 HOST_WIDE_INT amount;
22884 rtx insn, dwarf;
22885
22886 amount = offsets->outgoing_args - offsets->locals_base;
22887 if (amount < 1024)
22888 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22889 stack_pointer_rtx, GEN_INT (amount)));
22890 else
22891 {
22892 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22893 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
22894 expects the first two operands to be the same. */
22895 if (TARGET_THUMB2)
22896 {
22897 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22898 stack_pointer_rtx,
22899 hard_frame_pointer_rtx));
22900 }
22901 else
22902 {
22903 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22904 hard_frame_pointer_rtx,
22905 stack_pointer_rtx));
22906 }
22907 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22908 plus_constant (Pmode, stack_pointer_rtx, amount));
22909 RTX_FRAME_RELATED_P (dwarf) = 1;
22910 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22911 }
22912
22913 RTX_FRAME_RELATED_P (insn) = 1;
22914 }
22915
22916 struct scratch_reg {
22917 rtx reg;
22918 bool saved;
22919 };
22920
22921 /* Return a short-lived scratch register for use as a 2nd scratch register on
22922 function entry after the registers are saved in the prologue. This register
22923 must be released by means of release_scratch_register_on_entry. IP is not
22924 considered since it is always used as the 1st scratch register if available.
22925
22926 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22927 mask of live registers. */
22928
22929 static void
22930 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22931 unsigned long live_regs)
22932 {
22933 int regno = -1;
22934
22935 sr->saved = false;
22936
22937 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22938 regno = LR_REGNUM;
22939 else
22940 {
22941 unsigned int i;
22942
22943 for (i = 4; i < 11; i++)
22944 if (regno1 != i && (live_regs & (1 << i)) != 0)
22945 {
22946 regno = i;
22947 break;
22948 }
22949
22950 if (regno < 0)
22951 {
22952 /* If IP is used as the 1st scratch register for a nested function,
22953 then either r3 wasn't available or is used to preserve IP. */
22954 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
22955 regno1 = 3;
22956 regno = (regno1 == 3 ? 2 : 3);
22957 sr->saved
22958 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
22959 regno);
22960 }
22961 }
22962
22963 sr->reg = gen_rtx_REG (SImode, regno);
22964 if (sr->saved)
22965 {
22966 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22967 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
22968 rtx x = gen_rtx_SET (stack_pointer_rtx,
22969 plus_constant (Pmode, stack_pointer_rtx, -4));
22970 RTX_FRAME_RELATED_P (insn) = 1;
22971 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22972 }
22973 }
22974
22975 /* Release a scratch register obtained from the preceding function. */
22976
22977 static void
22978 release_scratch_register_on_entry (struct scratch_reg *sr)
22979 {
22980 if (sr->saved)
22981 {
22982 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
22983 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
22984 rtx x = gen_rtx_SET (stack_pointer_rtx,
22985 plus_constant (Pmode, stack_pointer_rtx, 4));
22986 RTX_FRAME_RELATED_P (insn) = 1;
22987 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22988 }
22989 }
22990
22991 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22992
22993 #if PROBE_INTERVAL > 4096
22994 #error Cannot use indexed addressing mode for stack probing
22995 #endif
22996
22997 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22998 inclusive. These are offsets from the current stack pointer. REGNO1
22999 is the index number of the 1st scratch register and LIVE_REGS is the
23000 mask of live registers. */
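/* A worked example of the unrolled case (assuming the default 4 KB
   PROBE_INTERVAL and FIRST == 0): for SIZE == 10000, the code below probes
   at sp - 4096 and sp - 8192, and then covers the remaining 1808 bytes
   with a final probe at sp - 10000. */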
23001
23002 static void
23003 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23004 unsigned int regno1, unsigned long live_regs)
23005 {
23006 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23007
23008 /* See if we have a constant small number of probes to generate. If so,
23009 that's the easy case. */
23010 if (size <= PROBE_INTERVAL)
23011 {
23012 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23013 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23014 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23015 }
23016
23017 /* The run-time loop is made up of 10 insns in the generic case while the
23018 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23019 else if (size <= 5 * PROBE_INTERVAL)
23020 {
23021 HOST_WIDE_INT i, rem;
23022
23023 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23024 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23025 emit_stack_probe (reg1);
23026
23027 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23028 it exceeds SIZE. If only two probes are needed, this will not
23029 generate any code. Then probe at FIRST + SIZE. */
23030 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23031 {
23032 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23033 emit_stack_probe (reg1);
23034 }
23035
23036 rem = size - (i - PROBE_INTERVAL);
23037 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23038 {
23039 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23040 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23041 }
23042 else
23043 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23044 }
23045
23046 /* Otherwise, do the same as above, but in a loop. Note that we must be
23047 extra careful with variables wrapping around because we might be at
23048 the very top (or the very bottom) of the address space and we have
23049 to be able to handle this case properly; in particular, we use an
23050 equality test for the loop condition. */
23051 else
23052 {
23053 HOST_WIDE_INT rounded_size;
23054 struct scratch_reg sr;
23055
23056 get_scratch_register_on_entry (&sr, regno1, live_regs);
23057
23058 emit_move_insn (reg1, GEN_INT (first));
23059
23060
23061 /* Step 1: round SIZE to the previous multiple of the interval. */
23062
23063 rounded_size = size & -PROBE_INTERVAL;
23064 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23065
23066
23067 /* Step 2: compute initial and final value of the loop counter. */
23068
23069 /* TEST_ADDR = SP + FIRST. */
23070 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23071
23072 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23073 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23074
23075
23076 /* Step 3: the loop
23077
23078 do
23079 {
23080 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23081 probe at TEST_ADDR
23082 }
23083 while (TEST_ADDR != LAST_ADDR)
23084
23085 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23086 until it is equal to ROUNDED_SIZE. */
23087
23088 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23089
23090
23091 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23092 that SIZE is equal to ROUNDED_SIZE. */
23093
23094 if (size != rounded_size)
23095 {
23096 HOST_WIDE_INT rem = size - rounded_size;
23097
23098 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23099 {
23100 emit_set_insn (sr.reg,
23101 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23102 emit_stack_probe (plus_constant (Pmode, sr.reg,
23103 PROBE_INTERVAL - rem));
23104 }
23105 else
23106 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23107 }
23108
23109 release_scratch_register_on_entry (&sr);
23110 }
23111
23112 /* Make sure nothing is scheduled before we are done. */
23113 emit_insn (gen_blockage ());
23114 }
23115
23116 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23117 absolute addresses. */
23118
23119 const char *
23120 output_probe_stack_range (rtx reg1, rtx reg2)
23121 {
23122 static int labelno = 0;
23123 char loop_lab[32];
23124 rtx xops[2];
23125
23126 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23127
23128 /* Loop. */
23129 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23130
23131 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23132 xops[0] = reg1;
23133 xops[1] = GEN_INT (PROBE_INTERVAL);
23134 output_asm_insn ("sub\t%0, %0, %1", xops);
23135
23136 /* Probe at TEST_ADDR. */
23137 output_asm_insn ("str\tr0, [%0, #0]", xops);
23138
23139 /* Test if TEST_ADDR == LAST_ADDR. */
23140 xops[1] = reg2;
23141 output_asm_insn ("cmp\t%0, %1", xops);
23142
23143 /* Branch. */
23144 fputs ("\tbne\t", asm_out_file);
23145 assemble_name_raw (asm_out_file, loop_lab);
23146 fputc ('\n', asm_out_file);
23147
23148 return "";
23149 }
23150
23151 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23152 function. */
23153 void
23154 arm_expand_prologue (void)
23155 {
23156 rtx amount;
23157 rtx insn;
23158 rtx ip_rtx;
23159 unsigned long live_regs_mask;
23160 unsigned long func_type;
23161 int fp_offset = 0;
23162 int saved_pretend_args = 0;
23163 int saved_regs = 0;
23164 unsigned HOST_WIDE_INT args_to_push;
23165 HOST_WIDE_INT size;
23166 arm_stack_offsets *offsets;
23167 bool clobber_ip;
23168
23169 func_type = arm_current_func_type ();
23170
23171 /* Naked functions don't have prologues. */
23172 if (IS_NAKED (func_type))
23173 {
23174 if (flag_stack_usage_info)
23175 current_function_static_stack_size = 0;
23176 return;
23177 }
23178
23179 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
23180 args_to_push = crtl->args.pretend_args_size;
23181
23182 /* Compute which registers we will have to save onto the stack. */
23183 offsets = arm_get_frame_offsets ();
23184 live_regs_mask = offsets->saved_regs_mask;
23185
23186 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23187
23188 if (IS_STACKALIGN (func_type))
23189 {
23190 rtx r0, r1;
23191
23192 /* Handle a word-aligned stack pointer. We generate the following:
23193
23194 mov r0, sp
23195 bic r1, r0, #7
23196 mov sp, r1
23197 <save and restore r0 in normal prologue/epilogue>
23198 mov sp, r0
23199 bx lr
23200
23201 The unwinder doesn't need to know about the stack realignment.
23202 Just tell it we saved SP in r0. */
23203 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23204
23205 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23206 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23207
23208 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23209 RTX_FRAME_RELATED_P (insn) = 1;
23210 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23211
23212 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23213
23214 /* ??? The CFA changes here, which may cause GDB to conclude that it
23215 has entered a different function. That said, the unwind info is
23216 correct, individually, before and after this instruction because
23217 we've described the save of SP, which will override the default
23218 handling of SP as restoring from the CFA. */
23219 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23220 }
23221
23222 /* Let's compute the static_chain_stack_bytes required and store it. Right
23223 now the value must be -1 as stored by arm_init_machine_status (). */
23224 cfun->machine->static_chain_stack_bytes
23225 = arm_compute_static_chain_stack_bytes ();
23226
23227 /* The static chain register is the same as the IP register. If it is
23228 clobbered when creating the frame, we need to save and restore it. */
23229 clobber_ip = IS_NESTED (func_type)
23230 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23231 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23232 || flag_stack_clash_protection)
23233 && !df_regs_ever_live_p (LR_REGNUM)
23234 && arm_r3_live_at_start_p ()));
23235
23236 /* Find somewhere to store IP whilst the frame is being created.
23237 We try the following places in order:
23238
23239 1. The last argument register r3 if it is available.
23240 2. A slot on the stack above the frame if there are no
23241 arguments to push onto the stack.
23242 3. Register r3 again, after pushing the argument registers
23243 onto the stack, if this is a varargs function.
23244 4. The last slot on the stack created for the arguments to
23245 push, if this isn't a varargs function.
23246
23247 Note - we only need to tell the dwarf2 backend about the SP
23248 adjustment in the second variant; the static chain register
23249 doesn't need to be unwound, as it doesn't contain a value
23250 inherited from the caller. */
23251 if (clobber_ip)
23252 {
23253 if (!arm_r3_live_at_start_p ())
23254 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23255 else if (args_to_push == 0)
23256 {
23257 rtx addr, dwarf;
23258
23259 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
23260 saved_regs += 4;
23261
23262 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23263 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23264 fp_offset = 4;
23265
23266 /* Just tell the dwarf backend that we adjusted SP. */
23267 dwarf = gen_rtx_SET (stack_pointer_rtx,
23268 plus_constant (Pmode, stack_pointer_rtx,
23269 -fp_offset));
23270 RTX_FRAME_RELATED_P (insn) = 1;
23271 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23272 }
23273 else
23274 {
23275 /* Store the args on the stack. */
23276 if (cfun->machine->uses_anonymous_args)
23277 {
23278 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23279 (0xf0 >> (args_to_push / 4)) & 0xf);
23280 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23281 saved_pretend_args = 1;
23282 }
23283 else
23284 {
23285 rtx addr, dwarf;
23286
23287 if (args_to_push == 4)
23288 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23289 else
23290 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23291 plus_constant (Pmode,
23292 stack_pointer_rtx,
23293 -args_to_push));
23294
23295 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23296
23297 /* Just tell the dwarf backend that we adjusted SP. */
23298 dwarf = gen_rtx_SET (stack_pointer_rtx,
23299 plus_constant (Pmode, stack_pointer_rtx,
23300 -args_to_push));
23301 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23302 }
23303
23304 RTX_FRAME_RELATED_P (insn) = 1;
23305 fp_offset = args_to_push;
23306 args_to_push = 0;
23307 }
23308 }
23309
23310 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23311 {
23312 if (IS_INTERRUPT (func_type))
23313 {
23314 /* Interrupt functions must not corrupt any registers.
23315 Creating a frame pointer however, corrupts the IP
23316 register, so we must push it first. */
23317 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23318
23319 /* Do not set RTX_FRAME_RELATED_P on this insn.
23320 The dwarf stack unwinding code only wants to see one
23321 stack decrement per function, and this is not it. If
23322 this instruction is labeled as being part of the frame
23323 creation sequence then dwarf2out_frame_debug_expr will
23324 die when it encounters the assignment of IP to FP
23325 later on, since the use of SP here establishes SP as
23326 the CFA register and not IP.
23327
23328 Anyway this instruction is not really part of the stack
23329 frame creation although it is part of the prologue. */
23330 }
23331
23332 insn = emit_set_insn (ip_rtx,
23333 plus_constant (Pmode, stack_pointer_rtx,
23334 fp_offset));
23335 RTX_FRAME_RELATED_P (insn) = 1;
23336 }
23337
23338 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23339 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23340 {
23341 saved_regs += 4;
23342 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23343 GEN_INT (FPCXTNS_ENUM)));
23344 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23345 plus_constant (Pmode, stack_pointer_rtx, -4));
23346 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23347 RTX_FRAME_RELATED_P (insn) = 1;
23348 }
23349
23350 if (args_to_push)
23351 {
23352 /* Push the argument registers, or reserve space for them. */
23353 if (cfun->machine->uses_anonymous_args)
23354 insn = emit_multi_reg_push
23355 ((0xf0 >> (args_to_push / 4)) & 0xf,
23356 (0xf0 >> (args_to_push / 4)) & 0xf);
23357 else
23358 insn = emit_insn
23359 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23360 GEN_INT (- args_to_push)));
23361 RTX_FRAME_RELATED_P (insn) = 1;
23362 }
23363
23364 /* If this is an interrupt service routine, and the link register
23365 is going to be pushed, and we're not generating the extra push
23366 of IP (which is needed when a frame pointer is required and the
23367 APCS frame layout is used), then subtracting four from LR now
23368 means that the function return can be done with a single instruction. */
23369 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23370 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23371 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23372 && TARGET_ARM)
23373 {
23374 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23375
23376 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23377 }
23378
23379 if (live_regs_mask)
23380 {
23381 unsigned long dwarf_regs_mask = live_regs_mask;
23382
23383 saved_regs += bit_count (live_regs_mask) * 4;
23384 if (optimize_size && !frame_pointer_needed
23385 && saved_regs == offsets->saved_regs - offsets->saved_args)
23386 {
23387 /* If no coprocessor registers are being pushed and we don't have
23388 to worry about a frame pointer then push extra registers to
23389 create the stack frame. This is done in a way that does not
23390 alter the frame layout, so is independent of the epilogue. */
23391 int n;
23392 int frame;
23393 n = 0;
23394 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23395 n++;
23396 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23397 if (frame && n * 4 >= frame)
23398 {
23399 n = frame / 4;
23400 live_regs_mask |= (1 << n) - 1;
23401 saved_regs += frame;
23402 }
23403 }
23404
23405 if (TARGET_LDRD
23406 && current_tune->prefer_ldrd_strd
23407 && !optimize_function_for_size_p (cfun))
23408 {
23409 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23410 if (TARGET_THUMB2)
23411 thumb2_emit_strd_push (live_regs_mask);
23412 else if (TARGET_ARM
23413 && !TARGET_APCS_FRAME
23414 && !IS_INTERRUPT (func_type))
23415 arm_emit_strd_push (live_regs_mask);
23416 else
23417 {
23418 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23419 RTX_FRAME_RELATED_P (insn) = 1;
23420 }
23421 }
23422 else
23423 {
23424 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23425 RTX_FRAME_RELATED_P (insn) = 1;
23426 }
23427 }
23428
23429 if (! IS_VOLATILE (func_type))
23430 saved_regs += arm_save_coproc_regs ();
23431
23432 if (frame_pointer_needed && TARGET_ARM)
23433 {
23434 /* Create the new frame pointer. */
23435 if (TARGET_APCS_FRAME)
23436 {
23437 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23438 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23439 RTX_FRAME_RELATED_P (insn) = 1;
23440 }
23441 else
23442 {
23443 insn = GEN_INT (saved_regs - (4 + fp_offset));
23444 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23445 stack_pointer_rtx, insn));
23446 RTX_FRAME_RELATED_P (insn) = 1;
23447 }
23448 }
23449
23450 size = offsets->outgoing_args - offsets->saved_args;
23451 if (flag_stack_usage_info)
23452 current_function_static_stack_size = size;
23453
23454 /* If this isn't an interrupt service routine and we have a frame, then do
23455 stack checking. We use IP as the first scratch register, except for the
23456 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23457 if (!IS_INTERRUPT (func_type)
23458 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23459 || flag_stack_clash_protection))
23460 {
23461 unsigned int regno;
23462
23463 if (!IS_NESTED (func_type) || clobber_ip)
23464 regno = IP_REGNUM;
23465 else if (df_regs_ever_live_p (LR_REGNUM))
23466 regno = LR_REGNUM;
23467 else
23468 regno = 3;
23469
23470 if (crtl->is_leaf && !cfun->calls_alloca)
23471 {
23472 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23473 arm_emit_probe_stack_range (get_stack_check_protect (),
23474 size - get_stack_check_protect (),
23475 regno, live_regs_mask);
23476 }
23477 else if (size > 0)
23478 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23479 regno, live_regs_mask);
23480 }
23481
23482 /* Recover the static chain register. */
23483 if (clobber_ip)
23484 {
23485 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23486 insn = gen_rtx_REG (SImode, 3);
23487 else
23488 {
23489 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23490 insn = gen_frame_mem (SImode, insn);
23491 }
23492 emit_set_insn (ip_rtx, insn);
23493 emit_insn (gen_force_register_use (ip_rtx));
23494 }
23495
23496 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23497 {
23498 /* This add can produce multiple insns for a large constant, so we
23499 need to get tricky. */
23500 rtx_insn *last = get_last_insn ();
23501
23502 amount = GEN_INT (offsets->saved_args + saved_regs
23503 - offsets->outgoing_args);
23504
23505 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23506 amount));
23507 do
23508 {
23509 last = last ? NEXT_INSN (last) : get_insns ();
23510 RTX_FRAME_RELATED_P (last) = 1;
23511 }
23512 while (last != insn);
23513
23514 /* If the frame pointer is needed, emit a special barrier that
23515 will prevent the scheduler from moving stores to the frame
23516 before the stack adjustment. */
23517 if (frame_pointer_needed)
23518 emit_insn (gen_stack_tie (stack_pointer_rtx,
23519 hard_frame_pointer_rtx));
23520 }
23521
23522
23523 if (frame_pointer_needed && TARGET_THUMB2)
23524 thumb_set_frame_pointer (offsets);
23525
23526 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23527 {
23528 unsigned long mask;
23529
23530 mask = live_regs_mask;
23531 mask &= THUMB2_WORK_REGS;
23532 if (!IS_NESTED (func_type))
23533 mask |= (1 << IP_REGNUM);
23534 arm_load_pic_register (mask, NULL_RTX);
23535 }
23536
23537 /* If we are profiling, make sure no instructions are scheduled before
23538 the call to mcount. Similarly if the user has requested no
23539 scheduling in the prolog. Similarly if we want non-call exceptions
23540 using the EABI unwinder, to prevent faulting instructions from being
23541 swapped with a stack adjustment. */
23542 if (crtl->profile || !TARGET_SCHED_PROLOG
23543 || (arm_except_unwind_info (&global_options) == UI_TARGET
23544 && cfun->can_throw_non_call_exceptions))
23545 emit_insn (gen_blockage ());
23546
23547 /* If the link register is being kept alive, with the return address in it,
23548 then make sure that it does not get reused by the ce2 pass. */
23549 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23550 cfun->machine->lr_save_eliminated = 1;
23551 }
23552 \f
23553 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23554 static void
23555 arm_print_condition (FILE *stream)
23556 {
23557 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23558 {
23559 /* Branch conversion is not implemented for Thumb-2. */
23560 if (TARGET_THUMB)
23561 {
23562 output_operand_lossage ("predicated Thumb instruction");
23563 return;
23564 }
23565 if (current_insn_predicate != NULL)
23566 {
23567 output_operand_lossage
23568 ("predicated instruction in conditional sequence");
23569 return;
23570 }
23571
23572 fputs (arm_condition_codes[arm_current_cc], stream);
23573 }
23574 else if (current_insn_predicate)
23575 {
23576 enum arm_cond_code code;
23577
23578 if (TARGET_THUMB1)
23579 {
23580 output_operand_lossage ("predicated Thumb instruction");
23581 return;
23582 }
23583
23584 code = get_arm_condition_code (current_insn_predicate);
23585 fputs (arm_condition_codes[code], stream);
23586 }
23587 }
23588
23589
23590 /* Globally reserved letters: acln
23591 Punctuation letters currently used: @_|?().!#
23592 Lower case letters currently used: bcdefhimpqtvwxyz
23593 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
23594 Letters previously used, but now deprecated/obsolete: sVWXYZ.
23595
23596 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23597
23598 If CODE is 'd', then X is a condition operand and the instruction
23599 should only be executed if the condition is true.
23600 If CODE is 'D', then X is a condition operand and the instruction
23601 should only be executed if the condition is false: however, if the mode
23602 of the comparison is CCFPEmode, then always execute the instruction -- we
23603 do this because in these circumstances !GE does not necessarily imply LT;
23604 in these cases the instruction pattern will take care to make sure that
23605 an instruction containing %d will follow, thereby undoing the effects of
23606 doing this instruction unconditionally.
23607 If CODE is 'N' then X is a floating point operand that must be negated
23608 before output.
23609 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23610 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
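/* For example, with a DImode value held in r4, "%M0" in an output template
prints "{r4-r5}", and "%B0" applied to (const_int 0) prints -1. */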
23611 static void
23612 arm_print_operand (FILE *stream, rtx x, int code)
23613 {
23614 switch (code)
23615 {
23616 case '@':
23617 fputs (ASM_COMMENT_START, stream);
23618 return;
23619
23620 case '_':
23621 fputs (user_label_prefix, stream);
23622 return;
23623
23624 case '|':
23625 fputs (REGISTER_PREFIX, stream);
23626 return;
23627
23628 case '?':
23629 arm_print_condition (stream);
23630 return;
23631
23632 case '.':
23633 /* The current condition code for a condition code setting instruction.
23634 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23635 fputc ('s', stream);
23636 arm_print_condition (stream);
23637 return;
23638
23639 case '!':
23640 /* If the instruction is conditionally executed then print
23641 the current condition code, otherwise print 's'. */
23642 gcc_assert (TARGET_THUMB2);
23643 if (current_insn_predicate)
23644 arm_print_condition (stream);
23645 else
23646 fputc ('s', stream);
23647 break;
23648
23649 /* %# is a "break" sequence. It doesn't output anything, but is used to
23650 separate e.g. operand numbers from following text, if that text consists
23651 of further digits which we don't want to be part of the operand
23652 number. */
23653 case '#':
23654 return;
23655
23656 case 'N':
23657 {
23658 REAL_VALUE_TYPE r;
23659 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23660 fprintf (stream, "%s", fp_const_from_val (&r));
23661 }
23662 return;
23663
23664 /* An integer or symbol address without a preceding # sign. */
23665 case 'c':
23666 switch (GET_CODE (x))
23667 {
23668 case CONST_INT:
23669 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23670 break;
23671
23672 case SYMBOL_REF:
23673 output_addr_const (stream, x);
23674 break;
23675
23676 case CONST:
23677 if (GET_CODE (XEXP (x, 0)) == PLUS
23678 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23679 {
23680 output_addr_const (stream, x);
23681 break;
23682 }
23683 /* Fall through. */
23684
23685 default:
23686 output_operand_lossage ("Unsupported operand for code '%c'", code);
23687 }
23688 return;
23689
23690 /* An integer that we want to print in HEX. */
23691 case 'x':
23692 switch (GET_CODE (x))
23693 {
23694 case CONST_INT:
23695 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23696 break;
23697
23698 default:
23699 output_operand_lossage ("Unsupported operand for code '%c'", code);
23700 }
23701 return;
23702
23703 case 'B':
23704 if (CONST_INT_P (x))
23705 {
23706 HOST_WIDE_INT val;
23707 val = ARM_SIGN_EXTEND (~INTVAL (x));
23708 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23709 }
23710 else
23711 {
23712 putc ('~', stream);
23713 output_addr_const (stream, x);
23714 }
23715 return;
23716
23717 case 'b':
23718 /* Print the log2 of a CONST_INT. */
23719 {
23720 HOST_WIDE_INT val;
23721
23722 if (!CONST_INT_P (x)
23723 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23724 output_operand_lossage ("Unsupported operand for code '%c'", code);
23725 else
23726 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23727 }
23728 return;
23729
23730 case 'L':
23731 /* The low 16 bits of an immediate constant. */
23732 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
23733 return;
23734
23735 case 'i':
23736 fprintf (stream, "%s", arithmetic_instr (x, 1));
23737 return;
23738
23739 case 'I':
23740 fprintf (stream, "%s", arithmetic_instr (x, 0));
23741 return;
23742
23743 case 'S':
23744 {
23745 HOST_WIDE_INT val;
23746 const char *shift;
23747
23748 shift = shift_op (x, &val);
23749
23750 if (shift)
23751 {
23752 fprintf (stream, ", %s ", shift);
23753 if (val == -1)
23754 arm_print_operand (stream, XEXP (x, 1), 0);
23755 else
23756 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23757 }
23758 }
23759 return;
23760
23761 /* An explanation of the 'Q', 'R' and 'H' register operands:
23762
23763 In a pair of registers containing a DI or DF value the 'Q'
23764 operand returns the register number of the register containing
23765 the least significant part of the value. The 'R' operand returns
23766 the register number of the register containing the most
23767 significant part of the value.
23768
23769 The 'H' operand returns the higher of the two register numbers.
23770 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
23771 same as the 'Q' operand, since the most significant part of the
23772 value is held in the lower number register. The reverse is true
23773 on systems where WORDS_BIG_ENDIAN is false.
23774
23775 The purpose of these operands is to distinguish between cases
23776 where the endian-ness of the values is important (for example
23777 when they are added together), and cases where the endian-ness
23778 is irrelevant, but the order of register operations is important.
23779 For example when loading a value from memory into a register
23780 pair, the endian-ness does not matter. Provided that the value
23781 from the lower memory address is put into the lower numbered
23782 register, and the value from the higher address is put into the
23783 higher numbered register, the load will work regardless of whether
23784 the value being loaded is big-wordian or little-wordian. The
23785 order of the two register loads can matter however, if the address
23786 of the memory location is actually held in one of the registers
23787 being overwritten by the load.
23788
23789 The 'Q' and 'R' constraints are also available for 64-bit
23790 constants. */
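/* For example, for a DImode value held in the register pair r0/r1 on a
little-endian target, '%Q' prints r0, '%R' prints r1 and '%H' also prints
r1; when WORDS_BIG_ENDIAN is set, '%Q' and '%H' both print r1 while '%R'
prints r0. */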
23791 case 'Q':
23792 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23793 {
23794 rtx part = gen_lowpart (SImode, x);
23795 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23796 return;
23797 }
23798
23799 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23800 {
23801 output_operand_lossage ("invalid operand for code '%c'", code);
23802 return;
23803 }
23804
23805 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23806 return;
23807
23808 case 'R':
23809 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23810 {
23811 machine_mode mode = GET_MODE (x);
23812 rtx part;
23813
23814 if (mode == VOIDmode)
23815 mode = DImode;
23816 part = gen_highpart_mode (SImode, mode, x);
23817 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23818 return;
23819 }
23820
23821 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23822 {
23823 output_operand_lossage ("invalid operand for code '%c'", code);
23824 return;
23825 }
23826
23827 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23828 return;
23829
23830 case 'H':
23831 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23832 {
23833 output_operand_lossage ("invalid operand for code '%c'", code);
23834 return;
23835 }
23836
23837 asm_fprintf (stream, "%r", REGNO (x) + 1);
23838 return;
23839
23840 case 'J':
23841 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23842 {
23843 output_operand_lossage ("invalid operand for code '%c'", code);
23844 return;
23845 }
23846
23847 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23848 return;
23849
23850 case 'K':
23851 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23852 {
23853 output_operand_lossage ("invalid operand for code '%c'", code);
23854 return;
23855 }
23856
23857 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23858 return;
23859
23860 case 'm':
23861 asm_fprintf (stream, "%r",
23862 REG_P (XEXP (x, 0))
23863 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23864 return;
23865
23866 case 'M':
23867 asm_fprintf (stream, "{%r-%r}",
23868 REGNO (x),
23869 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
23870 return;
23871
23872 /* Like 'M', but writing doubleword vector registers, for use by Neon
23873 insns. */
23874 case 'h':
23875 {
23876 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23877 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23878 if (numregs == 1)
23879 asm_fprintf (stream, "{d%d}", regno);
23880 else
23881 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23882 }
23883 return;
23884
23885 case 'd':
23886 /* CONST_TRUE_RTX means always -- that's the default. */
23887 if (x == const_true_rtx)
23888 return;
23889
23890 if (!COMPARISON_P (x))
23891 {
23892 output_operand_lossage ("invalid operand for code '%c'", code);
23893 return;
23894 }
23895
23896 fputs (arm_condition_codes[get_arm_condition_code (x)],
23897 stream);
23898 return;
23899
23900 case 'D':
23901 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
23902 want to do that. */
23903 if (x == const_true_rtx)
23904 {
23905 output_operand_lossage ("instruction never executed");
23906 return;
23907 }
23908 if (!COMPARISON_P (x))
23909 {
23910 output_operand_lossage ("invalid operand for code '%c'", code);
23911 return;
23912 }
23913
23914 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23915 (get_arm_condition_code (x))],
23916 stream);
23917 return;
23918
23919 case 's':
23920 case 'V':
23921 case 'W':
23922 case 'X':
23923 case 'Y':
23924 case 'Z':
23925 /* Former Maverick support, removed after GCC-4.7. */
23926 output_operand_lossage ("obsolete Maverick format code '%c'", code);
23927 return;
23928
23929 case 'U':
23930 if (!REG_P (x)
23931 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23932 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23933 /* Bad value for wCG register number. */
23934 {
23935 output_operand_lossage ("invalid operand for code '%c'", code);
23936 return;
23937 }
23938
23939 else
23940 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23941 return;
23942
23943 /* Print an iWMMXt control register name. */
23944 case 'w':
23945 if (!CONST_INT_P (x)
23946 || INTVAL (x) < 0
23947 || INTVAL (x) >= 16)
23948 /* Bad value for wC register number. */
23949 {
23950 output_operand_lossage ("invalid operand for code '%c'", code);
23951 return;
23952 }
23953
23954 else
23955 {
23956 static const char * wc_reg_names [16] =
23957 {
23958 "wCID", "wCon", "wCSSF", "wCASF",
23959 "wC4", "wC5", "wC6", "wC7",
23960 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
23961 "wC12", "wC13", "wC14", "wC15"
23962 };
23963
23964 fputs (wc_reg_names [INTVAL (x)], stream);
23965 }
23966 return;
23967
23968 /* Print the high single-precision register of a VFP double-precision
23969 register. */
23970 case 'p':
23971 {
23972 machine_mode mode = GET_MODE (x);
23973 int regno;
23974
23975 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
23976 {
23977 output_operand_lossage ("invalid operand for code '%c'", code);
23978 return;
23979 }
23980
23981 regno = REGNO (x);
23982 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
23983 {
23984 output_operand_lossage ("invalid operand for code '%c'", code);
23985 return;
23986 }
23987
23988 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
23989 }
23990 return;
23991
23992 /* Print a VFP/Neon double precision or quad precision register name. */
23993 case 'P':
23994 case 'q':
23995 {
23996 machine_mode mode = GET_MODE (x);
23997 int is_quad = (code == 'q');
23998 int regno;
23999
24000 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24001 {
24002 output_operand_lossage ("invalid operand for code '%c'", code);
24003 return;
24004 }
24005
24006 if (!REG_P (x)
24007 || !IS_VFP_REGNUM (REGNO (x)))
24008 {
24009 output_operand_lossage ("invalid operand for code '%c'", code);
24010 return;
24011 }
24012
24013 regno = REGNO (x);
24014 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24015 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24016 {
24017 output_operand_lossage ("invalid operand for code '%c'", code);
24018 return;
24019 }
24020
24021 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24022 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24023 }
24024 return;
24025
24026 /* These two codes print the low/high doubleword register of a Neon quad
24027 register, respectively. For pair-structure types, can also print
24028 low/high quadword registers. */
24029 case 'e':
24030 case 'f':
24031 {
24032 machine_mode mode = GET_MODE (x);
24033 int regno;
24034
24035 if ((GET_MODE_SIZE (mode) != 16
24036 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24037 {
24038 output_operand_lossage ("invalid operand for code '%c'", code);
24039 return;
24040 }
24041
24042 regno = REGNO (x);
24043 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24044 {
24045 output_operand_lossage ("invalid operand for code '%c'", code);
24046 return;
24047 }
24048
24049 if (GET_MODE_SIZE (mode) == 16)
24050 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24051 + (code == 'f' ? 1 : 0));
24052 else
24053 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24054 + (code == 'f' ? 1 : 0));
24055 }
24056 return;
24057
24058 /* Print a VFPv3 floating-point constant, represented as an integer
24059 index. */
24060 case 'G':
24061 {
24062 int index = vfp3_const_double_index (x);
24063 gcc_assert (index != -1);
24064 fprintf (stream, "%d", index);
24065 }
24066 return;
24067
24068 /* Print bits representing opcode features for Neon.
24069
24070 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24071 and polynomials as unsigned.
24072
24073 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24074
24075 Bit 2 is 1 for rounding functions, 0 otherwise. */
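/* Under this encoding (bits & 3) selects the element type: 0 is an unsigned
integer, 1 a signed integer, 2 a polynomial and 3 a float, which is exactly
the indexing used by the "uspf", "iipf" and "usuf" lookup strings below. */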
24076
24077 /* Identify the type as 's', 'u', 'p' or 'f'. */
24078 case 'T':
24079 {
24080 HOST_WIDE_INT bits = INTVAL (x);
24081 fputc ("uspf"[bits & 3], stream);
24082 }
24083 return;
24084
24085 /* Likewise, but signed and unsigned integers are both 'i'. */
24086 case 'F':
24087 {
24088 HOST_WIDE_INT bits = INTVAL (x);
24089 fputc ("iipf"[bits & 3], stream);
24090 }
24091 return;
24092
24093 /* As for 'T', but emit 'u' instead of 'p'. */
24094 case 't':
24095 {
24096 HOST_WIDE_INT bits = INTVAL (x);
24097 fputc ("usuf"[bits & 3], stream);
24098 }
24099 return;
24100
24101 /* Bit 2: rounding (vs none). */
24102 case 'O':
24103 {
24104 HOST_WIDE_INT bits = INTVAL (x);
24105 fputs ((bits & 4) != 0 ? "r" : "", stream);
24106 }
24107 return;
24108
24109 /* Memory operand for vld1/vst1 instruction. */
24110 case 'A':
24111 {
24112 rtx addr;
24113 bool postinc = false;
24114 rtx postinc_reg = NULL;
24115 unsigned align, memsize, align_bits;
24116
24117 gcc_assert (MEM_P (x));
24118 addr = XEXP (x, 0);
24119 if (GET_CODE (addr) == POST_INC)
24120 {
24121 postinc = true;
24122 addr = XEXP (addr, 0);
24123 }
24124 if (GET_CODE (addr) == POST_MODIFY)
24125 {
24126 postinc_reg = XEXP (XEXP (addr, 1), 1);
24127 addr = XEXP (addr, 0);
24128 }
24129 asm_fprintf (stream, "[%r", REGNO (addr));
24130
24131 /* We know the alignment of this access, so we can emit a hint in the
24132 instruction (for some alignments) as an aid to the memory subsystem
24133 of the target. */
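/* For instance, a 16-byte access known to be at least 16-byte aligned gets
a ":128" hint, e.g. "[r0:128]"; if no suitable alignment is known the hint
is simply omitted. */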
24134 align = MEM_ALIGN (x) >> 3;
24135 memsize = MEM_SIZE (x);
24136
24137 /* Only certain alignment specifiers are supported by the hardware. */
24138 if (memsize == 32 && (align % 32) == 0)
24139 align_bits = 256;
24140 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24141 align_bits = 128;
24142 else if (memsize >= 8 && (align % 8) == 0)
24143 align_bits = 64;
24144 else
24145 align_bits = 0;
24146
24147 if (align_bits != 0)
24148 asm_fprintf (stream, ":%d", align_bits);
24149
24150 asm_fprintf (stream, "]");
24151
24152 if (postinc)
24153 fputs ("!", stream);
24154 if (postinc_reg)
24155 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24156 }
24157 return;
24158
24159 /* To print the memory operand with "Ux" or "Uj" constraint. Based on the
24160 rtx_code, the memory operand is output in one of the following forms.
24161 1. [Rn], #+/-<imm>
24162 2. [Rn, #+/-<imm>]!
24163 3. [Rn, #+/-<imm>]
24164 4. [Rn]. */
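/* Form 1 is used for POST_INC/POST_DEC and for POST_MODIFY, form 2 for
PRE_INC/PRE_DEC and PRE_MODIFY (the MODIFY variants are only handled for a
constant offset), form 3 for a PLUS of a register and a constant, and form 4
for a plain register address. */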
24165 case 'E':
24166 {
24167 rtx addr;
24168 rtx postinc_reg = NULL;
24169 unsigned inc_val = 0;
24170 enum rtx_code code;
24171
24172 gcc_assert (MEM_P (x));
24173 addr = XEXP (x, 0);
24174 code = GET_CODE (addr);
24175 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24176 || code == PRE_DEC)
24177 {
24178 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24179 inc_val = GET_MODE_SIZE (GET_MODE (x));
24180 if (code == POST_INC || code == POST_DEC)
24181 asm_fprintf (stream, "], #%s%d", (code == POST_INC)
24182 ? "" : "-", inc_val);
24183 else
24184 asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
24185 ? "" : "-", inc_val);
24186 }
24187 else if (code == POST_MODIFY || code == PRE_MODIFY)
24188 {
24189 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24190 postinc_reg = XEXP (XEXP (addr, 1), 1);
24191 if (postinc_reg && CONST_INT_P (postinc_reg))
24192 {
24193 if (code == POST_MODIFY)
24194 asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
24195 else
24196 asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
24197 }
24198 }
24199 else if (code == PLUS)
24200 {
24201 rtx base = XEXP (addr, 0);
24202 rtx index = XEXP (addr, 1);
24203
24204 gcc_assert (REG_P (base) && CONST_INT_P (index));
24205
24206 HOST_WIDE_INT offset = INTVAL (index);
24207 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24208 }
24209 else
24210 {
24211 gcc_assert (REG_P (addr));
24212 asm_fprintf (stream, "[%r]", REGNO (addr));
24213 }
24214 }
24215 return;
24216
24217 case 'C':
24218 {
24219 rtx addr;
24220
24221 gcc_assert (MEM_P (x));
24222 addr = XEXP (x, 0);
24223 gcc_assert (REG_P (addr));
24224 asm_fprintf (stream, "[%r]", REGNO (addr));
24225 }
24226 return;
24227
24228 /* Translate an S register number into a D register number and element index. */
24229 case 'y':
24230 {
24231 machine_mode mode = GET_MODE (x);
24232 int regno;
24233
24234 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24235 {
24236 output_operand_lossage ("invalid operand for code '%c'", code);
24237 return;
24238 }
24239
24240 regno = REGNO (x);
24241 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24242 {
24243 output_operand_lossage ("invalid operand for code '%c'", code);
24244 return;
24245 }
24246
24247 regno = regno - FIRST_VFP_REGNUM;
24248 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24249 }
24250 return;
24251
24252 case 'v':
24253 gcc_assert (CONST_DOUBLE_P (x));
24254 int result;
24255 result = vfp3_const_double_for_fract_bits (x);
24256 if (result == 0)
24257 result = vfp3_const_double_for_bits (x);
24258 fprintf (stream, "#%d", result);
24259 return;
24260
24261 /* Register specifier for vld1.16/vst1.16. Translate the S register
24262 number into a D register number and element index. */
24263 case 'z':
24264 {
24265 machine_mode mode = GET_MODE (x);
24266 int regno;
24267
24268 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24269 {
24270 output_operand_lossage ("invalid operand for code '%c'", code);
24271 return;
24272 }
24273
24274 regno = REGNO (x);
24275 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24276 {
24277 output_operand_lossage ("invalid operand for code '%c'", code);
24278 return;
24279 }
24280
24281 regno = regno - FIRST_VFP_REGNUM;
24282 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24283 }
24284 return;
24285
24286 default:
24287 if (x == 0)
24288 {
24289 output_operand_lossage ("missing operand");
24290 return;
24291 }
24292
24293 switch (GET_CODE (x))
24294 {
24295 case REG:
24296 asm_fprintf (stream, "%r", REGNO (x));
24297 break;
24298
24299 case MEM:
24300 output_address (GET_MODE (x), XEXP (x, 0));
24301 break;
24302
24303 case CONST_DOUBLE:
24304 {
24305 char fpstr[20];
24306 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24307 sizeof (fpstr), 0, 1);
24308 fprintf (stream, "#%s", fpstr);
24309 }
24310 break;
24311
24312 default:
24313 gcc_assert (GET_CODE (x) != NEG);
24314 fputc ('#', stream);
24315 if (GET_CODE (x) == HIGH)
24316 {
24317 fputs (":lower16:", stream);
24318 x = XEXP (x, 0);
24319 }
24320
24321 output_addr_const (stream, x);
24322 break;
24323 }
24324 }
24325 }
24326 \f
24327 /* Target hook for printing a memory address. */
24328 static void
24329 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24330 {
24331 if (TARGET_32BIT)
24332 {
24333 int is_minus = GET_CODE (x) == MINUS;
24334
24335 if (REG_P (x))
24336 asm_fprintf (stream, "[%r]", REGNO (x));
24337 else if (GET_CODE (x) == PLUS || is_minus)
24338 {
24339 rtx base = XEXP (x, 0);
24340 rtx index = XEXP (x, 1);
24341 HOST_WIDE_INT offset = 0;
24342 if (!REG_P (base)
24343 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24344 {
24345 /* Ensure that BASE is a register. */
24346 /* (one of them must be). */
24347 /* Also ensure the SP is not used as an index register. */
24348 std::swap (base, index);
24349 }
24350 switch (GET_CODE (index))
24351 {
24352 case CONST_INT:
24353 offset = INTVAL (index);
24354 if (is_minus)
24355 offset = -offset;
24356 asm_fprintf (stream, "[%r, #%wd]",
24357 REGNO (base), offset);
24358 break;
24359
24360 case REG:
24361 asm_fprintf (stream, "[%r, %s%r]",
24362 REGNO (base), is_minus ? "-" : "",
24363 REGNO (index));
24364 break;
24365
24366 case MULT:
24367 case ASHIFTRT:
24368 case LSHIFTRT:
24369 case ASHIFT:
24370 case ROTATERT:
24371 {
24372 asm_fprintf (stream, "[%r, %s%r",
24373 REGNO (base), is_minus ? "-" : "",
24374 REGNO (XEXP (index, 0)));
24375 arm_print_operand (stream, index, 'S');
24376 fputs ("]", stream);
24377 break;
24378 }
24379
24380 default:
24381 gcc_unreachable ();
24382 }
24383 }
24384 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24385 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24386 {
24387 gcc_assert (REG_P (XEXP (x, 0)));
24388
24389 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24390 asm_fprintf (stream, "[%r, #%s%d]!",
24391 REGNO (XEXP (x, 0)),
24392 GET_CODE (x) == PRE_DEC ? "-" : "",
24393 GET_MODE_SIZE (mode));
24394 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24395 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24396 else
24397 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24398 GET_CODE (x) == POST_DEC ? "-" : "",
24399 GET_MODE_SIZE (mode));
24400 }
24401 else if (GET_CODE (x) == PRE_MODIFY)
24402 {
24403 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24404 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24405 asm_fprintf (stream, "#%wd]!",
24406 INTVAL (XEXP (XEXP (x, 1), 1)));
24407 else
24408 asm_fprintf (stream, "%r]!",
24409 REGNO (XEXP (XEXP (x, 1), 1)));
24410 }
24411 else if (GET_CODE (x) == POST_MODIFY)
24412 {
24413 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24414 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24415 asm_fprintf (stream, "#%wd",
24416 INTVAL (XEXP (XEXP (x, 1), 1)));
24417 else
24418 asm_fprintf (stream, "%r",
24419 REGNO (XEXP (XEXP (x, 1), 1)));
24420 }
24421 else output_addr_const (stream, x);
24422 }
24423 else
24424 {
24425 if (REG_P (x))
24426 asm_fprintf (stream, "[%r]", REGNO (x));
24427 else if (GET_CODE (x) == POST_INC)
24428 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24429 else if (GET_CODE (x) == PLUS)
24430 {
24431 gcc_assert (REG_P (XEXP (x, 0)));
24432 if (CONST_INT_P (XEXP (x, 1)))
24433 asm_fprintf (stream, "[%r, #%wd]",
24434 REGNO (XEXP (x, 0)),
24435 INTVAL (XEXP (x, 1)));
24436 else
24437 asm_fprintf (stream, "[%r, %r]",
24438 REGNO (XEXP (x, 0)),
24439 REGNO (XEXP (x, 1)));
24440 }
24441 else
24442 output_addr_const (stream, x);
24443 }
24444 }
24445 \f
24446 /* Target hook for indicating whether a punctuation character for
24447 TARGET_PRINT_OPERAND is valid. */
24448 static bool
24449 arm_print_operand_punct_valid_p (unsigned char code)
24450 {
24451 return (code == '@' || code == '|' || code == '.'
24452 || code == '(' || code == ')' || code == '#'
24453 || (TARGET_32BIT && (code == '?'))
24454 || (TARGET_THUMB2 && (code == '!'))
24455 || (TARGET_THUMB && (code == '_')));
24456 }
24457 \f
24458 /* Target hook for assembling integer objects. The ARM version needs to
24459 handle word-sized values specially. */
24460 static bool
24461 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24462 {
24463 machine_mode mode;
24464
24465 if (size == UNITS_PER_WORD && aligned_p)
24466 {
24467 fputs ("\t.word\t", asm_out_file);
24468 output_addr_const (asm_out_file, x);
24469
24470 /* Mark symbols as position independent. We only do this in the
24471 .text segment, not in the .data segment. */
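/* Depending on whether the symbol can be resolved locally, this emits a
directive such as ".word sym(GOT)" or ".word sym(GOTOFF)"; on FDPIC targets,
function symbols use the FUNCDESC variants of these relocations. */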
24472 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24473 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
24474 {
24475 /* See legitimize_pic_address for an explanation of the
24476 TARGET_VXWORKS_RTP check. */
24477 /* References to weak symbols cannot be resolved locally:
24478 they may be overridden by a non-weak definition at link
24479 time. */
24480 if (!arm_pic_data_is_text_relative
24481 || (GET_CODE (x) == SYMBOL_REF
24482 && (!SYMBOL_REF_LOCAL_P (x)
24483 || (SYMBOL_REF_DECL (x)
24484 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24485 || (SYMBOL_REF_FUNCTION_P (x)
24486 && !arm_fdpic_local_funcdesc_p (x)))))
24487 {
24488 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24489 fputs ("(GOTFUNCDESC)", asm_out_file);
24490 else
24491 fputs ("(GOT)", asm_out_file);
24492 }
24493 else
24494 {
24495 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24496 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24497 else
24498 {
24499 bool is_readonly;
24500
24501 if (!TARGET_FDPIC
24502 || arm_is_segment_info_known (x, &is_readonly))
24503 fputs ("(GOTOFF)", asm_out_file);
24504 else
24505 fputs ("(GOT)", asm_out_file);
24506 }
24507 }
24508 }
24509
24510 /* For FDPIC we also have to mark the symbol for the .data section. */
24511 if (TARGET_FDPIC
24512 && !making_const_table
24513 && SYMBOL_REF_P (x)
24514 && SYMBOL_REF_FUNCTION_P (x))
24515 fputs ("(FUNCDESC)", asm_out_file);
24516
24517 fputc ('\n', asm_out_file);
24518 return true;
24519 }
24520
24521 mode = GET_MODE (x);
24522
24523 if (arm_vector_mode_supported_p (mode))
24524 {
24525 int i, units;
24526
24527 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24528
24529 units = CONST_VECTOR_NUNITS (x);
24530 size = GET_MODE_UNIT_SIZE (mode);
24531
24532 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24533 for (i = 0; i < units; i++)
24534 {
24535 rtx elt = CONST_VECTOR_ELT (x, i);
24536 assemble_integer
24537 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24538 }
24539 else
24540 for (i = 0; i < units; i++)
24541 {
24542 rtx elt = CONST_VECTOR_ELT (x, i);
24543 assemble_real
24544 (*CONST_DOUBLE_REAL_VALUE (elt),
24545 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24546 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24547 }
24548
24549 return true;
24550 }
24551
24552 return default_assemble_integer (x, size, aligned_p);
24553 }
24554
24555 static void
24556 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24557 {
24558 section *s;
24559
24560 if (!TARGET_AAPCS_BASED)
24561 {
24562 (is_ctor ?
24563 default_named_section_asm_out_constructor
24564 : default_named_section_asm_out_destructor) (symbol, priority);
24565 return;
24566 }
24567
24568 /* Put these in the .init_array section, using a special relocation. */
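/* For example, a constructor with priority 101 is placed in a section named
".init_array.00101". */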
24569 if (priority != DEFAULT_INIT_PRIORITY)
24570 {
24571 char buf[18];
24572 sprintf (buf, "%s.%.5u",
24573 is_ctor ? ".init_array" : ".fini_array",
24574 priority);
24575 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24576 }
24577 else if (is_ctor)
24578 s = ctors_section;
24579 else
24580 s = dtors_section;
24581
24582 switch_to_section (s);
24583 assemble_align (POINTER_SIZE);
24584 fputs ("\t.word\t", asm_out_file);
24585 output_addr_const (asm_out_file, symbol);
24586 fputs ("(target1)\n", asm_out_file);
24587 }
24588
24589 /* Add a function to the list of static constructors. */
24590
24591 static void
24592 arm_elf_asm_constructor (rtx symbol, int priority)
24593 {
24594 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24595 }
24596
24597 /* Add a function to the list of static destructors. */
24598
24599 static void
24600 arm_elf_asm_destructor (rtx symbol, int priority)
24601 {
24602 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24603 }
24604 \f
24605 /* A finite state machine takes care of noticing whether or not instructions
24606 can be conditionally executed, thus decreasing execution time and code
24607 size by deleting branch instructions. The fsm is controlled by
24608 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24609
24610 /* The states of the fsm controlling condition codes are:
24611 0: normal, do nothing special
24612 1: make ASM_OUTPUT_OPCODE not output this instruction
24613 2: make ASM_OUTPUT_OPCODE not output this instruction
24614 3: make instructions conditional
24615 4: make instructions conditional
24616
24617 State transitions (state->state by whom under condition):
24618 0 -> 1 final_prescan_insn if the `target' is a label
24619 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24620 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24621 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24622 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24623 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24624 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24625 (the target insn is arm_target_insn).
24626
24627 If the jump clobbers the conditions then we use states 2 and 4.
24628
24629 A similar thing can be done with conditional return insns.
24630
24631 XXX In case the `target' is an unconditional branch, this conditionalising
24632 of the instructions always reduces code size, but not always execution
24633 time. But then, I want to reduce the code size to somewhere near what
24634 /bin/cc produces. */
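/* As an illustration, for a conditional branch skipping a single
instruction, such as

cmp r0, #0
beq .L1
add r1, r1, #1
.L1:

the FSM allows the branch to be dropped and the skipped instruction to be
emitted with the inverse condition instead:

cmp r0, #0
addne r1, r1, #1 */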
24635
24636 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24637 instructions. When a COND_EXEC instruction is seen the subsequent
24638 instructions are scanned so that multiple conditional instructions can be
24639 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24640 specify the length and true/false mask for the IT block. These will be
24641 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
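/* For example, two instructions predicated on EQ followed by one predicated
on NE can be emitted as a single "ITTE eq" block; the bits of
arm_condexec_mask record which slots use the block's base condition and
which use its inverse. */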
24642
24643 /* Returns the index of the ARM condition code string in
24644 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24645 COMPARISON should be an rtx like `(eq (...) (...))'. */
24646
24647 enum arm_cond_code
24648 maybe_get_arm_condition_code (rtx comparison)
24649 {
24650 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24651 enum arm_cond_code code;
24652 enum rtx_code comp_code = GET_CODE (comparison);
24653
24654 if (GET_MODE_CLASS (mode) != MODE_CC)
24655 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24656 XEXP (comparison, 1));
24657
24658 switch (mode)
24659 {
24660 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24661 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24662 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24663 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24664 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24665 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24666 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24667 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24668 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24669 case E_CC_DLTUmode: code = ARM_CC;
24670
24671 dominance:
24672 if (comp_code == EQ)
24673 return ARM_INVERSE_CONDITION_CODE (code);
24674 if (comp_code == NE)
24675 return code;
24676 return ARM_NV;
24677
24678 case E_CC_NZmode:
24679 switch (comp_code)
24680 {
24681 case NE: return ARM_NE;
24682 case EQ: return ARM_EQ;
24683 case GE: return ARM_PL;
24684 case LT: return ARM_MI;
24685 default: return ARM_NV;
24686 }
24687
24688 case E_CC_Zmode:
24689 switch (comp_code)
24690 {
24691 case NE: return ARM_NE;
24692 case EQ: return ARM_EQ;
24693 default: return ARM_NV;
24694 }
24695
24696 case E_CC_Nmode:
24697 switch (comp_code)
24698 {
24699 case NE: return ARM_MI;
24700 case EQ: return ARM_PL;
24701 default: return ARM_NV;
24702 }
24703
24704 case E_CCFPEmode:
24705 case E_CCFPmode:
24706 /* We can handle all cases except UNEQ and LTGT. */
24707 switch (comp_code)
24708 {
24709 case GE: return ARM_GE;
24710 case GT: return ARM_GT;
24711 case LE: return ARM_LS;
24712 case LT: return ARM_MI;
24713 case NE: return ARM_NE;
24714 case EQ: return ARM_EQ;
24715 case ORDERED: return ARM_VC;
24716 case UNORDERED: return ARM_VS;
24717 case UNLT: return ARM_LT;
24718 case UNLE: return ARM_LE;
24719 case UNGT: return ARM_HI;
24720 case UNGE: return ARM_PL;
24721 /* UNEQ and LTGT do not have a representation. */
24722 case UNEQ: /* Fall through. */
24723 case LTGT: /* Fall through. */
24724 default: return ARM_NV;
24725 }
24726
24727 case E_CC_SWPmode:
24728 switch (comp_code)
24729 {
24730 case NE: return ARM_NE;
24731 case EQ: return ARM_EQ;
24732 case GE: return ARM_LE;
24733 case GT: return ARM_LT;
24734 case LE: return ARM_GE;
24735 case LT: return ARM_GT;
24736 case GEU: return ARM_LS;
24737 case GTU: return ARM_CC;
24738 case LEU: return ARM_CS;
24739 case LTU: return ARM_HI;
24740 default: return ARM_NV;
24741 }
24742
24743 case E_CC_Cmode:
24744 switch (comp_code)
24745 {
24746 case LTU: return ARM_CS;
24747 case GEU: return ARM_CC;
24748 default: return ARM_NV;
24749 }
24750
24751 case E_CC_NVmode:
24752 switch (comp_code)
24753 {
24754 case GE: return ARM_GE;
24755 case LT: return ARM_LT;
24756 default: return ARM_NV;
24757 }
24758
24759 case E_CC_Bmode:
24760 switch (comp_code)
24761 {
24762 case GEU: return ARM_CS;
24763 case LTU: return ARM_CC;
24764 default: return ARM_NV;
24765 }
24766
24767 case E_CC_Vmode:
24768 switch (comp_code)
24769 {
24770 case NE: return ARM_VS;
24771 case EQ: return ARM_VC;
24772 default: return ARM_NV;
24773 }
24774
24775 case E_CC_ADCmode:
24776 switch (comp_code)
24777 {
24778 case GEU: return ARM_CS;
24779 case LTU: return ARM_CC;
24780 default: return ARM_NV;
24781 }
24782
24783 case E_CCmode:
24784 case E_CC_RSBmode:
24785 switch (comp_code)
24786 {
24787 case NE: return ARM_NE;
24788 case EQ: return ARM_EQ;
24789 case GE: return ARM_GE;
24790 case GT: return ARM_GT;
24791 case LE: return ARM_LE;
24792 case LT: return ARM_LT;
24793 case GEU: return ARM_CS;
24794 case GTU: return ARM_HI;
24795 case LEU: return ARM_LS;
24796 case LTU: return ARM_CC;
24797 default: return ARM_NV;
24798 }
24799
24800 default: gcc_unreachable ();
24801 }
24802 }
24803
24804 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
24805 static enum arm_cond_code
24806 get_arm_condition_code (rtx comparison)
24807 {
24808 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24809 gcc_assert (code != ARM_NV);
24810 return code;
24811 }
24812
24813 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
24814 code registers when not targeting Thumb1. The VFP condition register
24815 only exists when generating hard-float code. */
24816 static bool
24817 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24818 {
24819 if (!TARGET_32BIT)
24820 return false;
24821
24822 *p1 = CC_REGNUM;
24823 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
24824 return true;
24825 }
24826
24827 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24828 instructions. */
24829 void
24830 thumb2_final_prescan_insn (rtx_insn *insn)
24831 {
24832 rtx_insn *first_insn = insn;
24833 rtx body = PATTERN (insn);
24834 rtx predicate;
24835 enum arm_cond_code code;
24836 int n;
24837 int mask;
24838 int max;
24839
24840 /* max_insns_skipped in the tune was already taken into account in the
24841 cost model of the ifcvt pass when generating COND_EXEC insns. At this
24842 stage just emit IT blocks as large as the architecture allows; it does
24843 not make sense to split the IT blocks. */
24844 max = MAX_INSN_PER_IT_BLOCK;
24845
24846 /* Remove the previous insn from the count of insns to be output. */
24847 if (arm_condexec_count)
24848 arm_condexec_count--;
24849
24850 /* Nothing to do if we are already inside a conditional block. */
24851 if (arm_condexec_count)
24852 return;
24853
24854 if (GET_CODE (body) != COND_EXEC)
24855 return;
24856
24857 /* Conditional jumps are implemented directly. */
24858 if (JUMP_P (insn))
24859 return;
24860
24861 predicate = COND_EXEC_TEST (body);
24862 arm_current_cc = get_arm_condition_code (predicate);
24863
24864 n = get_attr_ce_count (insn);
24865 arm_condexec_count = 1;
24866 arm_condexec_mask = (1 << n) - 1;
24867 arm_condexec_masklen = n;
24868 /* See if subsequent instructions can be combined into the same block. */
24869 for (;;)
24870 {
24871 insn = next_nonnote_insn (insn);
24872
24873 /* Jumping into the middle of an IT block is illegal, so a label or
24874 barrier terminates the block. */
24875 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24876 break;
24877
24878 body = PATTERN (insn);
24879 /* USE and CLOBBER aren't really insns, so just skip them. */
24880 if (GET_CODE (body) == USE
24881 || GET_CODE (body) == CLOBBER)
24882 continue;
24883
24884 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
24885 if (GET_CODE (body) != COND_EXEC)
24886 break;
24887 /* Maximum number of conditionally executed instructions in a block. */
24888 n = get_attr_ce_count (insn);
24889 if (arm_condexec_masklen + n > max)
24890 break;
24891
24892 predicate = COND_EXEC_TEST (body);
24893 code = get_arm_condition_code (predicate);
24894 mask = (1 << n) - 1;
24895 if (arm_current_cc == code)
24896 arm_condexec_mask |= (mask << arm_condexec_masklen);
24897 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24898 break;
24899
24900 arm_condexec_count++;
24901 arm_condexec_masklen += n;
24902
24903 /* A jump must be the last instruction in a conditional block. */
24904 if (JUMP_P (insn))
24905 break;
24906 }
24907 /* Restore recog_data (getting the attributes of other insns can
24908 destroy this array, but final.c assumes that it remains intact
24909 across this call). */
24910 extract_constrain_insn_cached (first_insn);
24911 }
24912
24913 void
24914 arm_final_prescan_insn (rtx_insn *insn)
24915 {
24916 /* BODY will hold the body of INSN. */
24917 rtx body = PATTERN (insn);
24918
24919 /* This will be 1 if trying to repeat the trick, and things need to be
24920 reversed if it appears to fail. */
24921 int reverse = 0;
24922
24923 /* If we start with a return insn, we only succeed if we find another one. */
24924 int seeking_return = 0;
24925 enum rtx_code return_code = UNKNOWN;
24926
24927 /* START_INSN will hold the insn from where we start looking. This is the
24928 first insn after the following code_label if REVERSE is true. */
24929 rtx_insn *start_insn = insn;
24930
24931 /* If in state 4, check if the target branch is reached, in order to
24932 change back to state 0. */
24933 if (arm_ccfsm_state == 4)
24934 {
24935 if (insn == arm_target_insn)
24936 {
24937 arm_target_insn = NULL;
24938 arm_ccfsm_state = 0;
24939 }
24940 return;
24941 }
24942
24943 /* If in state 3, it is possible to repeat the trick, if this insn is an
24944 unconditional branch to a label, and immediately following this branch
24945 is the previous target label which is only used once, and the label this
24946 branch jumps to is not too far off. */
24947 if (arm_ccfsm_state == 3)
24948 {
24949 if (simplejump_p (insn))
24950 {
24951 start_insn = next_nonnote_insn (start_insn);
24952 if (BARRIER_P (start_insn))
24953 {
24954 /* XXX Isn't this always a barrier? */
24955 start_insn = next_nonnote_insn (start_insn);
24956 }
24957 if (LABEL_P (start_insn)
24958 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24959 && LABEL_NUSES (start_insn) == 1)
24960 reverse = TRUE;
24961 else
24962 return;
24963 }
24964 else if (ANY_RETURN_P (body))
24965 {
24966 start_insn = next_nonnote_insn (start_insn);
24967 if (BARRIER_P (start_insn))
24968 start_insn = next_nonnote_insn (start_insn);
24969 if (LABEL_P (start_insn)
24970 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24971 && LABEL_NUSES (start_insn) == 1)
24972 {
24973 reverse = TRUE;
24974 seeking_return = 1;
24975 return_code = GET_CODE (body);
24976 }
24977 else
24978 return;
24979 }
24980 else
24981 return;
24982 }
24983
24984 gcc_assert (!arm_ccfsm_state || reverse);
24985 if (!JUMP_P (insn))
24986 return;
24987
24988 /* This jump might be paralleled with a clobber of the condition codes;
24989 the jump should always come first. */
24990 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
24991 body = XVECEXP (body, 0, 0);
24992
24993 if (reverse
24994 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
24995 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
24996 {
24997 int insns_skipped;
24998 int fail = FALSE, succeed = FALSE;
24999 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25000 int then_not_else = TRUE;
25001 rtx_insn *this_insn = start_insn;
25002 rtx label = 0;
25003
25004 /* Register the insn jumped to. */
25005 if (reverse)
25006 {
25007 if (!seeking_return)
25008 label = XEXP (SET_SRC (body), 0);
25009 }
25010 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25011 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25012 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25013 {
25014 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25015 then_not_else = FALSE;
25016 }
25017 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25018 {
25019 seeking_return = 1;
25020 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25021 }
25022 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25023 {
25024 seeking_return = 1;
25025 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25026 then_not_else = FALSE;
25027 }
25028 else
25029 gcc_unreachable ();
25030
25031 /* See how many insns this branch skips, and what kind of insns. If all
25032 insns are okay, and the label or unconditional branch to the same
25033 label is not too far away, succeed. */
25034 for (insns_skipped = 0;
25035 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25036 {
25037 rtx scanbody;
25038
25039 this_insn = next_nonnote_insn (this_insn);
25040 if (!this_insn)
25041 break;
25042
25043 switch (GET_CODE (this_insn))
25044 {
25045 case CODE_LABEL:
25046 /* Succeed if it is the target label, otherwise fail since
25047 control falls in from somewhere else. */
25048 if (this_insn == label)
25049 {
25050 arm_ccfsm_state = 1;
25051 succeed = TRUE;
25052 }
25053 else
25054 fail = TRUE;
25055 break;
25056
25057 case BARRIER:
25058 /* Succeed if the following insn is the target label.
25059 Otherwise fail.
25060 If return insns are used then the last insn in a function
25061 will be a barrier. */
25062 this_insn = next_nonnote_insn (this_insn);
25063 if (this_insn && this_insn == label)
25064 {
25065 arm_ccfsm_state = 1;
25066 succeed = TRUE;
25067 }
25068 else
25069 fail = TRUE;
25070 break;
25071
25072 case CALL_INSN:
25073 /* The AAPCS says that conditional calls should not be
25074 used since they make interworking inefficient (the
25075 linker can't transform BL<cond> into BLX). That's
25076 only a problem if the machine has BLX. */
25077 if (arm_arch5t)
25078 {
25079 fail = TRUE;
25080 break;
25081 }
25082
25083 /* Succeed if the following insn is the target label, or
25084 if the following two insns are a barrier and the
25085 target label. */
25086 this_insn = next_nonnote_insn (this_insn);
25087 if (this_insn && BARRIER_P (this_insn))
25088 this_insn = next_nonnote_insn (this_insn);
25089
25090 if (this_insn && this_insn == label
25091 && insns_skipped < max_insns_skipped)
25092 {
25093 arm_ccfsm_state = 1;
25094 succeed = TRUE;
25095 }
25096 else
25097 fail = TRUE;
25098 break;
25099
25100 case JUMP_INSN:
25101 /* If this is an unconditional branch to the same label, succeed.
25102 If it is to another label, do nothing. If it is conditional,
25103 fail. */
25104 /* XXX Probably, the tests for SET and the PC are
25105 unnecessary. */
25106
25107 scanbody = PATTERN (this_insn);
25108 if (GET_CODE (scanbody) == SET
25109 && GET_CODE (SET_DEST (scanbody)) == PC)
25110 {
25111 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25112 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25113 {
25114 arm_ccfsm_state = 2;
25115 succeed = TRUE;
25116 }
25117 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25118 fail = TRUE;
25119 }
25120 /* Fail if a conditional return is undesirable (e.g. on a
25121 StrongARM), but still allow this if optimizing for size. */
25122 else if (GET_CODE (scanbody) == return_code
25123 && !use_return_insn (TRUE, NULL)
25124 && !optimize_size)
25125 fail = TRUE;
25126 else if (GET_CODE (scanbody) == return_code)
25127 {
25128 arm_ccfsm_state = 2;
25129 succeed = TRUE;
25130 }
25131 else if (GET_CODE (scanbody) == PARALLEL)
25132 {
25133 switch (get_attr_conds (this_insn))
25134 {
25135 case CONDS_NOCOND:
25136 break;
25137 default:
25138 fail = TRUE;
25139 break;
25140 }
25141 }
25142 else
25143 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25144
25145 break;
25146
25147 case INSN:
25148 /* Instructions using or affecting the condition codes make it
25149 fail. */
25150 scanbody = PATTERN (this_insn);
25151 if (!(GET_CODE (scanbody) == SET
25152 || GET_CODE (scanbody) == PARALLEL)
25153 || get_attr_conds (this_insn) != CONDS_NOCOND)
25154 fail = TRUE;
25155 break;
25156
25157 default:
25158 break;
25159 }
25160 }
25161 if (succeed)
25162 {
25163 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25164 arm_target_label = CODE_LABEL_NUMBER (label);
25165 else
25166 {
25167 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25168
25169 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25170 {
25171 this_insn = next_nonnote_insn (this_insn);
25172 gcc_assert (!this_insn
25173 || (!BARRIER_P (this_insn)
25174 && !LABEL_P (this_insn)));
25175 }
25176 if (!this_insn)
25177 {
25178 /* Oh dear!  We ran off the end; give up. */
25179 extract_constrain_insn_cached (insn);
25180 arm_ccfsm_state = 0;
25181 arm_target_insn = NULL;
25182 return;
25183 }
25184 arm_target_insn = this_insn;
25185 }
25186
25187 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25188 what it was. */
25189 if (!reverse)
25190 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25191
25192 if (reverse || then_not_else)
25193 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25194 }
25195
25196 /* Restore recog_data (getting the attributes of other insns can
25197 destroy this array, but final.c assumes that it remains intact
25198 across this call).  */
25199 extract_constrain_insn_cached (insn);
25200 }
25201 }
25202
25203 /* Output IT instructions. */
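/* For example (a sketch): a three-instruction block whose first two insns
   use the block condition EQ and whose last uses the inverse condition
   gives arm_condexec_mask == 3 (bits 0 and 1 set), so the string built
   below is "tte" and the prefix emitted ahead of the first conditional
   opcode is "itte eq".  */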
25204 void
25205 thumb2_asm_output_opcode (FILE * stream)
25206 {
25207 char buff[5];
25208 int n;
25209
25210 if (arm_condexec_mask)
25211 {
25212 for (n = 0; n < arm_condexec_masklen; n++)
25213 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25214 buff[n] = 0;
25215 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25216 arm_condition_codes[arm_current_cc]);
25217 arm_condexec_mask = 0;
25218 }
25219 }
25220
25221 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25222 UNITS_PER_WORD bytes wide. */
25223 static unsigned int
25224 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25225 {
25226 if (TARGET_32BIT
25227 && regno > PC_REGNUM
25228 && regno != FRAME_POINTER_REGNUM
25229 && regno != ARG_POINTER_REGNUM
25230 && !IS_VFP_REGNUM (regno))
25231 return 1;
25232
25233 return ARM_NUM_REGS (mode);
25234 }
25235
25236 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25237 static bool
25238 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25239 {
25240 if (GET_MODE_CLASS (mode) == MODE_CC)
25241 return (regno == CC_REGNUM
25242 || (TARGET_VFP_BASE
25243 && regno == VFPCC_REGNUM));
25244
25245 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25246 return false;
25247
25248 if (IS_VPR_REGNUM (regno))
25249 return true;
25250
25251 if (TARGET_THUMB1)
25252 /* For the Thumb we only allow values bigger than SImode in
25253 registers 0 - 6, so that there is always a second low
25254 register available to hold the upper part of the value.
25255 We probably ought to ensure that the register is the
25256 start of an even numbered register pair. */
25257 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25258
25259 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25260 {
25261 if (mode == DFmode || mode == DImode)
25262 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25263
25264 if (mode == HFmode || mode == BFmode || mode == HImode
25265 || mode == SFmode || mode == SImode)
25266 return VFP_REGNO_OK_FOR_SINGLE (regno);
25267
25268 if (TARGET_NEON)
25269 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25270 || (VALID_NEON_QREG_MODE (mode)
25271 && NEON_REGNO_OK_FOR_QUAD (regno))
25272 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25273 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25274 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25275 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25276 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25277 if (TARGET_HAVE_MVE)
25278 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25279 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25280 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25281
25282 return false;
25283 }
25284
25285 if (TARGET_REALLY_IWMMXT)
25286 {
25287 if (IS_IWMMXT_GR_REGNUM (regno))
25288 return mode == SImode;
25289
25290 if (IS_IWMMXT_REGNUM (regno))
25291 return VALID_IWMMXT_REG_MODE (mode);
25292 }
25293
25294 /* We allow almost any value to be stored in the general registers.
25295 Restrict doubleword quantities to even register pairs in ARM state
25296 so that we can use ldrd. The same restriction applies for MVE
25297 in order to support Armv8.1-M Mainline instructions.
25298 Do not allow very large Neon structure opaque modes in general
25299 registers; they would use too many. */
25300 if (regno <= LAST_ARM_REGNUM)
25301 {
25302 if (ARM_NUM_REGS (mode) > 4)
25303 return false;
25304
25305 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25306 return true;
25307
25308 return !((TARGET_LDRD || TARGET_CDE)
25309 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25310 }
25311
25312 if (regno == FRAME_POINTER_REGNUM
25313 || regno == ARG_POINTER_REGNUM)
25314 /* We only allow integers in the fake hard registers. */
25315 return GET_MODE_CLASS (mode) == MODE_INT;
25316
25317 return false;
25318 }
25319
25320 /* Implement TARGET_MODES_TIEABLE_P. */
25321
25322 static bool
25323 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25324 {
25325 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25326 return true;
25327
25328 /* We specifically want to allow elements of "structure" modes to
25329 be tieable to the structure. This more general condition allows
25330 other rarer situations too. */
25331 if ((TARGET_NEON
25332 && (VALID_NEON_DREG_MODE (mode1)
25333 || VALID_NEON_QREG_MODE (mode1)
25334 || VALID_NEON_STRUCT_MODE (mode1))
25335 && (VALID_NEON_DREG_MODE (mode2)
25336 || VALID_NEON_QREG_MODE (mode2)
25337 || VALID_NEON_STRUCT_MODE (mode2)))
25338 || (TARGET_HAVE_MVE
25339 && (VALID_MVE_MODE (mode1)
25340 || VALID_MVE_STRUCT_MODE (mode1))
25341 && (VALID_MVE_MODE (mode2)
25342 || VALID_MVE_STRUCT_MODE (mode2))))
25343 return true;
25344
25345 return false;
25346 }
25347
25348 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25349 not used in arm mode. */
25350
25351 enum reg_class
25352 arm_regno_class (int regno)
25353 {
25354 if (regno == PC_REGNUM)
25355 return NO_REGS;
25356
25357 if (IS_VPR_REGNUM (regno))
25358 return VPR_REG;
25359
25360 if (TARGET_THUMB1)
25361 {
25362 if (regno == STACK_POINTER_REGNUM)
25363 return STACK_REG;
25364 if (regno == CC_REGNUM)
25365 return CC_REG;
25366 if (regno < 8)
25367 return LO_REGS;
25368 return HI_REGS;
25369 }
25370
25371 if (TARGET_THUMB2 && regno < 8)
25372 return LO_REGS;
25373
25374 if ( regno <= LAST_ARM_REGNUM
25375 || regno == FRAME_POINTER_REGNUM
25376 || regno == ARG_POINTER_REGNUM)
25377 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25378
25379 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25380 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25381
25382 if (IS_VFP_REGNUM (regno))
25383 {
25384 if (regno <= D7_VFP_REGNUM)
25385 return VFP_D0_D7_REGS;
25386 else if (regno <= LAST_LO_VFP_REGNUM)
25387 return VFP_LO_REGS;
25388 else
25389 return VFP_HI_REGS;
25390 }
25391
25392 if (IS_IWMMXT_REGNUM (regno))
25393 return IWMMXT_REGS;
25394
25395 if (IS_IWMMXT_GR_REGNUM (regno))
25396 return IWMMXT_GR_REGS;
25397
25398 return NO_REGS;
25399 }
25400
25401 /* Handle a special case when computing the offset
25402 of an argument from the frame pointer. */
25403 int
25404 arm_debugger_arg_offset (int value, rtx addr)
25405 {
25406 rtx_insn *insn;
25407
25408 /* We are only interested if dbxout_parms() failed to compute the offset. */
25409 if (value != 0)
25410 return 0;
25411
25412 /* We can only cope with the case where the address is held in a register. */
25413 if (!REG_P (addr))
25414 return 0;
25415
25416 /* If we are using the frame pointer to point at the argument, then
25417 an offset of 0 is correct. */
25418 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25419 return 0;
25420
25421 /* If we are using the stack pointer to point at the
25422 argument, then an offset of 0 is correct. */
25423 /* ??? Check this is consistent with thumb2 frame layout. */
25424 if ((TARGET_THUMB || !frame_pointer_needed)
25425 && REGNO (addr) == SP_REGNUM)
25426 return 0;
25427
25428 /* Oh dear. The argument is pointed to by a register rather
25429 than being held in a register, or being stored at a known
25430 offset from the frame pointer. Since GDB only understands
25431 those two kinds of argument we must translate the address
25432 held in the register into an offset from the frame pointer.
25433 We do this by searching through the insns for the function
25434 looking to see where this register gets its value. If the
25435 register is initialized from the frame pointer plus an offset
25436 then we are in luck and we can continue, otherwise we give up.
25437
25438 This code is exercised by producing debugging information
25439 for a function with arguments like this:
25440
25441 double func (double a, double b, int c, double d) {return d;}
25442
25443 Without this code the stab for parameter 'd' will be set to
25444 an offset of 0 from the frame pointer, rather than 8. */
25445
25446 /* The if() statement says:
25447
25448 If the insn is a normal instruction
25449 and if the insn is setting the value in a register
25450 and if the register being set is the register holding the address of the argument
25451 and if the address is computed by an addition
25452 that involves adding to a register
25453 which is the frame pointer
25454 a constant integer
25455
25456 then... */
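/* In RTL terms the loop below is looking for something of the form
   (a sketch):

     (set (reg Rn) (plus (reg hard-frame-pointer) (const_int 8)))

   where Rn is the register held in ADDR; the constant then becomes the
   offset reported to the debugger.  */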
25457
25458 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25459 {
25460 if ( NONJUMP_INSN_P (insn)
25461 && GET_CODE (PATTERN (insn)) == SET
25462 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25463 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25464 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25465 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25466 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25467 )
25468 {
25469 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25470
25471 break;
25472 }
25473 }
25474
25475 if (value == 0)
25476 {
25477 debug_rtx (addr);
25478 warning (0, "unable to compute real location of stacked parameter");
25479 value = 8; /* XXX magic hack */
25480 }
25481
25482 return value;
25483 }
25484 \f
25485 /* Implement TARGET_PROMOTED_TYPE. */
25486
25487 static tree
25488 arm_promoted_type (const_tree t)
25489 {
25490 if (SCALAR_FLOAT_TYPE_P (t)
25491 && TYPE_PRECISION (t) == 16
25492 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25493 return float_type_node;
25494 return NULL_TREE;
25495 }
25496
25497 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25498 This simply adds HFmode as a supported mode; even though we don't
25499 implement arithmetic on this type directly, it's supported by
25500 optabs conversions, much the way the double-word arithmetic is
25501 special-cased in the default hook. */
25502
25503 static bool
25504 arm_scalar_mode_supported_p (scalar_mode mode)
25505 {
25506 if (mode == HFmode)
25507 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25508 else if (ALL_FIXED_POINT_MODE_P (mode))
25509 return true;
25510 else
25511 return default_scalar_mode_supported_p (mode);
25512 }
25513
25514 /* Set the value of FLT_EVAL_METHOD.
25515 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25516
25517 0: evaluate all operations and constants, whose semantic type has at
25518 most the range and precision of type float, to the range and
25519 precision of float; evaluate all other operations and constants to
25520 the range and precision of the semantic type;
25521
25522 N, where _FloatN is a supported interchange floating type:
25523 evaluate all operations and constants, whose semantic type has at
25524 most the range and precision of _FloatN type, to the range and
25525 precision of the _FloatN type; evaluate all other operations and
25526 constants to the range and precision of the semantic type;
25527
25528 If we have the ARMv8.2-A extensions then we support _Float16 in native
25529 precision, so we should set this to 16. Otherwise, we support the type,
25530 but want to evaluate expressions in float precision, so set this to
25531 0. */
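/* For instance, with the Armv8.2-A FP16 extension enabled the expression
   "a + b" on _Float16 operands is evaluated directly in half precision
   (FLT_EVAL_METHOD == 16); without it the operands are promoted and the
   addition is performed in float (FLT_EVAL_METHOD == 0).  */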
25532
25533 static enum flt_eval_method
25534 arm_excess_precision (enum excess_precision_type type)
25535 {
25536 switch (type)
25537 {
25538 case EXCESS_PRECISION_TYPE_FAST:
25539 case EXCESS_PRECISION_TYPE_STANDARD:
25540 /* We can calculate either in 16-bit range and precision or
25541 32-bit range and precision. Make that decision based on whether
25542 we have native support for the ARMv8.2-A 16-bit floating-point
25543 instructions or not. */
25544 return (TARGET_VFP_FP16INST
25545 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25546 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25547 case EXCESS_PRECISION_TYPE_IMPLICIT:
25548 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25549 default:
25550 gcc_unreachable ();
25551 }
25552 return FLT_EVAL_METHOD_UNPREDICTABLE;
25553 }
25554
25555
25556 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25557 _Float16 if we are using anything other than ieee format for 16-bit
25558 floating point. Otherwise, punt to the default implementation. */
25559 static opt_scalar_float_mode
25560 arm_floatn_mode (int n, bool extended)
25561 {
25562 if (!extended && n == 16)
25563 {
25564 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25565 return HFmode;
25566 return opt_scalar_float_mode ();
25567 }
25568
25569 return default_floatn_mode (n, extended);
25570 }
25571
25572
25573 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25574 not to early-clobber SRC registers in the process.
25575
25576 We assume that the operands described by SRC and DEST represent a
25577 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25578 number of components into which the copy has been decomposed. */
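/* For example (a sketch): with DEST = { d1, d2 } and SRC = { d0, d1 } the
   two ranges overlap and REGNO (operands[0]) > REGNO (operands[1]), so the
   pairs are laid out in reverse order (d1 -> d2 first, then d0 -> d1) to
   avoid overwriting d1 before it has been read.  */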
25579 void
25580 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25581 {
25582 unsigned int i;
25583
25584 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25585 || REGNO (operands[0]) < REGNO (operands[1]))
25586 {
25587 for (i = 0; i < count; i++)
25588 {
25589 operands[2 * i] = dest[i];
25590 operands[2 * i + 1] = src[i];
25591 }
25592 }
25593 else
25594 {
25595 for (i = 0; i < count; i++)
25596 {
25597 operands[2 * i] = dest[count - i - 1];
25598 operands[2 * i + 1] = src[count - i - 1];
25599 }
25600 }
25601 }
25602
25603 /* Split operands into moves from op[1] + op[2] into op[0]. */
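/* For example (a sketch): combining d0 and d1 into q0 needs no moves at
   all, while combining d1 and d0 into q0 hits the reversed-halves case
   below and can be handled with a single VSWP.  */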
25604
25605 void
25606 neon_split_vcombine (rtx operands[3])
25607 {
25608 unsigned int dest = REGNO (operands[0]);
25609 unsigned int src1 = REGNO (operands[1]);
25610 unsigned int src2 = REGNO (operands[2]);
25611 machine_mode halfmode = GET_MODE (operands[1]);
25612 unsigned int halfregs = REG_NREGS (operands[1]);
25613 rtx destlo, desthi;
25614
25615 if (src1 == dest && src2 == dest + halfregs)
25616 {
25617 /* No-op move. Can't split to nothing; emit something. */
25618 emit_note (NOTE_INSN_DELETED);
25619 return;
25620 }
25621
25622 /* Preserve register attributes for variable tracking. */
25623 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25624 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25625 GET_MODE_SIZE (halfmode));
25626
25627 /* Special case of reversed high/low parts. Use VSWP. */
25628 if (src2 == dest && src1 == dest + halfregs)
25629 {
25630 rtx x = gen_rtx_SET (destlo, operands[1]);
25631 rtx y = gen_rtx_SET (desthi, operands[2]);
25632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25633 return;
25634 }
25635
25636 if (!reg_overlap_mentioned_p (operands[2], destlo))
25637 {
25638 /* Try to avoid unnecessary moves if part of the result
25639 is in the right place already. */
25640 if (src1 != dest)
25641 emit_move_insn (destlo, operands[1]);
25642 if (src2 != dest + halfregs)
25643 emit_move_insn (desthi, operands[2]);
25644 }
25645 else
25646 {
25647 if (src2 != dest + halfregs)
25648 emit_move_insn (desthi, operands[2]);
25649 if (src1 != dest)
25650 emit_move_insn (destlo, operands[1]);
25651 }
25652 }
25653 \f
25654 /* Return the number (counting from 0) of
25655 the least significant set bit in MASK. */
25656
25657 inline static int
25658 number_of_first_bit_set (unsigned mask)
25659 {
25660 return ctz_hwi (mask);
25661 }
25662
25663 /* Like emit_multi_reg_push, but allowing for a different set of
25664 registers to be described as saved. MASK is the set of registers
25665 to be saved; REAL_REGS is the set of registers to be described as
25666 saved. If REAL_REGS is 0, only describe the stack adjustment. */
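/* For example (a sketch): with MASK covering r4, r5 and lr this emits a
   single "push {r4, r5, lr}" and attaches a REG_FRAME_RELATED_EXPR note
   built from REAL_REGS, so the unwinder sees the 12-byte stack adjustment
   and, if REAL_REGS is nonzero, the individual register saves.  */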
25667
25668 static rtx_insn *
25669 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25670 {
25671 unsigned long regno;
25672 rtx par[10], tmp, reg;
25673 rtx_insn *insn;
25674 int i, j;
25675
25676 /* Build the parallel of the registers actually being stored. */
25677 for (i = 0; mask; ++i, mask &= mask - 1)
25678 {
25679 regno = ctz_hwi (mask);
25680 reg = gen_rtx_REG (SImode, regno);
25681
25682 if (i == 0)
25683 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25684 else
25685 tmp = gen_rtx_USE (VOIDmode, reg);
25686
25687 par[i] = tmp;
25688 }
25689
25690 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25691 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25692 tmp = gen_frame_mem (BLKmode, tmp);
25693 tmp = gen_rtx_SET (tmp, par[0]);
25694 par[0] = tmp;
25695
25696 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25697 insn = emit_insn (tmp);
25698
25699 /* Always build the stack adjustment note for unwind info. */
25700 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25701 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25702 par[0] = tmp;
25703
25704 /* Build the parallel of the registers recorded as saved for unwind. */
25705 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25706 {
25707 regno = ctz_hwi (real_regs);
25708 reg = gen_rtx_REG (SImode, regno);
25709
25710 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25711 tmp = gen_frame_mem (SImode, tmp);
25712 tmp = gen_rtx_SET (tmp, reg);
25713 RTX_FRAME_RELATED_P (tmp) = 1;
25714 par[j + 1] = tmp;
25715 }
25716
25717 if (j == 0)
25718 tmp = par[0];
25719 else
25720 {
25721 RTX_FRAME_RELATED_P (par[0]) = 1;
25722 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25723 }
25724
25725 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25726
25727 return insn;
25728 }
25729
25730 /* Emit code to pop the registers in MASK from the stack.  F is the
25731 assembly file to write the instructions to.  */
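/* For example (a sketch): a MASK covering r4, r5 and the PC normally
   produces "pop {r4, r5, pc}"; when interworking, backtracing, EH returns
   or CMSE entry functions are involved the PC is not popped directly and
   thumb_exit emits the return sequence instead.  */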
25732 static void
25733 thumb_pop (FILE *f, unsigned long mask)
25734 {
25735 int regno;
25736 int lo_mask = mask & 0xFF;
25737
25738 gcc_assert (mask);
25739
25740 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25741 {
25742 /* Special case.  Do not generate a POP PC statement here; do it in
25743 thumb_exit ().  */
25744 thumb_exit (f, -1);
25745 return;
25746 }
25747
25748 fprintf (f, "\tpop\t{");
25749
25750 /* Look at the low registers first. */
25751 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25752 {
25753 if (lo_mask & 1)
25754 {
25755 asm_fprintf (f, "%r", regno);
25756
25757 if ((lo_mask & ~1) != 0)
25758 fprintf (f, ", ");
25759 }
25760 }
25761
25762 if (mask & (1 << PC_REGNUM))
25763 {
25764 /* Catch popping the PC. */
25765 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
25766 || IS_CMSE_ENTRY (arm_current_func_type ()))
25767 {
25768 /* The PC is never popped directly; instead
25769 it is popped into r3 and then BX is used.  */
25770 fprintf (f, "}\n");
25771
25772 thumb_exit (f, -1);
25773
25774 return;
25775 }
25776 else
25777 {
25778 if (mask & 0xFF)
25779 fprintf (f, ", ");
25780
25781 asm_fprintf (f, "%r", PC_REGNUM);
25782 }
25783 }
25784
25785 fprintf (f, "}\n");
25786 }
25787
25788 /* Generate code to return from a thumb function.
25789 If 'reg_containing_return_addr' is -1, then the return address is
25790 actually on the stack, at the stack pointer.
25791
25792 Note: do not forget to update length attribute of corresponding insn pattern
25793 when changing assembly output (e.g. the length attribute of epilogue_insns when
25794 updating Armv8-M Baseline Security Extensions register clearing
25795 sequences). */
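/* For example (a sketch): in the simple non-interworking case with the
   return address still on the stack this boils down to a single
   "pop {pc}"; with interworking the return address is instead popped into
   a free low register (r1-r3, depending on the return value) and the
   function finishes with a BX on that register.  */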
25796 static void
25797 thumb_exit (FILE *f, int reg_containing_return_addr)
25798 {
25799 unsigned regs_available_for_popping;
25800 unsigned regs_to_pop;
25801 int pops_needed;
25802 unsigned available;
25803 unsigned required;
25804 machine_mode mode;
25805 int size;
25806 int restore_a4 = FALSE;
25807
25808 /* Compute the registers we need to pop. */
25809 regs_to_pop = 0;
25810 pops_needed = 0;
25811
25812 if (reg_containing_return_addr == -1)
25813 {
25814 regs_to_pop |= 1 << LR_REGNUM;
25815 ++pops_needed;
25816 }
25817
25818 if (TARGET_BACKTRACE)
25819 {
25820 /* Restore the (ARM) frame pointer and stack pointer. */
25821 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25822 pops_needed += 2;
25823 }
25824
25825 /* If there is nothing to pop then just emit the BX instruction and
25826 return. */
25827 if (pops_needed == 0)
25828 {
25829 if (crtl->calls_eh_return)
25830 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25831
25832 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25833 {
25834 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
25835 emitted by cmse_nonsecure_entry_clear_before_return (). */
25836 if (!TARGET_HAVE_FPCXT_CMSE)
25837 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25838 reg_containing_return_addr);
25839 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25840 }
25841 else
25842 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25843 return;
25844 }
25845 /* Otherwise if we are not supporting interworking and we have not created
25846 a backtrace structure and the function was not entered in ARM mode then
25847 just pop the return address straight into the PC. */
25848 else if (!TARGET_INTERWORK
25849 && !TARGET_BACKTRACE
25850 && !is_called_in_ARM_mode (current_function_decl)
25851 && !crtl->calls_eh_return
25852 && !IS_CMSE_ENTRY (arm_current_func_type ()))
25853 {
25854 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25855 return;
25856 }
25857
25858 /* Find out how many of the (return) argument registers we can corrupt. */
25859 regs_available_for_popping = 0;
25860
25861 /* If returning via __builtin_eh_return, the bottom three registers
25862 all contain information needed for the return. */
25863 if (crtl->calls_eh_return)
25864 size = 12;
25865 else
25866 {
25867 /* See if we can deduce the registers used from the function's
25868 return value.  This is more reliable than examining
25869 df_regs_ever_live_p () because that will be set if the register is
25870 ever used in the function, not just if the register is used
25871 to hold a return value. */
25872
25873 if (crtl->return_rtx != 0)
25874 mode = GET_MODE (crtl->return_rtx);
25875 else
25876 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25877
25878 size = GET_MODE_SIZE (mode);
25879
25880 if (size == 0)
25881 {
25882 /* In a void function we can use any argument register.
25883 In a function that returns a structure on the stack
25884 we can use the second and third argument registers. */
25885 if (mode == VOIDmode)
25886 regs_available_for_popping =
25887 (1 << ARG_REGISTER (1))
25888 | (1 << ARG_REGISTER (2))
25889 | (1 << ARG_REGISTER (3));
25890 else
25891 regs_available_for_popping =
25892 (1 << ARG_REGISTER (2))
25893 | (1 << ARG_REGISTER (3));
25894 }
25895 else if (size <= 4)
25896 regs_available_for_popping =
25897 (1 << ARG_REGISTER (2))
25898 | (1 << ARG_REGISTER (3));
25899 else if (size <= 8)
25900 regs_available_for_popping =
25901 (1 << ARG_REGISTER (3));
25902 }
25903
25904 /* Match registers to be popped with registers into which we pop them. */
25905 for (available = regs_available_for_popping,
25906 required = regs_to_pop;
25907 required != 0 && available != 0;
25908 available &= ~(available & - available),
25909 required &= ~(required & - required))
25910 -- pops_needed;
25911
25912 /* If we have any popping registers left over, remove them. */
25913 if (available > 0)
25914 regs_available_for_popping &= ~available;
25915
25916 /* Otherwise if we need another popping register we can use
25917 the fourth argument register. */
25918 else if (pops_needed)
25919 {
25920 /* If we have not found any free argument registers and
25921 reg a4 contains the return address, we must move it. */
25922 if (regs_available_for_popping == 0
25923 && reg_containing_return_addr == LAST_ARG_REGNUM)
25924 {
25925 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25926 reg_containing_return_addr = LR_REGNUM;
25927 }
25928 else if (size > 12)
25929 {
25930 /* Register a4 is being used to hold part of the return value,
25931 but we have dire need of a free, low register. */
25932 restore_a4 = TRUE;
25933
25934 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
25935 }
25936
25937 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25938 {
25939 /* The fourth argument register is available. */
25940 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25941
25942 --pops_needed;
25943 }
25944 }
25945
25946 /* Pop as many registers as we can. */
25947 thumb_pop (f, regs_available_for_popping);
25948
25949 /* Process the registers we popped. */
25950 if (reg_containing_return_addr == -1)
25951 {
25952 /* The return address was popped into the lowest numbered register. */
25953 regs_to_pop &= ~(1 << LR_REGNUM);
25954
25955 reg_containing_return_addr =
25956 number_of_first_bit_set (regs_available_for_popping);
25957
25958 /* Remove this register from the mask of available registers, so that
25959 the return address will not be corrupted by further pops. */
25960 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25961 }
25962
25963 /* If we popped other registers then handle them here. */
25964 if (regs_available_for_popping)
25965 {
25966 int frame_pointer;
25967
25968 /* Work out which register currently contains the frame pointer. */
25969 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25970
25971 /* Move it into the correct place. */
25972 asm_fprintf (f, "\tmov\t%r, %r\n",
25973 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25974
25975 /* (Temporarily) remove it from the mask of popped registers. */
25976 regs_available_for_popping &= ~(1 << frame_pointer);
25977 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25978
25979 if (regs_available_for_popping)
25980 {
25981 int stack_pointer;
25982
25983 /* We popped the stack pointer as well,
25984 find the register that contains it. */
25985 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25986
25987 /* Move it into the stack register. */
25988 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25989
25990 /* At this point we have popped all necessary registers, so
25991 do not worry about restoring regs_available_for_popping
25992 to its correct value:
25993
25994 assert (pops_needed == 0)
25995 assert (regs_available_for_popping == (1 << frame_pointer))
25996 assert (regs_to_pop == (1 << STACK_POINTER)) */
25997 }
25998 else
25999 {
26000 /* Since we have just moved the popped value into the frame
26001 pointer, the popping register is available for reuse, and
26002 we know that we still have the stack pointer left to pop. */
26003 regs_available_for_popping |= (1 << frame_pointer);
26004 }
26005 }
26006
26007 /* If we still have registers left on the stack, but we no longer have
26008 any registers into which we can pop them, then we must move the return
26009 address into the link register and make available the register that
26010 contained it. */
26011 if (regs_available_for_popping == 0 && pops_needed > 0)
26012 {
26013 regs_available_for_popping |= 1 << reg_containing_return_addr;
26014
26015 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26016 reg_containing_return_addr);
26017
26018 reg_containing_return_addr = LR_REGNUM;
26019 }
26020
26021 /* If we have registers left on the stack then pop some more.
26022 We know that at most we will want to pop FP and SP. */
26023 if (pops_needed > 0)
26024 {
26025 int popped_into;
26026 int move_to;
26027
26028 thumb_pop (f, regs_available_for_popping);
26029
26030 /* We have popped either FP or SP.
26031 Move whichever one it is into the correct register. */
26032 popped_into = number_of_first_bit_set (regs_available_for_popping);
26033 move_to = number_of_first_bit_set (regs_to_pop);
26034
26035 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26036 --pops_needed;
26037 }
26038
26039 /* If we still have not popped everything then we must have only
26040 had one register available to us and we are now popping the SP. */
26041 if (pops_needed > 0)
26042 {
26043 int popped_into;
26044
26045 thumb_pop (f, regs_available_for_popping);
26046
26047 popped_into = number_of_first_bit_set (regs_available_for_popping);
26048
26049 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26050 /*
26051 assert (regs_to_pop == (1 << STACK_POINTER))
26052 assert (pops_needed == 1)
26053 */
26054 }
26055
26056 /* If necessary restore the a4 register. */
26057 if (restore_a4)
26058 {
26059 if (reg_containing_return_addr != LR_REGNUM)
26060 {
26061 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26062 reg_containing_return_addr = LR_REGNUM;
26063 }
26064
26065 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26066 }
26067
26068 if (crtl->calls_eh_return)
26069 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26070
26071 /* Return to caller. */
26072 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26073 {
26074 /* This is for the cases where LR is not being used to contain the return
26075 address. It may therefore contain information that we might not want
26076 to leak, hence it must be cleared. The value in R0 will never be a
26077 secret at this point, so it is safe to use it, see the clearing code
26078 in cmse_nonsecure_entry_clear_before_return (). */
26079 if (reg_containing_return_addr != LR_REGNUM)
26080 asm_fprintf (f, "\tmov\tlr, r0\n");
26081
26082 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26083 by cmse_nonsecure_entry_clear_before_return (). */
26084 if (!TARGET_HAVE_FPCXT_CMSE)
26085 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26086 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26087 }
26088 else
26089 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26090 }
26091 \f
26092 /* Scan INSN just before assembler is output for it.
26093 For Thumb-1, we track the status of the condition codes; this
26094 information is used in the cbranchsi4_insn pattern. */
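/* For example (a sketch): after "adds r3, r3, #1" we record that the
   flags reflect a comparison of r3 with zero, so a following
   cbranchsi4_insn that tests r3 against zero need not emit an explicit
   compare.  */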
26095 void
26096 thumb1_final_prescan_insn (rtx_insn *insn)
26097 {
26098 if (flag_print_asm_name)
26099 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26100 INSN_ADDRESSES (INSN_UID (insn)));
26101 /* Don't overwrite the previous setter when we get to a cbranch. */
26102 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26103 {
26104 enum attr_conds conds;
26105
26106 if (cfun->machine->thumb1_cc_insn)
26107 {
26108 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26109 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26110 CC_STATUS_INIT;
26111 }
26112 conds = get_attr_conds (insn);
26113 if (conds == CONDS_SET)
26114 {
26115 rtx set = single_set (insn);
26116 cfun->machine->thumb1_cc_insn = insn;
26117 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26118 cfun->machine->thumb1_cc_op1 = const0_rtx;
26119 cfun->machine->thumb1_cc_mode = CC_NZmode;
26120 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26121 {
26122 rtx src1 = XEXP (SET_SRC (set), 1);
26123 if (src1 == const0_rtx)
26124 cfun->machine->thumb1_cc_mode = CCmode;
26125 }
26126 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26127 {
26128 /* Record the src register operand instead of dest because
26129 cprop_hardreg pass propagates src. */
26130 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26131 }
26132 }
26133 else if (conds != CONDS_NOCOND)
26134 cfun->machine->thumb1_cc_insn = NULL_RTX;
26135 }
26136
26137 /* Check whether an unexpected far jump is used. */
26138 if (cfun->machine->lr_save_eliminated
26139 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26140 internal_error("Unexpected thumb1 far jump");
26141 }
26142
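/* Return nonzero if VAL (viewed as a 32-bit value) fits entirely within
   an 8-bit window somewhere in the word, e.g. 0x1FE00 == 0xFF << 9; such
   constants can typically be built with a move of the byte followed by a
   left shift.  */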
26143 int
26144 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26145 {
26146 unsigned HOST_WIDE_INT mask = 0xff;
26147 int i;
26148
26149 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26150 if (val == 0) /* XXX */
26151 return 0;
26152
26153 for (i = 0; i < 25; i++)
26154 if ((val & (mask << i)) == val)
26155 return 1;
26156
26157 return 0;
26158 }
26159
26160 /* Return nonzero if the current function contains,
26161 or might contain, a far jump.  */
26162 static int
26163 thumb_far_jump_used_p (void)
26164 {
26165 rtx_insn *insn;
26166 bool far_jump = false;
26167 unsigned int func_size = 0;
26168
26169 /* If we have already decided that far jumps may be used,
26170 do not bother checking again, and always return true even if
26171 it turns out that they are not being used. Once we have made
26172 the decision that far jumps are present (and that hence the link
26173 register will be pushed onto the stack) we cannot go back on it. */
26174 if (cfun->machine->far_jump_used)
26175 return 1;
26176
26177 /* If this function is not being called from the prologue/epilogue
26178 generation code then it must be being called from the
26179 INITIAL_ELIMINATION_OFFSET macro. */
26180 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26181 {
26182 /* In this case we know that we are being asked about the elimination
26183 of the arg pointer register. If that register is not being used,
26184 then there are no arguments on the stack, and we do not have to
26185 worry that a far jump might force the prologue to push the link
26186 register, changing the stack offsets. In this case we can just
26187 return false, since the presence of far jumps in the function will
26188 not affect stack offsets.
26189
26190 If the arg pointer is live (or if it was live, but has now been
26191 eliminated and so set to dead) then we do have to test to see if
26192 the function might contain a far jump. This test can lead to some
26193 false negatives, since before reload is completed the length of
26194 branch instructions is not known, so gcc defaults to returning their
26195 longest length, which in turn sets the far jump attribute to true.
26196
26197 A false negative will not result in bad code being generated, but it
26198 will result in a needless push and pop of the link register. We
26199 hope that this does not occur too often.
26200
26201 If we need doubleword stack alignment this could affect the other
26202 elimination offsets so we can't risk getting it wrong. */
26203 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26204 cfun->machine->arg_pointer_live = 1;
26205 else if (!cfun->machine->arg_pointer_live)
26206 return 0;
26207 }
26208
26209 /* We should not change far_jump_used during or after reload, as there is
26210 no chance to change stack frame layout. */
26211 if (reload_in_progress || reload_completed)
26212 return 0;
26213
26214 /* Check to see if the function contains a branch
26215 insn with the far jump attribute set. */
26216 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26217 {
26218 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26219 {
26220 far_jump = true;
26221 }
26222 func_size += get_attr_length (insn);
26223 }
26224
26225 /* The far_jump attribute is always true for Thumb-1 before the
26226 shorten_branch pass, so checking it before that pass is not very useful.
26227
26228 The following heuristic tries to estimate more accurately whether a far
26229 jump will actually be used.  It is very conservative, because there is
26230 no chance to roll back a decision not to use far jumps.
26231
26232 A Thumb-1 long branch offset ranges from -2048 to 2046.  In the worst
26233 case each 2-byte insn is associated with a 4-byte constant pool entry,
26234 so using a function size of 2048/3 as the threshold is conservative
26235 enough. */
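/* Sketch of the arithmetic: in the worst case every 2-byte insn drags in
   a 4-byte literal-pool entry, so the distance a branch must span can be
   up to three times the summed insn lengths; once 3 * func_size reaches
   the 2048-byte branch range, some branch may be out of reach.  */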
26236 if (far_jump)
26237 {
26238 if ((func_size * 3) >= 2048)
26239 {
26240 /* Record the fact that we have decided that
26241 the function does use far jumps. */
26242 cfun->machine->far_jump_used = 1;
26243 return 1;
26244 }
26245 }
26246
26247 return 0;
26248 }
26249
26250 /* Return nonzero if FUNC must be entered in ARM mode. */
26251 static bool
26252 is_called_in_ARM_mode (tree func)
26253 {
26254 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26255
26256 /* Ignore the problem about functions whose address is taken. */
26257 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26258 return true;
26259
26260 #ifdef ARM_PE
26261 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26262 #else
26263 return false;
26264 #endif
26265 }
26266
26267 /* Given the stack offsets and register mask in OFFSETS, decide how
26268 many additional registers to push instead of subtracting a constant
26269 from SP. For epilogues the principle is the same except we use pop.
26270 FOR_PROLOGUE indicates which we're generating. */
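/* For example (a sketch): a frame of exactly 512 bytes is just beyond the
   508-byte limit of the Thumb-1 SP-adjusting immediate, so pushing one
   extra free low register shrinks the remaining adjustment to 508 bytes,
   and (amount - 508) / 4 == 1 extra register is requested below.  */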
26271 static int
26272 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26273 {
26274 HOST_WIDE_INT amount;
26275 unsigned long live_regs_mask = offsets->saved_regs_mask;
26276 /* Extract a mask of the ones we can give to the Thumb's push/pop
26277 instruction. */
26278 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26279 /* Then count how many other high registers will need to be pushed. */
26280 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26281 int n_free, reg_base, size;
26282
26283 if (!for_prologue && frame_pointer_needed)
26284 amount = offsets->locals_base - offsets->saved_regs;
26285 else
26286 amount = offsets->outgoing_args - offsets->saved_regs;
26287
26288 /* If the stack frame size is 512 exactly, we can save one load
26289 instruction, which should make this a win even when optimizing
26290 for speed. */
26291 if (!optimize_size && amount != 512)
26292 return 0;
26293
26294 /* Can't do this if there are high registers to push. */
26295 if (high_regs_pushed != 0)
26296 return 0;
26297
26298 /* Shouldn't do it in the prologue if no registers would normally
26299 be pushed at all. In the epilogue, also allow it if we'll have
26300 a pop insn for the PC. */
26301 if (l_mask == 0
26302 && (for_prologue
26303 || TARGET_BACKTRACE
26304 || (live_regs_mask & 1 << LR_REGNUM) == 0
26305 || TARGET_INTERWORK
26306 || crtl->args.pretend_args_size != 0))
26307 return 0;
26308
26309 /* Don't do this if thumb_expand_prologue wants to emit instructions
26310 between the push and the stack frame allocation. */
26311 if (for_prologue
26312 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26313 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26314 return 0;
26315
26316 reg_base = 0;
26317 n_free = 0;
26318 if (!for_prologue)
26319 {
26320 size = arm_size_return_regs ();
26321 reg_base = ARM_NUM_INTS (size);
26322 live_regs_mask >>= reg_base;
26323 }
26324
26325 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26326 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26327 {
26328 live_regs_mask >>= 1;
26329 n_free++;
26330 }
26331
26332 if (n_free == 0)
26333 return 0;
26334 gcc_assert (amount / 4 * 4 == amount);
26335
26336 if (amount >= 512 && (amount - n_free * 4) < 512)
26337 return (amount - 508) / 4;
26338 if (amount <= n_free * 4)
26339 return amount / 4;
26340 return 0;
26341 }
26342
26343 /* The bits which aren't usefully expanded as rtl. */
26344 const char *
26345 thumb1_unexpanded_epilogue (void)
26346 {
26347 arm_stack_offsets *offsets;
26348 int regno;
26349 unsigned long live_regs_mask = 0;
26350 int high_regs_pushed = 0;
26351 int extra_pop;
26352 int had_to_push_lr;
26353 int size;
26354
26355 if (cfun->machine->return_used_this_function != 0)
26356 return "";
26357
26358 if (IS_NAKED (arm_current_func_type ()))
26359 return "";
26360
26361 offsets = arm_get_frame_offsets ();
26362 live_regs_mask = offsets->saved_regs_mask;
26363 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26364
26365 /* See if we can deduce the registers used from the function's return value.
26366 This is more reliable than examining df_regs_ever_live_p () because that
26367 will be set if the register is ever used in the function, not just if
26368 the register is used to hold a return value. */
26369 size = arm_size_return_regs ();
26370
26371 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26372 if (extra_pop > 0)
26373 {
26374 unsigned long extra_mask = (1 << extra_pop) - 1;
26375 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26376 }
26377
26378 /* The prolog may have pushed some high registers to use as
26379 work registers, e.g. the testsuite file:
26380 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26381 compiles to produce:
26382 push {r4, r5, r6, r7, lr}
26383 mov r7, r9
26384 mov r6, r8
26385 push {r6, r7}
26386 as part of the prolog. We have to undo that pushing here. */
26387
26388 if (high_regs_pushed)
26389 {
26390 unsigned long mask = live_regs_mask & 0xff;
26391 int next_hi_reg;
26392
26393 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26394
26395 if (mask == 0)
26396 /* Oh dear! We have no low registers into which we can pop
26397 high registers! */
26398 internal_error
26399 ("no low registers available for popping high registers");
26400
26401 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26402 if (live_regs_mask & (1 << next_hi_reg))
26403 break;
26404
26405 while (high_regs_pushed)
26406 {
26407 /* Find lo register(s) into which the high register(s) can
26408 be popped. */
26409 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26410 {
26411 if (mask & (1 << regno))
26412 high_regs_pushed--;
26413 if (high_regs_pushed == 0)
26414 break;
26415 }
26416
26417 if (high_regs_pushed == 0 && regno >= 0)
26418 mask &= ~((1 << regno) - 1);
26419
26420 /* Pop the values into the low register(s). */
26421 thumb_pop (asm_out_file, mask);
26422
26423 /* Move the value(s) into the high registers. */
26424 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26425 {
26426 if (mask & (1 << regno))
26427 {
26428 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26429 regno);
26430
26431 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26432 next_hi_reg--)
26433 if (live_regs_mask & (1 << next_hi_reg))
26434 break;
26435 }
26436 }
26437 }
26438 live_regs_mask &= ~0x0f00;
26439 }
26440
26441 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26442 live_regs_mask &= 0xff;
26443
26444 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26445 {
26446 /* Pop the return address into the PC. */
26447 if (had_to_push_lr)
26448 live_regs_mask |= 1 << PC_REGNUM;
26449
26450 /* Either no argument registers were pushed or a backtrace
26451 structure was created which includes an adjusted stack
26452 pointer, so just pop everything. */
26453 if (live_regs_mask)
26454 thumb_pop (asm_out_file, live_regs_mask);
26455
26456 /* We have either just popped the return address into the
26457 PC or it was kept in LR for the entire function.
26458 Note that thumb_pop has already called thumb_exit if the
26459 PC was in the list. */
26460 if (!had_to_push_lr)
26461 thumb_exit (asm_out_file, LR_REGNUM);
26462 }
26463 else
26464 {
26465 /* Pop everything but the return address. */
26466 if (live_regs_mask)
26467 thumb_pop (asm_out_file, live_regs_mask);
26468
26469 if (had_to_push_lr)
26470 {
26471 if (size > 12)
26472 {
26473 /* We have no free low regs, so save one. */
26474 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26475 LAST_ARG_REGNUM);
26476 }
26477
26478 /* Get the return address into a temporary register. */
26479 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26480
26481 if (size > 12)
26482 {
26483 /* Move the return address to lr. */
26484 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26485 LAST_ARG_REGNUM);
26486 /* Restore the low register. */
26487 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26488 IP_REGNUM);
26489 regno = LR_REGNUM;
26490 }
26491 else
26492 regno = LAST_ARG_REGNUM;
26493 }
26494 else
26495 regno = LR_REGNUM;
26496
26497 /* Remove the argument registers that were pushed onto the stack. */
26498 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26499 SP_REGNUM, SP_REGNUM,
26500 crtl->args.pretend_args_size);
26501
26502 thumb_exit (asm_out_file, regno);
26503 }
26504
26505 return "";
26506 }
26507
26508 /* Functions to save and restore machine-specific function data. */
26509 static struct machine_function *
26510 arm_init_machine_status (void)
26511 {
26512 struct machine_function *machine;
26513 machine = ggc_cleared_alloc<machine_function> ();
26514
26515 #if ARM_FT_UNKNOWN != 0
26516 machine->func_type = ARM_FT_UNKNOWN;
26517 #endif
26518 machine->static_chain_stack_bytes = -1;
26519 return machine;
26520 }
26521
26522 /* Return an RTX indicating where the return address to the
26523 calling function can be found. */
26524 rtx
26525 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26526 {
26527 if (count != 0)
26528 return NULL_RTX;
26529
26530 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26531 }
26532
26533 /* Do anything needed before RTL is emitted for each function. */
26534 void
26535 arm_init_expanders (void)
26536 {
26537 /* Arrange to initialize and mark the machine per-function status. */
26538 init_machine_status = arm_init_machine_status;
26539
26540 /* This is to stop the combine pass optimizing away the alignment
26541 adjustment of va_arg. */
26542 /* ??? It is claimed that this should not be necessary. */
26543 if (cfun)
26544 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26545 }
26546
26547 /* Return true if FUNC would be compiled in a different mode (ARM or Thumb) from the current mode. */
26548
26549 bool
26550 arm_change_mode_p (tree func)
26551 {
26552 if (TREE_CODE (func) != FUNCTION_DECL)
26553 return false;
26554
26555 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26556
26557 if (!callee_tree)
26558 callee_tree = target_option_default_node;
26559
26560 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26561 int flags = callee_opts->x_target_flags;
26562
26563 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26564 }
26565
26566 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26567 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26568 to point at the base of the local variables after static stack
26569 space for a function has been allocated. */
26570
26571 HOST_WIDE_INT
26572 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26573 {
26574 arm_stack_offsets *offsets;
26575
26576 offsets = arm_get_frame_offsets ();
26577
26578 switch (from)
26579 {
26580 case ARG_POINTER_REGNUM:
26581 switch (to)
26582 {
26583 case STACK_POINTER_REGNUM:
26584 return offsets->outgoing_args - offsets->saved_args;
26585
26586 case FRAME_POINTER_REGNUM:
26587 return offsets->soft_frame - offsets->saved_args;
26588
26589 case ARM_HARD_FRAME_POINTER_REGNUM:
26590 return offsets->saved_regs - offsets->saved_args;
26591
26592 case THUMB_HARD_FRAME_POINTER_REGNUM:
26593 return offsets->locals_base - offsets->saved_args;
26594
26595 default:
26596 gcc_unreachable ();
26597 }
26598 break;
26599
26600 case FRAME_POINTER_REGNUM:
26601 switch (to)
26602 {
26603 case STACK_POINTER_REGNUM:
26604 return offsets->outgoing_args - offsets->soft_frame;
26605
26606 case ARM_HARD_FRAME_POINTER_REGNUM:
26607 return offsets->saved_regs - offsets->soft_frame;
26608
26609 case THUMB_HARD_FRAME_POINTER_REGNUM:
26610 return offsets->locals_base - offsets->soft_frame;
26611
26612 default:
26613 gcc_unreachable ();
26614 }
26615 break;
26616
26617 default:
26618 gcc_unreachable ();
26619 }
26620 }
26621
26622 /* Generate the function's prologue. */
26623
26624 void
26625 thumb1_expand_prologue (void)
26626 {
26627 rtx_insn *insn;
26628
26629 HOST_WIDE_INT amount;
26630 HOST_WIDE_INT size;
26631 arm_stack_offsets *offsets;
26632 unsigned long func_type;
26633 int regno;
26634 unsigned long live_regs_mask;
26635 unsigned long l_mask;
26636 unsigned high_regs_pushed = 0;
26637 bool lr_needs_saving;
26638
26639 func_type = arm_current_func_type ();
26640
26641 /* Naked functions don't have prologues. */
26642 if (IS_NAKED (func_type))
26643 {
26644 if (flag_stack_usage_info)
26645 current_function_static_stack_size = 0;
26646 return;
26647 }
26648
26649 if (IS_INTERRUPT (func_type))
26650 {
26651 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26652 return;
26653 }
26654
26655 if (is_called_in_ARM_mode (current_function_decl))
26656 emit_insn (gen_prologue_thumb1_interwork ());
26657
26658 offsets = arm_get_frame_offsets ();
26659 live_regs_mask = offsets->saved_regs_mask;
26660 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26661
26662 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26663 l_mask = live_regs_mask & 0x40ff;
26664 /* Then count how many other high registers will need to be pushed. */
26665 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26666
26667 if (crtl->args.pretend_args_size)
26668 {
26669 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26670
26671 if (cfun->machine->uses_anonymous_args)
26672 {
26673 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26674 unsigned long mask;
26675
26676 mask = 1ul << (LAST_ARG_REGNUM + 1);
26677 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26678
26679 insn = thumb1_emit_multi_reg_push (mask, 0);
26680 }
26681 else
26682 {
26683 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26684 stack_pointer_rtx, x));
26685 }
26686 RTX_FRAME_RELATED_P (insn) = 1;
26687 }
26688
26689 if (TARGET_BACKTRACE)
26690 {
26691 HOST_WIDE_INT offset = 0;
26692 unsigned work_register;
26693 rtx work_reg, x, arm_hfp_rtx;
26694
26695 /* We have been asked to create a stack backtrace structure.
26696 The code looks like this:
26697
26698 0 .align 2
26699 0 func:
26700 0 sub SP, #16 Reserve space for 4 registers.
26701 2 push {R7} Push low registers.
26702 4 add R7, SP, #20 Get the stack pointer before the push.
26703 6 str R7, [SP, #8] Store the stack pointer
26704 (before reserving the space).
26705 8 mov R7, PC Get hold of the start of this code + 12.
26706 10 str R7, [SP, #16] Store it.
26707 12 mov R7, FP Get hold of the current frame pointer.
26708 14 str R7, [SP, #4] Store it.
26709 16 mov R7, LR Get hold of the current return address.
26710 18 str R7, [SP, #12] Store it.
26711 20 add R7, SP, #16 Point at the start of the
26712 backtrace structure.
26713 22 mov FP, R7 Put this value into the frame pointer. */
26714
26715 work_register = thumb_find_work_register (live_regs_mask);
26716 work_reg = gen_rtx_REG (SImode, work_register);
26717 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26718
26719 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26720 stack_pointer_rtx, GEN_INT (-16)));
26721 RTX_FRAME_RELATED_P (insn) = 1;
26722
26723 if (l_mask)
26724 {
26725 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26726 RTX_FRAME_RELATED_P (insn) = 1;
26727 lr_needs_saving = false;
26728
26729 offset = bit_count (l_mask) * UNITS_PER_WORD;
26730 }
26731
26732 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26733 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26734
26735 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26736 x = gen_frame_mem (SImode, x);
26737 emit_move_insn (x, work_reg);
26738
26739 /* Make sure that the instruction fetching the PC is in the right place
26740 to calculate "start of backtrace creation code + 12". */
26741 /* ??? The stores using the common WORK_REG ought to be enough to
26742 prevent the scheduler from doing anything weird. Failing that
26743 we could always move all of the following into an UNSPEC_VOLATILE. */
26744 if (l_mask)
26745 {
26746 x = gen_rtx_REG (SImode, PC_REGNUM);
26747 emit_move_insn (work_reg, x);
26748
26749 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26750 x = gen_frame_mem (SImode, x);
26751 emit_move_insn (x, work_reg);
26752
26753 emit_move_insn (work_reg, arm_hfp_rtx);
26754
26755 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26756 x = gen_frame_mem (SImode, x);
26757 emit_move_insn (x, work_reg);
26758 }
26759 else
26760 {
26761 emit_move_insn (work_reg, arm_hfp_rtx);
26762
26763 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26764 x = gen_frame_mem (SImode, x);
26765 emit_move_insn (x, work_reg);
26766
26767 x = gen_rtx_REG (SImode, PC_REGNUM);
26768 emit_move_insn (work_reg, x);
26769
26770 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26771 x = gen_frame_mem (SImode, x);
26772 emit_move_insn (x, work_reg);
26773 }
26774
26775 x = gen_rtx_REG (SImode, LR_REGNUM);
26776 emit_move_insn (work_reg, x);
26777
26778 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26779 x = gen_frame_mem (SImode, x);
26780 emit_move_insn (x, work_reg);
26781
26782 x = GEN_INT (offset + 12);
26783 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26784
26785 emit_move_insn (arm_hfp_rtx, work_reg);
26786 }
26787 /* Optimization: If we are not pushing any low registers but we are going
26788 to push some high registers then delay our first push. This will just
26789 be a push of LR and we can combine it with the push of the first high
26790 register. */
26791 else if ((l_mask & 0xff) != 0
26792 || (high_regs_pushed == 0 && lr_needs_saving))
26793 {
26794 unsigned long mask = l_mask;
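      /* thumb1_extra_regs_pushed returns a count of additional low registers
	 that can be pushed purely to allocate stack space; turn that count
	 into a mask of r0..r(count-1).  */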
26795 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26796 insn = thumb1_emit_multi_reg_push (mask, mask);
26797 RTX_FRAME_RELATED_P (insn) = 1;
26798 lr_needs_saving = false;
26799 }
26800
26801 if (high_regs_pushed)
26802 {
26803 unsigned pushable_regs;
26804 unsigned next_hi_reg;
26805 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26806 : crtl->args.info.nregs;
26807 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26808
26809 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26810 if (live_regs_mask & (1 << next_hi_reg))
26811 break;
26812
26813 	 /* Here we need to mask out registers used for passing arguments,
26814 	    even if they could otherwise be pushed.  This is to avoid using
26815 	    them to stash the high registers, since such a stash could
26816 	    clobber live argument values.  */
26817 pushable_regs = l_mask & (~arg_regs_mask);
26818 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
26819
26820 /* Normally, LR can be used as a scratch register once it has been
26821 saved; but if the function examines its own return address then
26822 the value is still live and we need to avoid using it. */
26823 bool return_addr_live
26824 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
26825 LR_REGNUM);
26826
26827 if (lr_needs_saving || return_addr_live)
26828 pushable_regs &= ~(1 << LR_REGNUM);
26829
26830 if (pushable_regs == 0)
26831 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26832
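      /* Thumb-1 PUSH cannot encode r8-r11 directly, so copy each live high
	 register into a free low register and push it from there, one batch
	 at a time.  */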
26833 while (high_regs_pushed > 0)
26834 {
26835 unsigned long real_regs_mask = 0;
26836 unsigned long push_mask = 0;
26837
26838 for (regno = LR_REGNUM; regno >= 0; regno --)
26839 {
26840 if (pushable_regs & (1 << regno))
26841 {
26842 emit_move_insn (gen_rtx_REG (SImode, regno),
26843 gen_rtx_REG (SImode, next_hi_reg));
26844
26845 high_regs_pushed --;
26846 real_regs_mask |= (1 << next_hi_reg);
26847 push_mask |= (1 << regno);
26848
26849 if (high_regs_pushed)
26850 {
26851 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26852 next_hi_reg --)
26853 if (live_regs_mask & (1 << next_hi_reg))
26854 break;
26855 }
26856 else
26857 break;
26858 }
26859 }
26860
26861 /* If we had to find a work register and we have not yet
26862 saved the LR then add it to the list of regs to push. */
26863 if (lr_needs_saving)
26864 {
26865 push_mask |= 1 << LR_REGNUM;
26866 real_regs_mask |= 1 << LR_REGNUM;
26867 lr_needs_saving = false;
26868 /* If the return address is not live at this point, we
26869 can add LR to the list of registers that we can use
26870 for pushes. */
26871 if (!return_addr_live)
26872 pushable_regs |= 1 << LR_REGNUM;
26873 }
26874
26875 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
26876 RTX_FRAME_RELATED_P (insn) = 1;
26877 }
26878 }
26879
26880 /* Load the pic register before setting the frame pointer,
26881 so we can use r7 as a temporary work register. */
26882 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26883 arm_load_pic_register (live_regs_mask, NULL_RTX);
26884
26885 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26886 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26887 stack_pointer_rtx);
26888
26889 size = offsets->outgoing_args - offsets->saved_args;
26890 if (flag_stack_usage_info)
26891 current_function_static_stack_size = size;
26892
26893 /* If we have a frame, then do stack checking. FIXME: not implemented. */
26894 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26895 || flag_stack_clash_protection)
26896 && size)
26897 sorry ("%<-fstack-check=specific%> for Thumb-1");
26898
26899 amount = offsets->outgoing_args - offsets->saved_regs;
26900 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
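  /* Registers pushed solely to allocate stack space already account for
     4 bytes each of the frame, so the explicit adjustment is reduced
     accordingly.  */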
26901 if (amount)
26902 {
26903 if (amount < 512)
26904 {
26905 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26906 GEN_INT (- amount)));
26907 RTX_FRAME_RELATED_P (insn) = 1;
26908 }
26909 else
26910 {
26911 rtx reg, dwarf;
26912
26913 /* The stack decrement is too big for an immediate value in a single
26914 insn. In theory we could issue multiple subtracts, but after
26915 three of them it becomes more space efficient to place the full
26916 value in the constant pool and load into a register. (Also the
26917 ARM debugger really likes to see only one stack decrement per
26918 function). So instead we look for a scratch register into which
26919 we can load the decrement, and then we subtract this from the
26920 	 stack pointer.  Unfortunately, on Thumb the only available
26921 	 scratch registers are the argument registers, and we cannot use
26922 	 these as they may hold arguments to the function.  Instead we
26923 	 attempt to locate a call-preserved register which is used by this
26924 function. If we can find one, then we know that it will have
26925 been pushed at the start of the prologue and so we can corrupt
26926 it now. */
26927 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26928 if (live_regs_mask & (1 << regno))
26929 break;
26930
26931 	  gcc_assert (regno <= LAST_LO_REGNUM);
26932
26933 reg = gen_rtx_REG (SImode, regno);
26934
26935 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26936
26937 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26938 stack_pointer_rtx, reg));
26939
26940 dwarf = gen_rtx_SET (stack_pointer_rtx,
26941 plus_constant (Pmode, stack_pointer_rtx,
26942 -amount));
26943 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26944 RTX_FRAME_RELATED_P (insn) = 1;
26945 }
26946 }
26947
26948 if (frame_pointer_needed)
26949 thumb_set_frame_pointer (offsets);
26950
26951 /* If we are profiling, make sure no instructions are scheduled before
26952 the call to mcount. Similarly if the user has requested no
26953 scheduling in the prolog. Similarly if we want non-call exceptions
26954 using the EABI unwinder, to prevent faulting instructions from being
26955 swapped with a stack adjustment. */
26956 if (crtl->profile || !TARGET_SCHED_PROLOG
26957 || (arm_except_unwind_info (&global_options) == UI_TARGET
26958 && cfun->can_throw_non_call_exceptions))
26959 emit_insn (gen_blockage ());
26960
26961 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26962 if (live_regs_mask & 0xff)
26963 cfun->machine->lr_save_eliminated = 0;
26964 }
26965
26966 /* Clear caller saved registers not used to pass return values and leaked
26967 condition flags before exiting a cmse_nonsecure_entry function. */
26968
26969 void
26970 cmse_nonsecure_entry_clear_before_return (void)
26971 {
26972 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
26973 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
26974 uint32_t padding_bits_to_clear = 0;
26975 auto_sbitmap to_clear_bitmap (maxregno + 1);
26976 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
26977 tree result_type;
26978
26979 bitmap_clear (to_clear_bitmap);
26980 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
26981 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
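  /* Start with the core argument registers r0-r3 and IP; registers used to
     return the result are removed from the bitmap further down.  */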
26982
26983 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
26984 registers. */
26985 if (clear_vfpregs)
26986 {
26987 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
26988
26989 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
26990
26991 if (!TARGET_HAVE_FPCXT_CMSE)
26992 {
26993 /* Make sure we don't clear the two scratch registers used to clear
26994 the relevant FPSCR bits in output_return_instruction. */
26995 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
26996 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
26997 emit_use (gen_rtx_REG (SImode, 4));
26998 bitmap_clear_bit (to_clear_bitmap, 4);
26999 }
27000 }
27001
27002 /* If the user has defined registers to be caller saved, these are no longer
27003 restored by the function before returning and must thus be cleared for
27004 security purposes. */
27005 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27006 {
27007 /* We do not touch registers that can be used to pass arguments as per
27008 the AAPCS, since these should never be made callee-saved by user
27009 options. */
27010 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27011 continue;
27012 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27013 continue;
27014 if (!callee_saved_reg_p (regno)
27015 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27016 || TARGET_HARD_FLOAT))
27017 bitmap_set_bit (to_clear_bitmap, regno);
27018 }
27019
27020 /* Make sure we do not clear the registers used to return the result in. */
27021 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27022 if (!VOID_TYPE_P (result_type))
27023 {
27024 uint64_t to_clear_return_mask;
27025 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27026
27027 /* No need to check that we return in registers, because we don't
27028 support returning on stack yet. */
27029 gcc_assert (REG_P (result_rtl));
27030 to_clear_return_mask
27031 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27032 &padding_bits_to_clear);
27033 if (to_clear_return_mask)
27034 {
27035 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27036 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27037 {
27038 if (to_clear_return_mask & (1ULL << regno))
27039 bitmap_clear_bit (to_clear_bitmap, regno);
27040 }
27041 }
27042 }
27043
27044 if (padding_bits_to_clear != 0)
27045 {
27046 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27047 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27048
27049       /* Padding_bits_to_clear is not 0 so we know we are dealing with
27050 	 returning a composite type, which only uses r0.  Let's make sure that
27051 	 r1-r3 are cleared too.  */
27052 bitmap_clear (to_clear_arg_regs_bitmap);
27053 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27054 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27055 }
27056
27057 /* Clear full registers that leak before returning. */
27058 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27059 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27060 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27061 clearing_reg);
27062 }
27063
27064 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27065    single POP instruction can be generated.  LR should be replaced by PC.
27066    All the checks required are already done by USE_RETURN_INSN ().  Hence,
27067    all we really need to check here is whether a single register or
27068    multiple registers are being popped.  */
27069 void
27070 thumb2_expand_return (bool simple_return)
27071 {
27072 int i, num_regs;
27073 unsigned long saved_regs_mask;
27074 arm_stack_offsets *offsets;
27075
27076 offsets = arm_get_frame_offsets ();
27077 saved_regs_mask = offsets->saved_regs_mask;
27078
27079 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27080 if (saved_regs_mask & (1 << i))
27081 num_regs++;
27082
27083 if (!simple_return && saved_regs_mask)
27084 {
27085 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27086 functions or adapt code to handle according to ACLE. This path should
27087 not be reachable for cmse_nonsecure_entry functions though we prefer
27088 to assert it for now to ensure that future code changes do not silently
27089 change this behavior. */
27090 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27091 if (num_regs == 1)
27092 {
27093 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27094 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27095 rtx addr = gen_rtx_MEM (SImode,
27096 gen_rtx_POST_INC (SImode,
27097 stack_pointer_rtx));
27098 set_mem_alias_set (addr, get_frame_alias_set ());
27099 XVECEXP (par, 0, 0) = ret_rtx;
27100 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27101 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27102 emit_jump_insn (par);
27103 }
27104 else
27105 {
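	  /* Pop the saved registers, loading the saved LR value directly
	     into PC so that the pop itself performs the return.  */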
27106 saved_regs_mask &= ~ (1 << LR_REGNUM);
27107 saved_regs_mask |= (1 << PC_REGNUM);
27108 arm_emit_multi_reg_pop (saved_regs_mask);
27109 }
27110 }
27111 else
27112 {
27113 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27114 cmse_nonsecure_entry_clear_before_return ();
27115 emit_jump_insn (simple_return_rtx);
27116 }
27117 }
27118
27119 void
27120 thumb1_expand_epilogue (void)
27121 {
27122 HOST_WIDE_INT amount;
27123 arm_stack_offsets *offsets;
27124 int regno;
27125
27126   /* Naked functions don't have epilogues.  */
27127 if (IS_NAKED (arm_current_func_type ()))
27128 return;
27129
27130 offsets = arm_get_frame_offsets ();
27131 amount = offsets->outgoing_args - offsets->saved_regs;
27132
27133 if (frame_pointer_needed)
27134 {
27135 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27136 amount = offsets->locals_base - offsets->saved_regs;
27137 }
27138 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27139
27140 gcc_assert (amount >= 0);
27141 if (amount)
27142 {
27143 emit_insn (gen_blockage ());
27144
27145 if (amount < 512)
27146 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27147 GEN_INT (amount)));
27148 else
27149 {
27150 /* r3 is always free in the epilogue. */
27151 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27152
27153 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27154 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27155 }
27156 }
27157
27158 /* Emit a USE (stack_pointer_rtx), so that
27159 the stack adjustment will not be deleted. */
27160 emit_insn (gen_force_register_use (stack_pointer_rtx));
27161
27162 if (crtl->profile || !TARGET_SCHED_PROLOG)
27163 emit_insn (gen_blockage ());
27164
27165   /* Emit a clobber for each register that will be restored in the epilogue,
27166 so that flow2 will get register lifetimes correct. */
27167 for (regno = 0; regno < 13; regno++)
27168 if (reg_needs_saving_p (regno))
27169 emit_clobber (gen_rtx_REG (SImode, regno));
27170
27171 if (! df_regs_ever_live_p (LR_REGNUM))
27172 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27173
27174 /* Clear all caller-saved regs that are not used to return. */
27175 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27176 cmse_nonsecure_entry_clear_before_return ();
27177 }
27178
27179 /* Epilogue code for APCS frame. */
27180 static void
27181 arm_expand_epilogue_apcs_frame (bool really_return)
27182 {
27183 unsigned long func_type;
27184 unsigned long saved_regs_mask;
27185 int num_regs = 0;
27186 int i;
27187 int floats_from_frame = 0;
27188 arm_stack_offsets *offsets;
27189
27190 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27191 func_type = arm_current_func_type ();
27192
27193 /* Get frame offsets for ARM. */
27194 offsets = arm_get_frame_offsets ();
27195 saved_regs_mask = offsets->saved_regs_mask;
27196
27197 /* Find the offset of the floating-point save area in the frame. */
27198 floats_from_frame
27199 = (offsets->saved_args
27200 + arm_compute_static_chain_stack_bytes ()
27201 - offsets->frame);
27202
27203   /* Compute how many core registers are saved and how far away the floats are.  */
27204 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27205 if (saved_regs_mask & (1 << i))
27206 {
27207 num_regs++;
27208 floats_from_frame += 4;
27209 }
27210
27211 if (TARGET_VFP_BASE)
27212 {
27213 int start_reg;
27214 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27215
27216 /* The offset is from IP_REGNUM. */
27217 int saved_size = arm_get_vfp_saved_size ();
27218 if (saved_size > 0)
27219 {
27220 rtx_insn *insn;
27221 floats_from_frame += saved_size;
27222 insn = emit_insn (gen_addsi3 (ip_rtx,
27223 hard_frame_pointer_rtx,
27224 GEN_INT (-floats_from_frame)));
27225 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27226 ip_rtx, hard_frame_pointer_rtx);
27227 }
27228
27229 /* Generate VFP register multi-pop. */
27230 start_reg = FIRST_VFP_REGNUM;
27231
27232 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27233 /* Look for a case where a reg does not need restoring. */
27234 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27235 {
27236 if (start_reg != i)
27237 arm_emit_vfp_multi_reg_pop (start_reg,
27238 (i - start_reg) / 2,
27239 gen_rtx_REG (SImode,
27240 IP_REGNUM));
27241 start_reg = i + 2;
27242 }
27243
27244 /* Restore the remaining regs that we have discovered (or possibly
27245 even all of them, if the conditional in the for loop never
27246 fired). */
27247 if (start_reg != i)
27248 arm_emit_vfp_multi_reg_pop (start_reg,
27249 (i - start_reg) / 2,
27250 gen_rtx_REG (SImode, IP_REGNUM));
27251 }
27252
27253 if (TARGET_IWMMXT)
27254 {
27255 /* The frame pointer is guaranteed to be non-double-word aligned, as
27256 it is set to double-word-aligned old_stack_pointer - 4. */
27257 rtx_insn *insn;
27258 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27259
27260 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27261 if (reg_needs_saving_p (i))
27262 {
27263 rtx addr = gen_frame_mem (V2SImode,
27264 plus_constant (Pmode, hard_frame_pointer_rtx,
27265 - lrm_count * 4));
27266 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27267 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27268 gen_rtx_REG (V2SImode, i),
27269 NULL_RTX);
27270 lrm_count += 2;
27271 }
27272 }
27273
27274   /* saved_regs_mask should contain IP, which holds the old stack pointer
27275      at the time the activation record was created.  Since SP and IP are
27276      adjacent registers, we can restore the value directly into SP.  */
27277 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27278 saved_regs_mask &= ~(1 << IP_REGNUM);
27279 saved_regs_mask |= (1 << SP_REGNUM);
27280
27281 /* There are two registers left in saved_regs_mask - LR and PC. We
27282 only need to restore LR (the return address), but to
27283 save time we can load it directly into PC, unless we need a
27284 special function exit sequence, or we are not really returning. */
27285 if (really_return
27286 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27287 && !crtl->calls_eh_return)
27288 /* Delete LR from the register mask, so that LR on
27289 the stack is loaded into the PC in the register mask. */
27290 saved_regs_mask &= ~(1 << LR_REGNUM);
27291 else
27292 saved_regs_mask &= ~(1 << PC_REGNUM);
27293
27294 num_regs = bit_count (saved_regs_mask);
27295 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27296 {
27297 rtx_insn *insn;
27298 emit_insn (gen_blockage ());
27299 /* Unwind the stack to just below the saved registers. */
27300 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27301 hard_frame_pointer_rtx,
27302 GEN_INT (- 4 * num_regs)));
27303
27304 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27305 stack_pointer_rtx, hard_frame_pointer_rtx);
27306 }
27307
27308 arm_emit_multi_reg_pop (saved_regs_mask);
27309
27310 if (IS_INTERRUPT (func_type))
27311 {
27312 /* Interrupt handlers will have pushed the
27313 IP onto the stack, so restore it now. */
27314 rtx_insn *insn;
27315 rtx addr = gen_rtx_MEM (SImode,
27316 gen_rtx_POST_INC (SImode,
27317 stack_pointer_rtx));
27318 set_mem_alias_set (addr, get_frame_alias_set ());
27319 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27320 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27321 gen_rtx_REG (SImode, IP_REGNUM),
27322 NULL_RTX);
27323 }
27324
27325 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27326 return;
27327
27328 if (crtl->calls_eh_return)
27329 emit_insn (gen_addsi3 (stack_pointer_rtx,
27330 stack_pointer_rtx,
27331 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27332
27333 if (IS_STACKALIGN (func_type))
27334 /* Restore the original stack pointer. Before prologue, the stack was
27335 realigned and the original stack pointer saved in r0. For details,
27336 see comment in arm_expand_prologue. */
27337 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27338
27339 emit_jump_insn (simple_return_rtx);
27340 }
27341
27342 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27343 function is not a sibcall. */
27344 void
27345 arm_expand_epilogue (bool really_return)
27346 {
27347 unsigned long func_type;
27348 unsigned long saved_regs_mask;
27349 int num_regs = 0;
27350 int i;
27351 int amount;
27352 arm_stack_offsets *offsets;
27353
27354 func_type = arm_current_func_type ();
27355
27356   /* Naked functions don't have epilogues.  Hence, generate a return pattern and
27357      let output_return_instruction take care of any instruction emission.  */
27358 if (IS_NAKED (func_type)
27359 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27360 {
27361 if (really_return)
27362 emit_jump_insn (simple_return_rtx);
27363 return;
27364 }
27365
27366 /* If we are throwing an exception, then we really must be doing a
27367 return, so we can't tail-call. */
27368 gcc_assert (!crtl->calls_eh_return || really_return);
27369
27370 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27371 {
27372 arm_expand_epilogue_apcs_frame (really_return);
27373 return;
27374 }
27375
27376 /* Get frame offsets for ARM. */
27377 offsets = arm_get_frame_offsets ();
27378 saved_regs_mask = offsets->saved_regs_mask;
27379 num_regs = bit_count (saved_regs_mask);
27380
27381 if (frame_pointer_needed)
27382 {
27383 rtx_insn *insn;
27384 /* Restore stack pointer if necessary. */
27385 if (TARGET_ARM)
27386 {
27387 /* In ARM mode, frame pointer points to first saved register.
27388 Restore stack pointer to last saved register. */
27389 amount = offsets->frame - offsets->saved_regs;
27390
27391 /* Force out any pending memory operations that reference stacked data
27392 before stack de-allocation occurs. */
27393 emit_insn (gen_blockage ());
27394 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27395 hard_frame_pointer_rtx,
27396 GEN_INT (amount)));
27397 arm_add_cfa_adjust_cfa_note (insn, amount,
27398 stack_pointer_rtx,
27399 hard_frame_pointer_rtx);
27400
27401 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27402 deleted. */
27403 emit_insn (gen_force_register_use (stack_pointer_rtx));
27404 }
27405 else
27406 {
27407 /* In Thumb-2 mode, the frame pointer points to the last saved
27408 register. */
27409 amount = offsets->locals_base - offsets->saved_regs;
27410 if (amount)
27411 {
27412 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27413 hard_frame_pointer_rtx,
27414 GEN_INT (amount)));
27415 arm_add_cfa_adjust_cfa_note (insn, amount,
27416 hard_frame_pointer_rtx,
27417 hard_frame_pointer_rtx);
27418 }
27419
27420 /* Force out any pending memory operations that reference stacked data
27421 before stack de-allocation occurs. */
27422 emit_insn (gen_blockage ());
27423 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27424 hard_frame_pointer_rtx));
27425 arm_add_cfa_adjust_cfa_note (insn, 0,
27426 stack_pointer_rtx,
27427 hard_frame_pointer_rtx);
27428 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27429 deleted. */
27430 emit_insn (gen_force_register_use (stack_pointer_rtx));
27431 }
27432 }
27433 else
27434 {
27435 /* Pop off outgoing args and local frame to adjust stack pointer to
27436 last saved register. */
27437 amount = offsets->outgoing_args - offsets->saved_regs;
27438 if (amount)
27439 {
27440 rtx_insn *tmp;
27441 /* Force out any pending memory operations that reference stacked data
27442 before stack de-allocation occurs. */
27443 emit_insn (gen_blockage ());
27444 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27445 stack_pointer_rtx,
27446 GEN_INT (amount)));
27447 arm_add_cfa_adjust_cfa_note (tmp, amount,
27448 stack_pointer_rtx, stack_pointer_rtx);
27449 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27450 not deleted. */
27451 emit_insn (gen_force_register_use (stack_pointer_rtx));
27452 }
27453 }
27454
27455 if (TARGET_VFP_BASE)
27456 {
27457 /* Generate VFP register multi-pop. */
27458 int end_reg = LAST_VFP_REGNUM + 1;
27459
27460 /* Scan the registers in reverse order. We need to match
27461 any groupings made in the prologue and generate matching
27462 vldm operations. The need to match groups is because,
27463 unlike pop, vldm can only do consecutive regs. */
27464 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27465 /* Look for a case where a reg does not need restoring. */
27466 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27467 {
27468 /* Restore the regs discovered so far (from reg+2 to
27469 end_reg). */
27470 if (end_reg > i + 2)
27471 arm_emit_vfp_multi_reg_pop (i + 2,
27472 (end_reg - (i + 2)) / 2,
27473 stack_pointer_rtx);
27474 end_reg = i;
27475 }
27476
27477 /* Restore the remaining regs that we have discovered (or possibly
27478 even all of them, if the conditional in the for loop never
27479 fired). */
27480 if (end_reg > i + 2)
27481 arm_emit_vfp_multi_reg_pop (i + 2,
27482 (end_reg - (i + 2)) / 2,
27483 stack_pointer_rtx);
27484 }
27485
27486 if (TARGET_IWMMXT)
27487 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27488 if (reg_needs_saving_p (i))
27489 {
27490 rtx_insn *insn;
27491 rtx addr = gen_rtx_MEM (V2SImode,
27492 gen_rtx_POST_INC (SImode,
27493 stack_pointer_rtx));
27494 set_mem_alias_set (addr, get_frame_alias_set ());
27495 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27496 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27497 gen_rtx_REG (V2SImode, i),
27498 NULL_RTX);
27499 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27500 stack_pointer_rtx, stack_pointer_rtx);
27501 }
27502
27503 if (saved_regs_mask)
27504 {
27505 rtx insn;
27506 bool return_in_pc = false;
27507
27508 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27509 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27510 && !IS_CMSE_ENTRY (func_type)
27511 && !IS_STACKALIGN (func_type)
27512 && really_return
27513 && crtl->args.pretend_args_size == 0
27514 && saved_regs_mask & (1 << LR_REGNUM)
27515 && !crtl->calls_eh_return)
27516 {
27517 saved_regs_mask &= ~(1 << LR_REGNUM);
27518 saved_regs_mask |= (1 << PC_REGNUM);
27519 return_in_pc = true;
27520 }
27521
27522 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27523 {
27524 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27525 if (saved_regs_mask & (1 << i))
27526 {
27527 rtx addr = gen_rtx_MEM (SImode,
27528 gen_rtx_POST_INC (SImode,
27529 stack_pointer_rtx));
27530 set_mem_alias_set (addr, get_frame_alias_set ());
27531
27532 if (i == PC_REGNUM)
27533 {
27534 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27535 XVECEXP (insn, 0, 0) = ret_rtx;
27536 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27537 addr);
27538 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27539 insn = emit_jump_insn (insn);
27540 }
27541 else
27542 {
27543 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27544 addr));
27545 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27546 gen_rtx_REG (SImode, i),
27547 NULL_RTX);
27548 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27549 stack_pointer_rtx,
27550 stack_pointer_rtx);
27551 }
27552 }
27553 }
27554 else
27555 {
27556 if (TARGET_LDRD
27557 && current_tune->prefer_ldrd_strd
27558 && !optimize_function_for_size_p (cfun))
27559 {
27560 if (TARGET_THUMB2)
27561 thumb2_emit_ldrd_pop (saved_regs_mask);
27562 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27563 arm_emit_ldrd_pop (saved_regs_mask);
27564 else
27565 arm_emit_multi_reg_pop (saved_regs_mask);
27566 }
27567 else
27568 arm_emit_multi_reg_pop (saved_regs_mask);
27569 }
27570
27571 if (return_in_pc)
27572 return;
27573 }
27574
27575 amount
27576 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27577 if (amount)
27578 {
27579 int i, j;
27580 rtx dwarf = NULL_RTX;
27581 rtx_insn *tmp =
27582 emit_insn (gen_addsi3 (stack_pointer_rtx,
27583 stack_pointer_rtx,
27584 GEN_INT (amount)));
27585
27586 RTX_FRAME_RELATED_P (tmp) = 1;
27587
27588 if (cfun->machine->uses_anonymous_args)
27589 {
27590 	  /* Restore pretend args.  Refer to arm_expand_prologue for how
27591 	     pretend args are saved on the stack.  */
27592 int num_regs = crtl->args.pretend_args_size / 4;
27593 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
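	  /* E.g. for two pretend-arg words this gives 0xc (r2 and r3), the
	     same registers the prologue pushed.  */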
27594 for (j = 0, i = 0; j < num_regs; i++)
27595 if (saved_regs_mask & (1 << i))
27596 {
27597 rtx reg = gen_rtx_REG (SImode, i);
27598 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27599 j++;
27600 }
27601 REG_NOTES (tmp) = dwarf;
27602 }
27603 arm_add_cfa_adjust_cfa_note (tmp, amount,
27604 stack_pointer_rtx, stack_pointer_rtx);
27605 }
27606
27607 if (IS_CMSE_ENTRY (func_type))
27608 {
27609 /* CMSE_ENTRY always returns. */
27610 gcc_assert (really_return);
27611 /* Clear all caller-saved regs that are not used to return. */
27612 cmse_nonsecure_entry_clear_before_return ();
27613
27614 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27615 VLDR. */
27616 if (TARGET_HAVE_FPCXT_CMSE)
27617 {
27618 rtx_insn *insn;
27619
27620 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27621 GEN_INT (FPCXTNS_ENUM)));
27622 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27623 plus_constant (Pmode, stack_pointer_rtx, 4));
27624 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27625 RTX_FRAME_RELATED_P (insn) = 1;
27626 }
27627 }
27628
27629 if (!really_return)
27630 return;
27631
27632 if (crtl->calls_eh_return)
27633 emit_insn (gen_addsi3 (stack_pointer_rtx,
27634 stack_pointer_rtx,
27635 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27636
27637 if (IS_STACKALIGN (func_type))
27638 /* Restore the original stack pointer. Before prologue, the stack was
27639 realigned and the original stack pointer saved in r0. For details,
27640 see comment in arm_expand_prologue. */
27641 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27642
27643 emit_jump_insn (simple_return_rtx);
27644 }
27645
27646 /* Implementation of insn prologue_thumb1_interwork. This is the first
27647 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27648
27649 const char *
27650 thumb1_output_interwork (void)
27651 {
27652 const char * name;
27653 FILE *f = asm_out_file;
27654
27655 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27656 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27657 == SYMBOL_REF);
27658 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27659
27660 /* Generate code sequence to switch us into Thumb mode. */
27661 /* The .code 32 directive has already been emitted by
27662 ASM_DECLARE_FUNCTION_NAME. */
27663 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27664 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27665
27666 /* Generate a label, so that the debugger will notice the
27667 change in instruction sets. This label is also used by
27668 the assembler to bypass the ARM code when this function
27669 is called from a Thumb encoded function elsewhere in the
27670 same file. Hence the definition of STUB_NAME here must
27671 agree with the definition in gas/config/tc-arm.c. */
27672
27673 #define STUB_NAME ".real_start_of"
27674
27675 fprintf (f, "\t.code\t16\n");
27676 #ifdef ARM_PE
27677 if (arm_dllexport_name_p (name))
27678 name = arm_strip_name_encoding (name);
27679 #endif
27680 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27681 fprintf (f, "\t.thumb_func\n");
27682 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27683
27684 return "";
27685 }
27686
27687 /* Handle the case of a double word load into a low register from
27688 a computed memory address. The computed address may involve a
27689 register which is overwritten by the load. */
27690 const char *
27691 thumb_load_double_from_address (rtx *operands)
27692 {
27693 rtx addr;
27694 rtx base;
27695 rtx offset;
27696 rtx arg1;
27697 rtx arg2;
27698
27699 gcc_assert (REG_P (operands[0]));
27700 gcc_assert (MEM_P (operands[1]));
27701
27702 /* Get the memory address. */
27703 addr = XEXP (operands[1], 0);
27704
27705 /* Work out how the memory address is computed. */
27706 switch (GET_CODE (addr))
27707 {
27708 case REG:
27709 operands[2] = adjust_address (operands[1], SImode, 4);
27710
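      /* If the low destination register overlaps the base register, load
	 the high word first so the address is not clobbered before the
	 second load.  */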
27711 if (REGNO (operands[0]) == REGNO (addr))
27712 {
27713 output_asm_insn ("ldr\t%H0, %2", operands);
27714 output_asm_insn ("ldr\t%0, %1", operands);
27715 }
27716 else
27717 {
27718 output_asm_insn ("ldr\t%0, %1", operands);
27719 output_asm_insn ("ldr\t%H0, %2", operands);
27720 }
27721 break;
27722
27723 case CONST:
27724 /* Compute <address> + 4 for the high order load. */
27725 operands[2] = adjust_address (operands[1], SImode, 4);
27726
27727 output_asm_insn ("ldr\t%0, %1", operands);
27728 output_asm_insn ("ldr\t%H0, %2", operands);
27729 break;
27730
27731 case PLUS:
27732 arg1 = XEXP (addr, 0);
27733 arg2 = XEXP (addr, 1);
27734
27735 if (CONSTANT_P (arg1))
27736 base = arg2, offset = arg1;
27737 else
27738 base = arg1, offset = arg2;
27739
27740 gcc_assert (REG_P (base));
27741
27742 /* Catch the case of <address> = <reg> + <reg> */
27743 if (REG_P (offset))
27744 {
27745 int reg_offset = REGNO (offset);
27746 int reg_base = REGNO (base);
27747 int reg_dest = REGNO (operands[0]);
27748
27749 /* Add the base and offset registers together into the
27750 higher destination register. */
27751 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27752 reg_dest + 1, reg_base, reg_offset);
27753
27754 /* Load the lower destination register from the address in
27755 the higher destination register. */
27756 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27757 reg_dest, reg_dest + 1);
27758
27759 /* Load the higher destination register from its own address
27760 plus 4. */
27761 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27762 reg_dest + 1, reg_dest + 1);
27763 }
27764 else
27765 {
27766 /* Compute <address> + 4 for the high order load. */
27767 operands[2] = adjust_address (operands[1], SImode, 4);
27768
27769 /* If the computed address is held in the low order register
27770 then load the high order register first, otherwise always
27771 load the low order register first. */
27772 if (REGNO (operands[0]) == REGNO (base))
27773 {
27774 output_asm_insn ("ldr\t%H0, %2", operands);
27775 output_asm_insn ("ldr\t%0, %1", operands);
27776 }
27777 else
27778 {
27779 output_asm_insn ("ldr\t%0, %1", operands);
27780 output_asm_insn ("ldr\t%H0, %2", operands);
27781 }
27782 }
27783 break;
27784
27785 case LABEL_REF:
27786 /* With no registers to worry about we can just load the value
27787 directly. */
27788 operands[2] = adjust_address (operands[1], SImode, 4);
27789
27790 output_asm_insn ("ldr\t%H0, %2", operands);
27791 output_asm_insn ("ldr\t%0, %1", operands);
27792 break;
27793
27794 default:
27795 gcc_unreachable ();
27796 }
27797
27798 return "";
27799 }
27800
27801 const char *
27802 thumb_output_move_mem_multiple (int n, rtx *operands)
27803 {
27804 switch (n)
27805 {
27806 case 2:
27807 if (REGNO (operands[4]) > REGNO (operands[5]))
27808 std::swap (operands[4], operands[5]);
27809
27810 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27811 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27812 break;
27813
27814 case 3:
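      /* Sort the three scratch registers into ascending order, since the
	 register lists of ldmia/stmia must be ascending.  */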
27815 if (REGNO (operands[4]) > REGNO (operands[5]))
27816 std::swap (operands[4], operands[5]);
27817 if (REGNO (operands[5]) > REGNO (operands[6]))
27818 std::swap (operands[5], operands[6]);
27819 if (REGNO (operands[4]) > REGNO (operands[5]))
27820 std::swap (operands[4], operands[5]);
27821
27822 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27823 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27824 break;
27825
27826 default:
27827 gcc_unreachable ();
27828 }
27829
27830 return "";
27831 }
27832
27833 /* Output a call-via instruction for thumb state. */
27834 const char *
27835 thumb_call_via_reg (rtx reg)
27836 {
27837 int regno = REGNO (reg);
27838 rtx *labelp;
27839
27840 gcc_assert (regno < LR_REGNUM);
27841
27842 /* If we are in the normal text section we can use a single instance
27843 per compilation unit. If we are doing function sections, then we need
27844 an entry per section, since we can't rely on reachability. */
27845 if (in_section == text_section)
27846 {
27847 thumb_call_reg_needed = 1;
27848
27849 if (thumb_call_via_label[regno] == NULL)
27850 thumb_call_via_label[regno] = gen_label_rtx ();
27851 labelp = thumb_call_via_label + regno;
27852 }
27853 else
27854 {
27855 if (cfun->machine->call_via[regno] == NULL)
27856 cfun->machine->call_via[regno] = gen_label_rtx ();
27857 labelp = cfun->machine->call_via + regno;
27858 }
27859
27860 output_asm_insn ("bl\t%a0", labelp);
27861 return "";
27862 }
27863
27864 /* Routines for generating rtl. */
27865 void
27866 thumb_expand_cpymemqi (rtx *operands)
27867 {
27868 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27869 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27870 HOST_WIDE_INT len = INTVAL (operands[2]);
27871 HOST_WIDE_INT offset = 0;
27872
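  /* Copy the bulk with the multi-word patterns, which update IN and OUT as
     they go; OFFSET only tracks the tail handled by the explicit loads
     below.  */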
27873 while (len >= 12)
27874 {
27875 emit_insn (gen_cpymem12b (out, in, out, in));
27876 len -= 12;
27877 }
27878
27879 if (len >= 8)
27880 {
27881 emit_insn (gen_cpymem8b (out, in, out, in));
27882 len -= 8;
27883 }
27884
27885 if (len >= 4)
27886 {
27887 rtx reg = gen_reg_rtx (SImode);
27888 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27889 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27890 len -= 4;
27891 offset += 4;
27892 }
27893
27894 if (len >= 2)
27895 {
27896 rtx reg = gen_reg_rtx (HImode);
27897 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27898 plus_constant (Pmode, in,
27899 offset))));
27900 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27901 offset)),
27902 reg));
27903 len -= 2;
27904 offset += 2;
27905 }
27906
27907 if (len)
27908 {
27909 rtx reg = gen_reg_rtx (QImode);
27910 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27911 plus_constant (Pmode, in,
27912 offset))));
27913 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27914 offset)),
27915 reg));
27916 }
27917 }
27918
27919 void
27920 thumb_reload_out_hi (rtx *operands)
27921 {
27922 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27923 }
27924
27925 /* Return the length of a function name prefix
27926 that starts with the character 'c'. */
27927 static int
27928 arm_get_strip_length (int c)
27929 {
27930 switch (c)
27931 {
27932 ARM_NAME_ENCODING_LENGTHS
27933 default: return 0;
27934 }
27935 }
27936
27937 /* Return a pointer to a function's name with any
27938 and all prefix encodings stripped from it. */
27939 const char *
27940 arm_strip_name_encoding (const char *name)
27941 {
27942 int skip;
27943
27944 while ((skip = arm_get_strip_length (* name)))
27945 name += skip;
27946
27947 return name;
27948 }
27949
27950 /* If there is a '*' anywhere in the name's prefix, then
27951 emit the stripped name verbatim, otherwise prepend an
27952 underscore if leading underscores are being used. */
27953 void
27954 arm_asm_output_labelref (FILE *stream, const char *name)
27955 {
27956 int skip;
27957 int verbatim = 0;
27958
27959 while ((skip = arm_get_strip_length (* name)))
27960 {
27961 verbatim |= (*name == '*');
27962 name += skip;
27963 }
27964
27965 if (verbatim)
27966 fputs (name, stream);
27967 else
27968 asm_fprintf (stream, "%U%s", name);
27969 }
27970
27971 /* This function is used to emit an EABI tag and its associated value.
27972 We emit the numerical value of the tag in case the assembler does not
27973    support textual tags (e.g. gas prior to 2.20).  If requested we include
27974 the tag name in a comment so that anyone reading the assembler output
27975 will know which tag is being set.
27976
27977 This function is not static because arm-c.c needs it too. */
27978
27979 void
27980 arm_emit_eabi_attribute (const char *name, int num, int val)
27981 {
27982 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27983 if (flag_verbose_asm || flag_debug_asm)
27984 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27985 asm_fprintf (asm_out_file, "\n");
27986 }
27987
27988 /* This function is used to print CPU tuning information as a comment
27989    in the assembler file.  Pointers are not printed for now.  */
27990
27991 void
27992 arm_print_tune_info (void)
27993 {
27994 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
27995 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
27996 current_tune->constant_limit);
27997 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
27998 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
27999 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28000 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28001 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28002 "prefetch.l1_cache_size:\t%d\n",
28003 current_tune->prefetch.l1_cache_size);
28004 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28005 "prefetch.l1_cache_line_size:\t%d\n",
28006 current_tune->prefetch.l1_cache_line_size);
28007 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28008 "prefer_constant_pool:\t%d\n",
28009 (int) current_tune->prefer_constant_pool);
28010 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28011 "branch_cost:\t(s:speed, p:predictable)\n");
28012 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28013 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28014 current_tune->branch_cost (false, false));
28015 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28016 current_tune->branch_cost (false, true));
28017 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28018 current_tune->branch_cost (true, false));
28019 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28020 current_tune->branch_cost (true, true));
28021 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28022 "prefer_ldrd_strd:\t%d\n",
28023 (int) current_tune->prefer_ldrd_strd);
28024 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28025 "logical_op_non_short_circuit:\t[%d,%d]\n",
28026 (int) current_tune->logical_op_non_short_circuit_thumb,
28027 (int) current_tune->logical_op_non_short_circuit_arm);
28028 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28029 "disparage_flag_setting_t16_encodings:\t%d\n",
28030 (int) current_tune->disparage_flag_setting_t16_encodings);
28031 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28032 "string_ops_prefer_neon:\t%d\n",
28033 (int) current_tune->string_ops_prefer_neon);
28034 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28035 "max_insns_inline_memset:\t%d\n",
28036 current_tune->max_insns_inline_memset);
28037 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28038 current_tune->fusible_ops);
28039 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28040 (int) current_tune->sched_autopref);
28041 }
28042
28043 /* Print .arch and .arch_extension directives corresponding to the
28044 current architecture configuration. */
28045 static void
28046 arm_print_asm_arch_directives ()
28047 {
28048 const arch_option *arch
28049 = arm_parse_arch_option_name (all_architectures, "-march",
28050 arm_active_target.arch_name);
28051 auto_sbitmap opt_bits (isa_num_bits);
28052
28053 gcc_assert (arch);
28054
28055 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
28056 arm_last_printed_arch_string = arm_active_target.arch_name;
28057 if (!arch->common.extensions)
28058 return;
28059
28060 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28061 opt->name != NULL;
28062 opt++)
28063 {
28064 if (!opt->remove)
28065 {
28066 arm_initialize_isa (opt_bits, opt->isa_bits);
28067
28068 	  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28069 	     "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28070 	     floating point instructions are disabled.  So the following check
28071 	     restricts the printing of ".arch_extension mve" and
28072 	     ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
28073 	     this special behaviour because the feature bits "mve" and
28074 	     "mve_float" are not part of the "fpu bits", so they are not cleared
28075 	     when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28076 	     TARGET_HAVE_MVE_FLOAT are disabled.  */
28077 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28078 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28079 && !TARGET_HAVE_MVE_FLOAT))
28080 continue;
28081
28082 /* If every feature bit of this option is set in the target
28083 ISA specification, print out the option name. However,
28084 don't print anything if all the bits are part of the
28085 FPU specification. */
28086 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
28087 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28088 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
28089 }
28090 }
28091 }
28092
28093 static void
28094 arm_file_start (void)
28095 {
28096 int val;
28097
28098 if (TARGET_BPABI)
28099 {
28100 /* We don't have a specified CPU. Use the architecture to
28101 generate the tags.
28102
28103 Note: it might be better to do this unconditionally, then the
28104 assembler would not need to know about all new CPU names as
28105 they are added. */
28106 if (!arm_active_target.core_name)
28107 {
28108 /* armv7ve doesn't support any extensions. */
28109 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
28110 {
28111 	      /* Keep backward compatibility for assemblers
28112 which don't support armv7ve. */
28113 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28114 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28115 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28116 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28117 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28118 arm_last_printed_arch_string = "armv7ve";
28119 }
28120 else
28121 arm_print_asm_arch_directives ();
28122 }
28123 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
28124 {
28125 asm_fprintf (asm_out_file, "\t.arch %s\n",
28126 arm_active_target.core_name + 8);
28127 arm_last_printed_arch_string = arm_active_target.core_name + 8;
28128 }
28129 else
28130 {
28131 const char* truncated_name
28132 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28133 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28134 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28135 truncated_name);
28136 else
28137 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28138 }
28139
28140 if (print_tune_info)
28141 arm_print_tune_info ();
28142
28143 if (! TARGET_SOFT_FLOAT)
28144 {
28145 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28146 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28147
28148 if (TARGET_HARD_FLOAT_ABI)
28149 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28150 }
28151
28152 /* Some of these attributes only apply when the corresponding features
28153 are used. However we don't have any easy way of figuring this out.
28154 Conservatively record the setting that would have been used. */
28155
28156 if (flag_rounding_math)
28157 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28158
28159 if (!flag_unsafe_math_optimizations)
28160 {
28161 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28162 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28163 }
28164 if (flag_signaling_nans)
28165 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28166
28167 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28168 flag_finite_math_only ? 1 : 3);
28169
28170 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28171 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28172 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28173 flag_short_enums ? 1 : 2);
28174
28175 /* Tag_ABI_optimization_goals. */
28176 if (optimize_size)
28177 val = 4;
28178 else if (optimize >= 2)
28179 val = 2;
28180 else if (optimize)
28181 val = 1;
28182 else
28183 val = 6;
28184 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28185
28186 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28187 unaligned_access);
28188
28189 if (arm_fp16_format)
28190 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28191 (int) arm_fp16_format);
28192
28193 if (arm_lang_output_object_attributes_hook)
28194 arm_lang_output_object_attributes_hook();
28195 }
28196
28197 default_file_start ();
28198 }
28199
28200 static void
28201 arm_file_end (void)
28202 {
28203 int regno;
28204
28205 if (NEED_INDICATE_EXEC_STACK)
28206 /* Add .note.GNU-stack. */
28207 file_end_indicate_exec_stack ();
28208
28209 if (! thumb_call_reg_needed)
28210 return;
28211
28212 switch_to_section (text_section);
28213 asm_fprintf (asm_out_file, "\t.code 16\n");
28214 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28215
28216 for (regno = 0; regno < LR_REGNUM; regno++)
28217 {
28218 rtx label = thumb_call_via_label[regno];
28219
28220 if (label != 0)
28221 {
28222 targetm.asm_out.internal_label (asm_out_file, "L",
28223 CODE_LABEL_NUMBER (label));
28224 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28225 }
28226 }
28227 }
28228
28229 #ifndef ARM_PE
28230 /* Symbols in the text segment can be accessed without indirecting via the
28231 constant pool; it may take an extra binary operation, but this is still
28232 faster than indirecting via memory. Don't do this when not optimizing,
28233    since we won't be calculating all of the offsets necessary to do this
28234 simplification. */
28235
28236 static void
28237 arm_encode_section_info (tree decl, rtx rtl, int first)
28238 {
28239 if (optimize > 0 && TREE_CONSTANT (decl))
28240 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28241
28242 default_encode_section_info (decl, rtl, first);
28243 }
28244 #endif /* !ARM_PE */
28245
28246 static void
28247 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28248 {
28249 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28250 && !strcmp (prefix, "L"))
28251 {
28252 arm_ccfsm_state = 0;
28253 arm_target_insn = NULL;
28254 }
28255 default_internal_label (stream, prefix, labelno);
28256 }
28257
28258 /* Output code to add DELTA to the first argument, and then jump
28259 to FUNCTION. Used for C++ multiple inheritance. */
28260
28261 static void
28262 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28263 HOST_WIDE_INT, tree function)
28264 {
28265 static int thunk_label = 0;
28266 char label[256];
28267 char labelpc[256];
28268 int mi_delta = delta;
28269 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28270 int shift = 0;
28271 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28272 ? 1 : 0);
28273 if (mi_delta < 0)
28274 mi_delta = - mi_delta;
28275
28276 final_start_function (emit_barrier (), file, 1);
28277
28278 if (TARGET_THUMB1)
28279 {
28280 int labelno = thunk_label++;
28281 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28282       /* Thunks are entered in ARM mode when available.  */
28283 if (TARGET_THUMB1_ONLY)
28284 {
28285 /* push r3 so we can use it as a temporary. */
28286 /* TODO: Omit this save if r3 is not used. */
28287 fputs ("\tpush {r3}\n", file);
28288
28289 /* With -mpure-code, we cannot load the address from the
28290 constant pool: we build it explicitly. */
28291 if (target_pure_code)
28292 {
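	      /* Build the 32-bit address of FUNCTION in r3 one byte at a
		 time, most significant byte first, shifting left by 8
		 between the adds.  */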
28293 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28294 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28295 fputc ('\n', file);
28296 fputs ("\tlsls r3, #8\n", file);
28297 fputs ("\tadds\tr3, #:upper0_7:#", file);
28298 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28299 fputc ('\n', file);
28300 fputs ("\tlsls r3, #8\n", file);
28301 fputs ("\tadds\tr3, #:lower8_15:#", file);
28302 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28303 fputc ('\n', file);
28304 fputs ("\tlsls r3, #8\n", file);
28305 fputs ("\tadds\tr3, #:lower0_7:#", file);
28306 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28307 fputc ('\n', file);
28308 }
28309 else
28310 fputs ("\tldr\tr3, ", file);
28311 }
28312 else
28313 {
28314 fputs ("\tldr\tr12, ", file);
28315 }
28316
28317 if (!target_pure_code)
28318 {
28319 assemble_name (file, label);
28320 fputc ('\n', file);
28321 }
28322
28323 if (flag_pic)
28324 {
28325 /* If we are generating PIC, the ldr instruction below loads
28326 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28327 the address of the add + 8, so we have:
28328
28329 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28330 = target + 1.
28331
28332 Note that we have "+ 1" because some versions of GNU ld
28333 don't set the low bit of the result for R_ARM_REL32
28334 relocations against thumb function symbols.
28335 On ARMv6M this is +4, not +8. */
28336 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28337 assemble_name (file, labelpc);
28338 fputs (":\n", file);
28339 if (TARGET_THUMB1_ONLY)
28340 {
28341 /* This is 2 insns after the start of the thunk, so we know it
28342 is 4-byte aligned. */
28343 fputs ("\tadd\tr3, pc, r3\n", file);
28344 fputs ("\tmov r12, r3\n", file);
28345 }
28346 else
28347 fputs ("\tadd\tr12, pc, r12\n", file);
28348 }
28349 else if (TARGET_THUMB1_ONLY)
28350 fputs ("\tmov r12, r3\n", file);
28351 }
28352 if (TARGET_THUMB1_ONLY)
28353 {
28354 if (mi_delta > 255)
28355 {
28356 fputs ("\tldr\tr3, ", file);
28357 assemble_name (file, label);
28358 fputs ("+4\n", file);
28359 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28360 mi_op, this_regno, this_regno);
28361 }
28362 else if (mi_delta != 0)
28363 {
28364 	  /* Thumb-1 unified syntax requires an 's' suffix in the instruction
28365 	     name when one of the operands is an immediate.  */
28366 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28367 mi_op, this_regno, this_regno,
28368 mi_delta);
28369 }
28370 }
28371 else
28372 {
28373 /* TODO: Use movw/movt for large constants when available. */
28374 while (mi_delta != 0)
28375 {
28376 if ((mi_delta & (3 << shift)) == 0)
28377 shift += 2;
28378 else
28379 {
28380 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28381 mi_op, this_regno, this_regno,
28382 mi_delta & (0xff << shift));
28383 mi_delta &= ~(0xff << shift);
28384 shift += 8;
28385 }
28386 }
28387 }
28388 if (TARGET_THUMB1)
28389 {
28390 if (TARGET_THUMB1_ONLY)
28391 fputs ("\tpop\t{r3}\n", file);
28392
28393 fprintf (file, "\tbx\tr12\n");
28394 ASM_OUTPUT_ALIGN (file, 2);
28395 assemble_name (file, label);
28396 fputs (":\n", file);
28397 if (flag_pic)
28398 {
28399 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28400 rtx tem = XEXP (DECL_RTL (function), 0);
28401 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28402 pipeline offset is four rather than eight. Adjust the offset
28403 accordingly. */
28404 tem = plus_constant (GET_MODE (tem), tem,
28405 TARGET_THUMB1_ONLY ? -3 : -7);
28406 tem = gen_rtx_MINUS (GET_MODE (tem),
28407 tem,
28408 gen_rtx_SYMBOL_REF (Pmode,
28409 ggc_strdup (labelpc)));
28410 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28411 }
28412 else
28413 /* Output ".word .LTHUNKn". */
28414 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28415
28416 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28417 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28418 }
28419 else
28420 {
28421 fputs ("\tb\t", file);
28422 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28423 if (NEED_PLT_RELOC)
28424 fputs ("(PLT)", file);
28425 fputc ('\n', file);
28426 }
28427
28428 final_end_function ();
28429 }
28430
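/* Illustrative sketch (not part of the original source): for a Thumb-1-only
   target, no PIC, and a small MI_DELTA, the routine above ends up emitting a
   sequence along these lines.  The register choice, delta value and
   literal-pool label are assumptions; the real output depends on the options
   in effect.

       push  {r3}
       ldr   r3, .LTHUNK0
       mov   r12, r3
       adds  r0, r0, #8        @ adjust the this pointer by MI_DELTA
       pop   {r3}
       bx    r12
       .align 2
   .LTHUNK0:
       .word <target function>
*/
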
28431 /* MI thunk handling for TARGET_32BIT. */
28432
28433 static void
28434 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28435 HOST_WIDE_INT vcall_offset, tree function)
28436 {
28437 const bool long_call_p = arm_is_long_call_p (function);
28438
28439 /* On ARM, this_regno is R0 or R1 depending on
28440 whether the function returns an aggregate or not.
28441 */
28442 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28443 function)
28444 ? R1_REGNUM : R0_REGNUM);
28445
28446 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28447 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28448 reload_completed = 1;
28449 emit_note (NOTE_INSN_PROLOGUE_END);
28450
28451 /* Add DELTA to THIS_RTX. */
28452 if (delta != 0)
28453 arm_split_constant (PLUS, Pmode, NULL_RTX,
28454 delta, this_rtx, this_rtx, false);
28455
28456 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28457 if (vcall_offset != 0)
28458 {
28459 /* Load *THIS_RTX. */
28460 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28461 /* Compute *THIS_RTX + VCALL_OFFSET. */
28462 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28463 false);
28464 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
28465 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
28466 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
28467 }
28468
28469 /* Generate a tail call to the target function. */
28470 if (!TREE_USED (function))
28471 {
28472 assemble_external (function);
28473 TREE_USED (function) = 1;
28474 }
28475 rtx funexp = XEXP (DECL_RTL (function), 0);
28476 if (long_call_p)
28477 {
28478 emit_move_insn (temp, funexp);
28479 funexp = temp;
28480 }
28481 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28482 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
28483 SIBLING_CALL_P (insn) = 1;
28484 emit_barrier ();
28485
28486 /* Indirect calls require a bit of fixup in PIC mode. */
28487 if (long_call_p)
28488 {
28489 split_all_insns_noflow ();
28490 arm_reorg ();
28491 }
28492
28493 insn = get_insns ();
28494 shorten_branches (insn);
28495 final_start_function (insn, file, 1);
28496 final (insn, file, 1);
28497 final_end_function ();
28498
28499 /* Stop pretending this is a post-reload pass. */
28500 reload_completed = 0;
28501 }
28502
28503 /* Output code to add DELTA to the first argument, and then jump
28504 to FUNCTION. Used for C++ multiple inheritance. */
28505
28506 static void
28507 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
28508 HOST_WIDE_INT vcall_offset, tree function)
28509 {
28510 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
28511
28512 assemble_start_function (thunk, fnname);
28513 if (TARGET_32BIT)
28514 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
28515 else
28516 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
28517 assemble_end_function (thunk, fnname);
28518 }
28519
28520 int
28521 arm_emit_vector_const (FILE *file, rtx x)
28522 {
28523 int i;
28524 const char * pattern;
28525
28526 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28527
28528 switch (GET_MODE (x))
28529 {
28530 case E_V2SImode: pattern = "%08x"; break;
28531 case E_V4HImode: pattern = "%04x"; break;
28532 case E_V8QImode: pattern = "%02x"; break;
28533 default: gcc_unreachable ();
28534 }
28535
28536 fprintf (file, "0x");
28537 for (i = CONST_VECTOR_NUNITS (x); i--;)
28538 {
28539 rtx element;
28540
28541 element = CONST_VECTOR_ELT (x, i);
28542 fprintf (file, pattern, INTVAL (element));
28543 }
28544
28545 return 1;
28546 }
28547
28548 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28549 HFmode constant pool entries are actually loaded with ldr. */
28550 void
28551 arm_emit_fp16_const (rtx c)
28552 {
28553 long bits;
28554
28555 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
28556 if (WORDS_BIG_ENDIAN)
28557 assemble_zeros (2);
28558 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28559 if (!WORDS_BIG_ENDIAN)
28560 assemble_zeros (2);
28561 }
28562
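/* Worked example (illustration only): the HFmode constant 1.0 has the IEEE
   half-precision encoding 0x3c00.  With !WORDS_BIG_ENDIAN the function above
   emits the two data bytes first and then two bytes of zero padding, so the
   pool word reads 0x00003c00; with WORDS_BIG_ENDIAN the padding comes first
   instead.  Either way the entry occupies a full 4-byte word, as required by
   the ldr that loads it.  */
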
28563 const char *
28564 arm_output_load_gr (rtx *operands)
28565 {
28566 rtx reg;
28567 rtx offset;
28568 rtx wcgr;
28569 rtx sum;
28570
28571 if (!MEM_P (operands [1])
28572 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28573 || !REG_P (reg = XEXP (sum, 0))
28574 || !CONST_INT_P (offset = XEXP (sum, 1))
28575 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28576 return "wldrw%?\t%0, %1";
28577
28578 /* Fix up an out-of-range load of a GR register. */
28579 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28580 wcgr = operands[0];
28581 operands[0] = reg;
28582 output_asm_insn ("ldr%?\t%0, %1", operands);
28583
28584 operands[0] = wcgr;
28585 operands[1] = reg;
28586 output_asm_insn ("tmcr%?\t%0, %1", operands);
28587 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28588
28589 return "";
28590 }
28591
28592 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28593
28594 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28595 named arg and all anonymous args onto the stack.
28596 XXX I know the prologue shouldn't be pushing registers, but it is faster
28597 that way. */
28598
28599 static void
28600 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28601 const function_arg_info &arg,
28602 int *pretend_size,
28603 int second_time ATTRIBUTE_UNUSED)
28604 {
28605 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28606 int nregs;
28607
28608 cfun->machine->uses_anonymous_args = 1;
28609 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28610 {
28611 nregs = pcum->aapcs_ncrn;
28612 if (nregs & 1)
28613 {
28614 int res = arm_needs_doubleword_align (arg.mode, arg.type);
28615 if (res < 0 && warn_psabi)
28616 inform (input_location, "parameter passing for argument of "
28617 "type %qT changed in GCC 7.1", arg.type);
28618 else if (res > 0)
28619 {
28620 nregs++;
28621 if (res > 1 && warn_psabi)
28622 inform (input_location,
28623 "parameter passing for argument of type "
28624 "%qT changed in GCC 9.1", arg.type);
28625 }
28626 }
28627 }
28628 else
28629 nregs = pcum->nregs;
28630
28631 if (nregs < NUM_ARG_REGS)
28632 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28633 }
28634
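/* Worked example (illustration only, AAPCS assumed): for a variadic function
   such as "int f (int a, ...)", one core argument register is consumed by A,
   so nregs == 1 and the anonymous arguments still start in r1.  With
   NUM_ARG_REGS == 4 and UNITS_PER_WORD == 4 this gives
   *pretend_size == (4 - 1) * 4 == 12, i.e. the prologue pushes r1-r3 so that
   va_arg can walk all the arguments contiguously on the stack.  */
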
28635 /* We can't rely on the caller doing the proper promotion when
28636 using APCS or ATPCS. */
28637
28638 static bool
28639 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28640 {
28641 return !TARGET_AAPCS_BASED;
28642 }
28643
28644 static machine_mode
28645 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28646 machine_mode mode,
28647 int *punsignedp ATTRIBUTE_UNUSED,
28648 const_tree fntype ATTRIBUTE_UNUSED,
28649 int for_return ATTRIBUTE_UNUSED)
28650 {
28651 if (GET_MODE_CLASS (mode) == MODE_INT
28652 && GET_MODE_SIZE (mode) < 4)
28653 return SImode;
28654
28655 return mode;
28656 }
28657
28658
28659 static bool
28660 arm_default_short_enums (void)
28661 {
28662 return ARM_DEFAULT_SHORT_ENUMS;
28663 }
28664
28665
28666 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28667
28668 static bool
28669 arm_align_anon_bitfield (void)
28670 {
28671 return TARGET_AAPCS_BASED;
28672 }
28673
28674
28675 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28676
28677 static tree
28678 arm_cxx_guard_type (void)
28679 {
28680 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28681 }
28682
28683
28684 /* The EABI says test the least significant bit of a guard variable. */
28685
28686 static bool
28687 arm_cxx_guard_mask_bit (void)
28688 {
28689 return TARGET_AAPCS_BASED;
28690 }
28691
28692
28693 /* The EABI specifies that all array cookies are 8 bytes long. */
28694
28695 static tree
28696 arm_get_cookie_size (tree type)
28697 {
28698 tree size;
28699
28700 if (!TARGET_AAPCS_BASED)
28701 return default_cxx_get_cookie_size (type);
28702
28703 size = build_int_cst (sizetype, 8);
28704 return size;
28705 }
28706
28707
28708 /* The EABI says that array cookies should also contain the element size. */
28709
28710 static bool
28711 arm_cookie_has_size (void)
28712 {
28713 return TARGET_AAPCS_BASED;
28714 }
28715
28716
28717 /* The EABI says constructors and destructors should return a pointer to
28718 the object constructed/destroyed. */
28719
28720 static bool
28721 arm_cxx_cdtor_returns_this (void)
28722 {
28723 return TARGET_AAPCS_BASED;
28724 }
28725
28726 /* The EABI says that an inline function may never be the key
28727 method. */
28728
28729 static bool
28730 arm_cxx_key_method_may_be_inline (void)
28731 {
28732 return !TARGET_AAPCS_BASED;
28733 }
28734
28735 static void
28736 arm_cxx_determine_class_data_visibility (tree decl)
28737 {
28738 if (!TARGET_AAPCS_BASED
28739 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28740 return;
28741
28742 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28743 is exported. However, on systems without dynamic vague linkage,
28744 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28745 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28746 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28747 else
28748 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28749 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28750 }
28751
28752 static bool
28753 arm_cxx_class_data_always_comdat (void)
28754 {
28755 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28756 vague linkage if the class has no key function. */
28757 return !TARGET_AAPCS_BASED;
28758 }
28759
28760
28761 /* The EABI says __aeabi_atexit should be used to register static
28762 destructors. */
28763
28764 static bool
28765 arm_cxx_use_aeabi_atexit (void)
28766 {
28767 return TARGET_AAPCS_BASED;
28768 }
28769
28770
28771 void
28772 arm_set_return_address (rtx source, rtx scratch)
28773 {
28774 arm_stack_offsets *offsets;
28775 HOST_WIDE_INT delta;
28776 rtx addr, mem;
28777 unsigned long saved_regs;
28778
28779 offsets = arm_get_frame_offsets ();
28780 saved_regs = offsets->saved_regs_mask;
28781
28782 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28783 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28784 else
28785 {
28786 if (frame_pointer_needed)
28787 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28788 else
28789 {
28790 /* LR will be the first saved register. */
28791 delta = offsets->outgoing_args - (offsets->frame + 4);
28792
28793
28794 if (delta >= 4096)
28795 {
28796 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28797 GEN_INT (delta & ~4095)));
28798 addr = scratch;
28799 delta &= 4095;
28800 }
28801 else
28802 addr = stack_pointer_rtx;
28803
28804 addr = plus_constant (Pmode, addr, delta);
28805 }
28806
28807 /* The store needs to be marked to prevent DSE from deleting
28808 it as dead if it is based on fp. */
28809 mem = gen_frame_mem (Pmode, addr);
28810 MEM_VOLATILE_P (mem) = true;
28811 emit_move_insn (mem, source);
28812 }
28813 }
28814
28815
28816 void
28817 thumb_set_return_address (rtx source, rtx scratch)
28818 {
28819 arm_stack_offsets *offsets;
28820 HOST_WIDE_INT delta;
28821 HOST_WIDE_INT limit;
28822 int reg;
28823 rtx addr, mem;
28824 unsigned long mask;
28825
28826 emit_use (source);
28827
28828 offsets = arm_get_frame_offsets ();
28829 mask = offsets->saved_regs_mask;
28830 if (mask & (1 << LR_REGNUM))
28831 {
28832 limit = 1024;
28833 /* Find the saved regs. */
28834 if (frame_pointer_needed)
28835 {
28836 delta = offsets->soft_frame - offsets->saved_args;
28837 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28838 if (TARGET_THUMB1)
28839 limit = 128;
28840 }
28841 else
28842 {
28843 delta = offsets->outgoing_args - offsets->saved_args;
28844 reg = SP_REGNUM;
28845 }
28846 /* Allow for the stack frame. */
28847 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28848 delta -= 16;
28849 /* The link register is always the first saved register. */
28850 delta -= 4;
28851
28852 /* Construct the address. */
28853 addr = gen_rtx_REG (SImode, reg);
28854 if (delta > limit)
28855 {
28856 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28857 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28858 addr = scratch;
28859 }
28860 else
28861 addr = plus_constant (Pmode, addr, delta);
28862
28863 /* The store needs to be marked to prevent DSE from deleting
28864 it as dead if it is based on fp. */
28865 mem = gen_frame_mem (Pmode, addr);
28866 MEM_VOLATILE_P (mem) = true;
28867 emit_move_insn (mem, source);
28868 }
28869 else
28870 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28871 }
28872
28873 /* Implements target hook vector_mode_supported_p. */
28874 bool
28875 arm_vector_mode_supported_p (machine_mode mode)
28876 {
28877 /* Neon also supports V2SImode, etc. listed in the clause below. */
28878 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28879 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
28880 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
28881 || mode == V8BFmode))
28882 return true;
28883
28884 if ((TARGET_NEON || TARGET_IWMMXT)
28885 && ((mode == V2SImode)
28886 || (mode == V4HImode)
28887 || (mode == V8QImode)))
28888 return true;
28889
28890 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28891 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28892 || mode == V2HAmode))
28893 return true;
28894
28895 if (TARGET_HAVE_MVE
28896 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
28897 || mode == V16QImode))
28898 return true;
28899
28900 if (TARGET_HAVE_MVE_FLOAT
28901 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
28902 return true;
28903
28904 return false;
28905 }
28906
28907 /* Implements target hook array_mode_supported_p. */
28908
28909 static bool
28910 arm_array_mode_supported_p (machine_mode mode,
28911 unsigned HOST_WIDE_INT nelems)
28912 {
28913 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
28914 for now, as the lane-swapping logic needs to be extended in the expanders.
28915 See PR target/82518. */
28916 if (TARGET_NEON && !BYTES_BIG_ENDIAN
28917 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28918 && (nelems >= 2 && nelems <= 4))
28919 return true;
28920
28921 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
28922 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
28923 return true;
28924
28925 return false;
28926 }
28927
28928 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28929 registers when autovectorizing for Neon, at least until multiple vector
28930 widths are supported properly by the middle-end. */
28931
28932 static machine_mode
28933 arm_preferred_simd_mode (scalar_mode mode)
28934 {
28935 if (TARGET_NEON)
28936 switch (mode)
28937 {
28938 case E_SFmode:
28939 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28940 case E_SImode:
28941 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28942 case E_HImode:
28943 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28944 case E_QImode:
28945 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28946 case E_DImode:
28947 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28948 return V2DImode;
28949 break;
28950
28951 default:;
28952 }
28953
28954 if (TARGET_REALLY_IWMMXT)
28955 switch (mode)
28956 {
28957 case E_SImode:
28958 return V2SImode;
28959 case E_HImode:
28960 return V4HImode;
28961 case E_QImode:
28962 return V8QImode;
28963
28964 default:;
28965 }
28966
28967 if (TARGET_HAVE_MVE)
28968 switch (mode)
28969 {
28970 case QImode:
28971 return V16QImode;
28972 case HImode:
28973 return V8HImode;
28974 case SImode:
28975 return V4SImode;
28976
28977 default:;
28978 }
28979
28980 if (TARGET_HAVE_MVE_FLOAT)
28981 switch (mode)
28982 {
28983 case HFmode:
28984 return V8HFmode;
28985 case SFmode:
28986 return V4SFmode;
28987
28988 default:;
28989 }
28990
28991 return word_mode;
28992 }
28993
28994 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28995
28996 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28997 using r0-r4 for function arguments, r7 for the stack frame and don't have
28998 enough left over to do doubleword arithmetic. For Thumb-2 all the
28999 potentially problematic instructions accept high registers so this is not
29000 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29001 that require many low registers. */
29002 static bool
29003 arm_class_likely_spilled_p (reg_class_t rclass)
29004 {
29005 if ((TARGET_THUMB1 && rclass == LO_REGS)
29006 || rclass == CC_REG)
29007 return true;
29008
29009 return false;
29010 }
29011
29012 /* Implements target hook small_register_classes_for_mode_p. */
29013 bool
29014 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29015 {
29016 return TARGET_THUMB1;
29017 }
29018
29019 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29020 ARM insns and therefore guarantee that the shift count is modulo 256.
29021 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29022 guarantee no particular behavior for out-of-range counts. */
29023
29024 static unsigned HOST_WIDE_INT
29025 arm_shift_truncation_mask (machine_mode mode)
29026 {
29027 return mode == SImode ? 255 : 0;
29028 }
29029
29030
29031 /* Map internal gcc register numbers to DWARF2 register numbers. */
29032
29033 unsigned int
29034 arm_dbx_register_number (unsigned int regno)
29035 {
29036 if (regno < 16)
29037 return regno;
29038
29039 if (IS_VFP_REGNUM (regno))
29040 {
29041 /* See comment in arm_dwarf_register_span. */
29042 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29043 return 64 + regno - FIRST_VFP_REGNUM;
29044 else
29045 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29046 }
29047
29048 if (IS_IWMMXT_GR_REGNUM (regno))
29049 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29050
29051 if (IS_IWMMXT_REGNUM (regno))
29052 return 112 + regno - FIRST_IWMMXT_REGNUM;
29053
29054 return DWARF_FRAME_REGISTERS;
29055 }
29056
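/* Examples of the mapping above (for illustration only): core register r7
   maps to DWARF number 7; the single-precision register s5
   (FIRST_VFP_REGNUM + 5) maps to 64 + 5 = 69; the double-precision register
   d20, which has no single-precision alias, maps to 256 + 40 / 2 = 276; the
   iWMMXt register wR3 maps to 112 + 3 = 115.  */
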
29057 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29058 GCC models them as 64 32-bit registers, so we need to describe this to
29059 the DWARF generation code. Other registers can use the default. */
29060 static rtx
29061 arm_dwarf_register_span (rtx rtl)
29062 {
29063 machine_mode mode;
29064 unsigned regno;
29065 rtx parts[16];
29066 int nregs;
29067 int i;
29068
29069 regno = REGNO (rtl);
29070 if (!IS_VFP_REGNUM (regno))
29071 return NULL_RTX;
29072
29073 /* XXX FIXME: The EABI defines two VFP register ranges:
29074 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29075 256-287: D0-D31
29076 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29077 corresponding D register. Until GDB supports this, we shall use the
29078 legacy encodings. We also use these encodings for D0-D15 for
29079 compatibility with older debuggers. */
29080 mode = GET_MODE (rtl);
29081 if (GET_MODE_SIZE (mode) < 8)
29082 return NULL_RTX;
29083
29084 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29085 {
29086 nregs = GET_MODE_SIZE (mode) / 4;
29087 for (i = 0; i < nregs; i += 2)
29088 if (TARGET_BIG_END)
29089 {
29090 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29091 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29092 }
29093 else
29094 {
29095 parts[i] = gen_rtx_REG (SImode, regno + i);
29096 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29097 }
29098 }
29099 else
29100 {
29101 nregs = GET_MODE_SIZE (mode) / 8;
29102 for (i = 0; i < nregs; i++)
29103 parts[i] = gen_rtx_REG (DImode, regno + i);
29104 }
29105
29106 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29107 }
29108
29109 #if ARM_UNWIND_INFO
29110 /* Emit unwind directives for a store-multiple instruction or stack pointer
29111 push during alignment.
29112 These should only ever be generated by the function prologue code, so
29113 expect them to have a particular form.
29114 The store-multiple instruction sometimes pushes pc as the last register,
29115 although it should not be tracked in the unwind information, and for -Os
29116 it sometimes pushes dummy registers before the first register that does
29117 need to be tracked; such dummy registers exist only to avoid a separate
29118 stack adjustment and will not be restored in the
29119 epilogue. */
29120
29121 static void
29122 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29123 {
29124 int i;
29125 HOST_WIDE_INT offset;
29126 HOST_WIDE_INT nregs;
29127 int reg_size;
29128 unsigned reg;
29129 unsigned lastreg;
29130 unsigned padfirst = 0, padlast = 0;
29131 rtx e;
29132
29133 e = XVECEXP (p, 0, 0);
29134 gcc_assert (GET_CODE (e) == SET);
29135
29136 /* First insn will adjust the stack pointer. */
29137 gcc_assert (GET_CODE (e) == SET
29138 && REG_P (SET_DEST (e))
29139 && REGNO (SET_DEST (e)) == SP_REGNUM
29140 && GET_CODE (SET_SRC (e)) == PLUS);
29141
29142 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29143 nregs = XVECLEN (p, 0) - 1;
29144 gcc_assert (nregs);
29145
29146 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29147 if (reg < 16)
29148 {
29149 /* For -Os dummy registers can be pushed at the beginning to
29150 avoid separate stack pointer adjustment. */
29151 e = XVECEXP (p, 0, 1);
29152 e = XEXP (SET_DEST (e), 0);
29153 if (GET_CODE (e) == PLUS)
29154 padfirst = INTVAL (XEXP (e, 1));
29155 gcc_assert (padfirst == 0 || optimize_size);
29156 /* The function prologue may also push pc, but not annotate it as it is
29157 never restored. We turn this into a stack pointer adjustment. */
29158 e = XVECEXP (p, 0, nregs);
29159 e = XEXP (SET_DEST (e), 0);
29160 if (GET_CODE (e) == PLUS)
29161 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29162 else
29163 padlast = offset - 4;
29164 gcc_assert (padlast == 0 || padlast == 4);
29165 if (padlast == 4)
29166 fprintf (asm_out_file, "\t.pad #4\n");
29167 reg_size = 4;
29168 fprintf (asm_out_file, "\t.save {");
29169 }
29170 else if (IS_VFP_REGNUM (reg))
29171 {
29172 reg_size = 8;
29173 fprintf (asm_out_file, "\t.vsave {");
29174 }
29175 else
29176 /* Unknown register type. */
29177 gcc_unreachable ();
29178
29179 /* If the stack increment doesn't match the size of the saved registers,
29180 something has gone horribly wrong. */
29181 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29182
29183 offset = padfirst;
29184 lastreg = 0;
29185 /* The remaining insns will describe the stores. */
29186 for (i = 1; i <= nregs; i++)
29187 {
29188 /* Expect (set (mem <addr>) (reg)).
29189 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29190 e = XVECEXP (p, 0, i);
29191 gcc_assert (GET_CODE (e) == SET
29192 && MEM_P (SET_DEST (e))
29193 && REG_P (SET_SRC (e)));
29194
29195 reg = REGNO (SET_SRC (e));
29196 gcc_assert (reg >= lastreg);
29197
29198 if (i != 1)
29199 fprintf (asm_out_file, ", ");
29200 /* We can't use %r for vfp because we need to use the
29201 double precision register names. */
29202 if (IS_VFP_REGNUM (reg))
29203 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29204 else
29205 asm_fprintf (asm_out_file, "%r", reg);
29206
29207 if (flag_checking)
29208 {
29209 /* Check that the addresses are consecutive. */
29210 e = XEXP (SET_DEST (e), 0);
29211 if (GET_CODE (e) == PLUS)
29212 gcc_assert (REG_P (XEXP (e, 0))
29213 && REGNO (XEXP (e, 0)) == SP_REGNUM
29214 && CONST_INT_P (XEXP (e, 1))
29215 && offset == INTVAL (XEXP (e, 1)));
29216 else
29217 gcc_assert (i == 1
29218 && REG_P (e)
29219 && REGNO (e) == SP_REGNUM);
29220 offset += reg_size;
29221 }
29222 }
29223 fprintf (asm_out_file, "}\n");
29224 if (padfirst)
29225 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29226 }
29227
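/* As an illustration (assumed prologue, not taken from the original source):
   a prologue "push {r4, r5, r6, lr}" handled above produces the directive
   ".save {r4, r5, r6, lr}", while a "vpush {d8, d9}" store-multiple produces
   ".vsave {d8, d9}".  When -Os pads the push with a dummy leading register,
   or the prologue also pushes pc, the extra words are reported with a ".pad"
   directive instead of being listed in the register set.  */
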
29228 /* Emit unwind directives for a SET. */
29229
29230 static void
29231 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29232 {
29233 rtx e0;
29234 rtx e1;
29235 unsigned reg;
29236
29237 e0 = XEXP (p, 0);
29238 e1 = XEXP (p, 1);
29239 switch (GET_CODE (e0))
29240 {
29241 case MEM:
29242 /* Pushing a single register. */
29243 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29244 || !REG_P (XEXP (XEXP (e0, 0), 0))
29245 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29246 abort ();
29247
29248 asm_fprintf (asm_out_file, "\t.save ");
29249 if (IS_VFP_REGNUM (REGNO (e1)))
29250 asm_fprintf (asm_out_file, "{d%d}\n",
29251 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29252 else
29253 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
29254 break;
29255
29256 case REG:
29257 if (REGNO (e0) == SP_REGNUM)
29258 {
29259 /* A stack increment. */
29260 if (GET_CODE (e1) != PLUS
29261 || !REG_P (XEXP (e1, 0))
29262 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29263 || !CONST_INT_P (XEXP (e1, 1)))
29264 abort ();
29265
29266 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29267 -INTVAL (XEXP (e1, 1)));
29268 }
29269 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29270 {
29271 HOST_WIDE_INT offset;
29272
29273 if (GET_CODE (e1) == PLUS)
29274 {
29275 if (!REG_P (XEXP (e1, 0))
29276 || !CONST_INT_P (XEXP (e1, 1)))
29277 abort ();
29278 reg = REGNO (XEXP (e1, 0));
29279 offset = INTVAL (XEXP (e1, 1));
29280 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29281 HARD_FRAME_POINTER_REGNUM, reg,
29282 offset);
29283 }
29284 else if (REG_P (e1))
29285 {
29286 reg = REGNO (e1);
29287 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29288 HARD_FRAME_POINTER_REGNUM, reg);
29289 }
29290 else
29291 abort ();
29292 }
29293 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29294 {
29295 /* Move from sp to reg. */
29296 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29297 }
29298 else if (GET_CODE (e1) == PLUS
29299 && REG_P (XEXP (e1, 0))
29300 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29301 && CONST_INT_P (XEXP (e1, 1)))
29302 {
29303 /* Set reg to offset from sp. */
29304 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29305 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29306 }
29307 else
29308 abort ();
29309 break;
29310
29311 default:
29312 abort ();
29313 }
29314 }
29315
29316
29317 /* Emit unwind directives for the given insn. */
29318
29319 static void
29320 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29321 {
29322 rtx note, pat;
29323 bool handled_one = false;
29324
29325 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29326 return;
29327
29328 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29329 && (TREE_NOTHROW (current_function_decl)
29330 || crtl->all_throwers_are_sibcalls))
29331 return;
29332
29333 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29334 return;
29335
29336 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29337 {
29338 switch (REG_NOTE_KIND (note))
29339 {
29340 case REG_FRAME_RELATED_EXPR:
29341 pat = XEXP (note, 0);
29342 goto found;
29343
29344 case REG_CFA_REGISTER:
29345 pat = XEXP (note, 0);
29346 if (pat == NULL)
29347 {
29348 pat = PATTERN (insn);
29349 if (GET_CODE (pat) == PARALLEL)
29350 pat = XVECEXP (pat, 0, 0);
29351 }
29352
29353 /* Only emitted for IS_STACKALIGN re-alignment. */
29354 {
29355 rtx dest, src;
29356 unsigned reg;
29357
29358 src = SET_SRC (pat);
29359 dest = SET_DEST (pat);
29360
29361 gcc_assert (src == stack_pointer_rtx);
29362 reg = REGNO (dest);
29363 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29364 reg + 0x90, reg);
29365 }
29366 handled_one = true;
29367 break;
29368
29369 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
29370 so that shrink-wrapping gets correct dwarf information. We should not
29371 emit unwind information for it, because these notes are used either for
29372 pretend arguments or to adjust sp and restore registers from the
29373 stack. */
29374 case REG_CFA_DEF_CFA:
29375 case REG_CFA_ADJUST_CFA:
29376 case REG_CFA_RESTORE:
29377 return;
29378
29379 case REG_CFA_EXPRESSION:
29380 case REG_CFA_OFFSET:
29381 /* ??? Only handling here what we actually emit. */
29382 gcc_unreachable ();
29383
29384 default:
29385 break;
29386 }
29387 }
29388 if (handled_one)
29389 return;
29390 pat = PATTERN (insn);
29391 found:
29392
29393 switch (GET_CODE (pat))
29394 {
29395 case SET:
29396 arm_unwind_emit_set (asm_out_file, pat);
29397 break;
29398
29399 case SEQUENCE:
29400 /* Store multiple. */
29401 arm_unwind_emit_sequence (asm_out_file, pat);
29402 break;
29403
29404 default:
29405 abort();
29406 }
29407 }
29408
29409
29410 /* Output a reference from a function exception table to the type_info
29411 object X. The EABI specifies that the symbol should be relocated by
29412 an R_ARM_TARGET2 relocation. */
29413
29414 static bool
29415 arm_output_ttype (rtx x)
29416 {
29417 fputs ("\t.word\t", asm_out_file);
29418 output_addr_const (asm_out_file, x);
29419 /* Use special relocations for symbol references. */
29420 if (!CONST_INT_P (x))
29421 fputs ("(TARGET2)", asm_out_file);
29422 fputc ('\n', asm_out_file);
29423
29424 return TRUE;
29425 }
29426
29427 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29428
29429 static void
29430 arm_asm_emit_except_personality (rtx personality)
29431 {
29432 fputs ("\t.personality\t", asm_out_file);
29433 output_addr_const (asm_out_file, personality);
29434 fputc ('\n', asm_out_file);
29435 }
29436 #endif /* ARM_UNWIND_INFO */
29437
29438 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29439
29440 static void
29441 arm_asm_init_sections (void)
29442 {
29443 #if ARM_UNWIND_INFO
29444 exception_section = get_unnamed_section (0, output_section_asm_op,
29445 "\t.handlerdata");
29446 #endif /* ARM_UNWIND_INFO */
29447
29448 #ifdef OBJECT_FORMAT_ELF
29449 if (target_pure_code)
29450 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29451 #endif
29452 }
29453
29454 /* Output unwind directives for the start/end of a function. */
29455
29456 void
29457 arm_output_fn_unwind (FILE * f, bool prologue)
29458 {
29459 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29460 return;
29461
29462 if (prologue)
29463 fputs ("\t.fnstart\n", f);
29464 else
29465 {
29466 /* If this function will never be unwound, then mark it as such.
29467 The same condition is used in arm_unwind_emit to suppress
29468 the frame annotations. */
29469 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29470 && (TREE_NOTHROW (current_function_decl)
29471 || crtl->all_throwers_are_sibcalls))
29472 fputs ("\t.cantunwind\n", f);
29473
29474 fputs ("\t.fnend\n", f);
29475 }
29476 }
29477
29478 static bool
29479 arm_emit_tls_decoration (FILE *fp, rtx x)
29480 {
29481 enum tls_reloc reloc;
29482 rtx val;
29483
29484 val = XVECEXP (x, 0, 0);
29485 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29486
29487 output_addr_const (fp, val);
29488
29489 switch (reloc)
29490 {
29491 case TLS_GD32:
29492 fputs ("(tlsgd)", fp);
29493 break;
29494 case TLS_GD32_FDPIC:
29495 fputs ("(tlsgd_fdpic)", fp);
29496 break;
29497 case TLS_LDM32:
29498 fputs ("(tlsldm)", fp);
29499 break;
29500 case TLS_LDM32_FDPIC:
29501 fputs ("(tlsldm_fdpic)", fp);
29502 break;
29503 case TLS_LDO32:
29504 fputs ("(tlsldo)", fp);
29505 break;
29506 case TLS_IE32:
29507 fputs ("(gottpoff)", fp);
29508 break;
29509 case TLS_IE32_FDPIC:
29510 fputs ("(gottpoff_fdpic)", fp);
29511 break;
29512 case TLS_LE32:
29513 fputs ("(tpoff)", fp);
29514 break;
29515 case TLS_DESCSEQ:
29516 fputs ("(tlsdesc)", fp);
29517 break;
29518 default:
29519 gcc_unreachable ();
29520 }
29521
29522 switch (reloc)
29523 {
29524 case TLS_GD32:
29525 case TLS_LDM32:
29526 case TLS_IE32:
29527 case TLS_DESCSEQ:
29528 fputs (" + (. - ", fp);
29529 output_addr_const (fp, XVECEXP (x, 0, 2));
29530 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29531 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29532 output_addr_const (fp, XVECEXP (x, 0, 3));
29533 fputc (')', fp);
29534 break;
29535 default:
29536 break;
29537 }
29538
29539 return TRUE;
29540 }
29541
29542 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29543
29544 static void
29545 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29546 {
29547 gcc_assert (size == 4);
29548 fputs ("\t.word\t", file);
29549 output_addr_const (file, x);
29550 fputs ("(tlsldo)", file);
29551 }
29552
29553 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29554
29555 static bool
29556 arm_output_addr_const_extra (FILE *fp, rtx x)
29557 {
29558 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29559 return arm_emit_tls_decoration (fp, x);
29560 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29561 {
29562 char label[256];
29563 int labelno = INTVAL (XVECEXP (x, 0, 0));
29564
29565 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29566 assemble_name_raw (fp, label);
29567
29568 return TRUE;
29569 }
29570 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29571 {
29572 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29573 if (GOT_PCREL)
29574 fputs ("+.", fp);
29575 fputs ("-(", fp);
29576 output_addr_const (fp, XVECEXP (x, 0, 0));
29577 fputc (')', fp);
29578 return TRUE;
29579 }
29580 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29581 {
29582 output_addr_const (fp, XVECEXP (x, 0, 0));
29583 if (GOT_PCREL)
29584 fputs ("+.", fp);
29585 fputs ("-(", fp);
29586 output_addr_const (fp, XVECEXP (x, 0, 1));
29587 fputc (')', fp);
29588 return TRUE;
29589 }
29590 else if (GET_CODE (x) == CONST_VECTOR)
29591 return arm_emit_vector_const (fp, x);
29592
29593 return FALSE;
29594 }
29595
29596 /* Output assembly for a shift instruction.
29597 SET_FLAGS determines how the instruction modifies the condition codes.
29598 0 - Do not set condition codes.
29599 1 - Set condition codes.
29600 2 - Use smallest instruction. */
29601 const char *
29602 arm_output_shift (rtx *operands, int set_flags)
29603 {
29604 char pattern[100];
29605 static const char flag_chars[3] = {'?', '.', '!'};
29606 const char *shift;
29607 HOST_WIDE_INT val;
29608 char c;
29609
29610 c = flag_chars[set_flags];
29611 shift = shift_op(operands[3], &val);
29612 if (shift)
29613 {
29614 if (val != -1)
29615 operands[2] = GEN_INT(val);
29616 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29617 }
29618 else
29619 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29620
29621 output_asm_insn (pattern, operands);
29622 return "";
29623 }
29624
29625 /* Output assembly for a WMMX immediate shift instruction. */
29626 const char *
29627 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29628 {
29629 int shift = INTVAL (operands[2]);
29630 char templ[50];
29631 machine_mode opmode = GET_MODE (operands[0]);
29632
29633 gcc_assert (shift >= 0);
29634
29635 /* Handle a shift value larger than the element width: > 63 for the D
29636 qualifier, > 31 for the W qualifier, or > 15 for the H qualifier. */
29637 if (((opmode == V4HImode) && (shift > 15))
29638 || ((opmode == V2SImode) && (shift > 31))
29639 || ((opmode == DImode) && (shift > 63)))
29640 {
29641 if (wror_or_wsra)
29642 {
29643 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29644 output_asm_insn (templ, operands);
29645 if (opmode == DImode)
29646 {
29647 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29648 output_asm_insn (templ, operands);
29649 }
29650 }
29651 else
29652 {
29653 /* The destination register will contain all zeros. */
29654 sprintf (templ, "wzero\t%%0");
29655 output_asm_insn (templ, operands);
29656 }
29657 return "";
29658 }
29659
29660 if ((opmode == DImode) && (shift > 32))
29661 {
29662 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29663 output_asm_insn (templ, operands);
29664 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29665 output_asm_insn (templ, operands);
29666 }
29667 else
29668 {
29669 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29670 output_asm_insn (templ, operands);
29671 }
29672 return "";
29673 }
29674
29675 /* Output assembly for a WMMX tinsr instruction. */
29676 const char *
29677 arm_output_iwmmxt_tinsr (rtx *operands)
29678 {
29679 int mask = INTVAL (operands[3]);
29680 int i;
29681 char templ[50];
29682 int units = mode_nunits[GET_MODE (operands[0])];
29683 gcc_assert ((mask & (mask - 1)) == 0);
29684 for (i = 0; i < units; ++i)
29685 {
29686 if ((mask & 0x01) == 1)
29687 {
29688 break;
29689 }
29690 mask >>= 1;
29691 }
29692 gcc_assert (i < units);
29693 {
29694 switch (GET_MODE (operands[0]))
29695 {
29696 case E_V8QImode:
29697 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29698 break;
29699 case E_V4HImode:
29700 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29701 break;
29702 case E_V2SImode:
29703 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29704 break;
29705 default:
29706 gcc_unreachable ();
29707 break;
29708 }
29709 output_asm_insn (templ, operands);
29710 }
29711 return "";
29712 }
29713
29714 /* Output a Thumb-1 casesi dispatch sequence. */
29715 const char *
29716 thumb1_output_casesi (rtx *operands)
29717 {
29718 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29719
29720 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29721
29722 switch (GET_MODE(diff_vec))
29723 {
29724 case E_QImode:
29725 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29726 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29727 case E_HImode:
29728 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29729 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29730 case E_SImode:
29731 return "bl\t%___gnu_thumb1_case_si";
29732 default:
29733 gcc_unreachable ();
29734 }
29735 }
29736
29737 /* Output a Thumb-2 casesi instruction. */
29738 const char *
29739 thumb2_output_casesi (rtx *operands)
29740 {
29741 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29742
29743 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29744
29745 output_asm_insn ("cmp\t%0, %1", operands);
29746 output_asm_insn ("bhi\t%l3", operands);
29747 switch (GET_MODE(diff_vec))
29748 {
29749 case E_QImode:
29750 return "tbb\t[%|pc, %0]";
29751 case E_HImode:
29752 return "tbh\t[%|pc, %0, lsl #1]";
29753 case E_SImode:
29754 if (flag_pic)
29755 {
29756 output_asm_insn ("adr\t%4, %l2", operands);
29757 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29758 output_asm_insn ("add\t%4, %4, %5", operands);
29759 return "bx\t%4";
29760 }
29761 else
29762 {
29763 output_asm_insn ("adr\t%4, %l2", operands);
29764 return "ldr\t%|pc, [%4, %0, lsl #2]";
29765 }
29766 default:
29767 gcc_unreachable ();
29768 }
29769 }
29770
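/* For illustration (assumed operands, not from the original source): with a
   QImode dispatch table the sequence printed above is simply

       cmp   r0, #<max index>
       bhi   .Ldefault
       tbb   [pc, r0]

   followed by the byte-offset table, while the SImode case falls back to an
   adr/ldr pair (plus an extra add when PIC requires position-independent
   table entries).  */
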
29771 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
29772 per-core tuning structs. */
29773 static int
29774 arm_issue_rate (void)
29775 {
29776 return current_tune->issue_rate;
29777 }
29778
29779 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
29780 static int
29781 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
29782 {
29783 if (DEBUG_INSN_P (insn))
29784 return more;
29785
29786 rtx_code code = GET_CODE (PATTERN (insn));
29787 if (code == USE || code == CLOBBER)
29788 return more;
29789
29790 if (get_attr_type (insn) == TYPE_NO_INSN)
29791 return more;
29792
29793 return more - 1;
29794 }
29795
29796 /* Return how many instructions the scheduler should look ahead to choose
29797 the best one. */
29798 static int
29799 arm_first_cycle_multipass_dfa_lookahead (void)
29800 {
29801 int issue_rate = arm_issue_rate ();
29802
29803 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
29804 }
29805
29806 /* Enable modeling of L2 auto-prefetcher. */
29807 static int
29808 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
29809 {
29810 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
29811 }
29812
29813 const char *
29814 arm_mangle_type (const_tree type)
29815 {
29816 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29817 has to be mangled as if it is in the "std" namespace. */
29818 if (TARGET_AAPCS_BASED
29819 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29820 return "St9__va_list";
29821
29822 /* Half-precision floating point types. */
29823 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29824 {
29825 if (TYPE_MODE (type) == BFmode)
29826 return "u6__bf16";
29827 else
29828 return "Dh";
29829 }
29830
29831 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
29832 builtin type. */
29833 if (TYPE_NAME (type) != NULL)
29834 return arm_mangle_builtin_type (type);
29835
29836 /* Use the default mangling. */
29837 return NULL;
29838 }
29839
29840 /* Order of allocation of core registers for Thumb: this allocation is
29841 written over the corresponding initial entries of the array
29842 initialized with REG_ALLOC_ORDER. We allocate all low registers
29843 first. Saving and restoring a low register is usually cheaper than
29844 using a call-clobbered high register. */
29845
29846 static const int thumb_core_reg_alloc_order[] =
29847 {
29848 3, 2, 1, 0, 4, 5, 6, 7,
29849 12, 14, 8, 9, 10, 11
29850 };
29851
29852 /* Adjust register allocation order when compiling for Thumb. */
29853
29854 void
29855 arm_order_regs_for_local_alloc (void)
29856 {
29857 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29858 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29859 if (TARGET_THUMB)
29860 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29861 sizeof (thumb_core_reg_alloc_order));
29862 }
29863
29864 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29865
29866 bool
29867 arm_frame_pointer_required (void)
29868 {
29869 if (SUBTARGET_FRAME_POINTER_REQUIRED)
29870 return true;
29871
29872 /* If the function receives nonlocal gotos, it needs to save the frame
29873 pointer in the nonlocal_goto_save_area object. */
29874 if (cfun->has_nonlocal_label)
29875 return true;
29876
29877 /* The frame pointer is required for non-leaf APCS frames. */
29878 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
29879 return true;
29880
29881 /* If we are probing the stack in the prologue, we will have a faulting
29882 instruction prior to the stack adjustment and this requires a frame
29883 pointer if we want to catch the exception using the EABI unwinder. */
29884 if (!IS_INTERRUPT (arm_current_func_type ())
29885 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
29886 || flag_stack_clash_protection)
29887 && arm_except_unwind_info (&global_options) == UI_TARGET
29888 && cfun->can_throw_non_call_exceptions)
29889 {
29890 HOST_WIDE_INT size = get_frame_size ();
29891
29892 /* That's irrelevant if there is no stack adjustment. */
29893 if (size <= 0)
29894 return false;
29895
29896 /* That's relevant only if there is a stack probe. */
29897 if (crtl->is_leaf && !cfun->calls_alloca)
29898 {
29899 /* We don't have the final size of the frame so adjust. */
29900 size += 32 * UNITS_PER_WORD;
29901 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
29902 return true;
29903 }
29904 else
29905 return true;
29906 }
29907
29908 return false;
29909 }
29910
29911 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
29912 All modes except THUMB1 have conditional execution.
29913 If we have conditional arithmetic, return false before reload to
29914 enable some ifcvt transformations. */
29915 static bool
29916 arm_have_conditional_execution (void)
29917 {
29918 bool has_cond_exec, enable_ifcvt_trans;
29919
29920 /* Only THUMB1 cannot support conditional execution. */
29921 has_cond_exec = !TARGET_THUMB1;
29922
29923 /* Enable ifcvt transformations if we have conditional arithmetic, but only
29924 before reload. */
29925 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
29926
29927 return has_cond_exec && !enable_ifcvt_trans;
29928 }
29929
29930 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29931 static HOST_WIDE_INT
29932 arm_vector_alignment (const_tree type)
29933 {
29934 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29935
29936 if (TARGET_AAPCS_BASED)
29937 align = MIN (align, 64);
29938
29939 return align;
29940 }
29941
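/* Example (illustration only): a 16-byte GNU vector type such as int32x4_t
   nominally has a 128-bit size, but under TARGET_AAPCS_BASED the function
   above caps its alignment at 64 bits, matching the AAPCS rule quoted in the
   comment.  */
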
29942 static unsigned int
29943 arm_autovectorize_vector_modes (vector_modes *modes, bool)
29944 {
29945 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29946 {
29947 modes->safe_push (V16QImode);
29948 modes->safe_push (V8QImode);
29949 }
29950 return 0;
29951 }
29952
29953 static bool
29954 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29955 {
29956 /* Vectors which aren't in packed structures will not be less aligned than
29957 the natural alignment of their element type, so this is safe. */
29958 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29959 return !is_packed;
29960
29961 return default_builtin_vector_alignment_reachable (type, is_packed);
29962 }
29963
29964 static bool
29965 arm_builtin_support_vector_misalignment (machine_mode mode,
29966 const_tree type, int misalignment,
29967 bool is_packed)
29968 {
29969 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29970 {
29971 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29972
29973 if (is_packed)
29974 return align == 1;
29975
29976 /* If the misalignment is unknown, we should be able to handle the access
29977 so long as it is not to a member of a packed data structure. */
29978 if (misalignment == -1)
29979 return true;
29980
29981 /* Return true if the misalignment is a multiple of the natural alignment
29982 of the vector's element type. This is probably always going to be
29983 true in practice, since we've already established that this isn't a
29984 packed access. */
29985 return ((misalignment % align) == 0);
29986 }
29987
29988 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29989 is_packed);
29990 }
29991
29992 static void
29993 arm_conditional_register_usage (void)
29994 {
29995 int regno;
29996
29997 if (TARGET_THUMB1 && optimize_size)
29998 {
29999 /* When optimizing for size on Thumb-1, it's better not
30000 to use the HI regs, because of the overhead of
30001 stacking them. */
30002 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30003 fixed_regs[regno] = call_used_regs[regno] = 1;
30004 }
30005
30006 /* The link register can be clobbered by any branch insn,
30007 but we have no way to track that at present, so mark
30008 it as unavailable. */
30009 if (TARGET_THUMB1)
30010 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30011
30012 if (TARGET_32BIT && TARGET_VFP_BASE)
30013 {
30014 /* VFPv3 registers are disabled when earlier VFP
30015 versions are selected due to the definition of
30016 LAST_VFP_REGNUM. */
30017 for (regno = FIRST_VFP_REGNUM;
30018 regno <= LAST_VFP_REGNUM; ++ regno)
30019 {
30020 fixed_regs[regno] = 0;
30021 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30022 || regno >= FIRST_VFP_REGNUM + 32;
30023 }
30024 if (TARGET_HAVE_MVE)
30025 fixed_regs[VPR_REGNUM] = 0;
30026 }
30027
30028 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30029 {
30030 regno = FIRST_IWMMXT_GR_REGNUM;
30031 /* The 2002/10/09 revision of the XScale ABI has wCG0
30032 and wCG1 as call-preserved registers. The 2002/11/21
30033 revision changed this so that all wCG registers are
30034 scratch registers. */
30035 for (regno = FIRST_IWMMXT_GR_REGNUM;
30036 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30037 fixed_regs[regno] = 0;
30038 /* The XScale ABI has wR0 - wR9 as scratch registers,
30039 the rest as call-preserved registers. */
30040 for (regno = FIRST_IWMMXT_REGNUM;
30041 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30042 {
30043 fixed_regs[regno] = 0;
30044 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30045 }
30046 }
30047
30048 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30049 {
30050 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30051 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30052 }
30053 else if (TARGET_APCS_STACK)
30054 {
30055 fixed_regs[10] = 1;
30056 call_used_regs[10] = 1;
30057 }
30058 /* -mcaller-super-interworking reserves r11 for calls to
30059 _interwork_r11_call_via_rN(). Making the register global
30060 is an easy way of ensuring that it remains valid for all
30061 calls. */
30062 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30063 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30064 {
30065 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30066 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30067 if (TARGET_CALLER_INTERWORKING)
30068 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30069 }
30070
30071 /* The Q and GE bits are only accessed via special ACLE patterns. */
30072 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30073 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30074
30075 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30076 }
30077
30078 static reg_class_t
30079 arm_preferred_rename_class (reg_class_t rclass)
30080 {
30081 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30082 using GENERAL_REGS. During the register rename pass we therefore prefer
30083 LO_REGS, which can reduce code size. */
30084 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30085 return LO_REGS;
30086 else
30087 return NO_REGS;
30088 }
30089
30090 /* Compute the attribute "length" of insn "*push_multi".
30091 So this function MUST be kept in sync with that insn pattern. */
30092 int
30093 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30094 {
30095 int i, regno, hi_reg;
30096 int num_saves = XVECLEN (parallel_op, 0);
30097
30098 /* ARM mode. */
30099 if (TARGET_ARM)
30100 return 4;
30101 /* Thumb1 mode. */
30102 if (TARGET_THUMB1)
30103 return 2;
30104
30105 /* Thumb2 mode. */
30106 regno = REGNO (first_op);
30107 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30108 list fits in 8 bits. Normally this means all registers in the list must
30109 be LO_REGS, that is R0-R7. If any HI_REGS register is used, we must fall
30110 back to the 32-bit encoding. The one exception is PUSH, where LR (a
30111 HI_REGS register) can still be used with the 16-bit encoding. */
30112 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30113 for (i = 1; i < num_saves && !hi_reg; i++)
30114 {
30115 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30116 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30117 }
30118
30119 if (!hi_reg)
30120 return 2;
30121 return 4;
30122 }
30123
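/* For instance (illustration only): in Thumb-2 code "push {r4, r5, lr}" uses
   the 16-bit encoding, so the function above returns 2, whereas
   "push {r4, r8}" involves a high register other than LR and therefore needs
   the 32-bit encoding, returning 4.  */
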
30124 /* Compute the attribute "length" of insn. Currently, this function is used
30125 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30126 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30127 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
30128 true if OPERANDS contains an insn that explicitly updates the base register. */
30129
30130 int
30131 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30132 {
30133 /* ARM mode. */
30134 if (TARGET_ARM)
30135 return 4;
30136 /* Thumb1 mode. */
30137 if (TARGET_THUMB1)
30138 return 2;
30139
30140 rtx parallel_op = operands[0];
30141 /* Index of the last element of the PARALLEL. */
30142 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30143 /* Start with the number of the base register. */
30144 unsigned regno = REGNO (operands[1]);
30145 /* Skip the return and write-back patterns;
30146 only the register pop patterns matter for the analysis below. */
30147 unsigned first_indx = 0;
30148 first_indx += return_pc ? 1 : 0;
30149 first_indx += write_back_p ? 1 : 0;
30150
30151 /* A pop operation can be done through LDM or POP. If the base register is SP
30152 and write-back is used, then LDM is simply an alias of POP. */
30153 bool pop_p = (regno == SP_REGNUM && write_back_p);
30154 bool ldm_p = !pop_p;
30155
30156 /* Check base register for LDM. */
30157 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30158 return 4;
30159
30160 /* Check each register in the list. */
30161 for (; indx >= first_indx; indx--)
30162 {
30163 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30164 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30165 comment in arm_attr_length_push_multi. */
30166 if (REGNO_REG_CLASS (regno) == HI_REGS
30167 && (regno != PC_REGNUM || ldm_p))
30168 return 4;
30169 }
30170
30171 return 2;
30172 }
30173
30174 /* Compute the number of instructions emitted by output_move_double. */
30175 int
30176 arm_count_output_move_double_insns (rtx *operands)
30177 {
30178 int count;
30179 rtx ops[2];
30180 /* output_move_double may modify the operands array, so call it
30181 here on a copy of the array. */
30182 ops[0] = operands[0];
30183 ops[1] = operands[1];
30184 output_move_double (ops, false, &count);
30185 return count;
30186 }
30187
30188 /* Same as above, but operands are a register/memory pair in SImode.
30189 Assumes OPERANDS has the base register in position 0 and the memory operand
30190 in position 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30191 int
30192 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30193 {
30194 int count;
30195 rtx ops[2];
30196 int regnum, memnum;
30197 if (load)
30198 regnum = 0, memnum = 1;
30199 else
30200 regnum = 1, memnum = 0;
30201 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30202 ops[memnum] = adjust_address (operands[2], DImode, 0);
30203 output_move_double (ops, false, &count);
30204 return count;
30205 }
30206
30207
30208 int
30209 vfp3_const_double_for_fract_bits (rtx operand)
30210 {
30211 REAL_VALUE_TYPE r0;
30212
30213 if (!CONST_DOUBLE_P (operand))
30214 return 0;
30215
30216 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30217 if (exact_real_inverse (DFmode, &r0)
30218 && !REAL_VALUE_NEGATIVE (r0))
30219 {
30220 if (exact_real_truncate (DFmode, &r0))
30221 {
30222 HOST_WIDE_INT value = real_to_integer (&r0);
30223 value = value & 0xffffffff;
30224 if ((value != 0) && ( (value & (value - 1)) == 0))
30225 {
30226 int ret = exact_log2 (value);
30227 gcc_assert (IN_RANGE (ret, 0, 31));
30228 return ret;
30229 }
30230 }
30231 }
30232 return 0;
30233 }
30234
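/* Worked example (illustration only): for the constant 0.0625 the exact
   inverse is 16.0, which is positive, truncates exactly and is the power of
   two 2^4, so the function above returns 4; this is the fraction-bit count
   used by the corresponding fixed-point conversion patterns.  */
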
30235 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30236 log2 is in [1, 32], return that log2. Otherwise return -1.
30237 This is used in the patterns for vcvt.s32.f32 floating-point to
30238 fixed-point conversions. */
30239
30240 int
30241 vfp3_const_double_for_bits (rtx x)
30242 {
30243 const REAL_VALUE_TYPE *r;
30244
30245 if (!CONST_DOUBLE_P (x))
30246 return -1;
30247
30248 r = CONST_DOUBLE_REAL_VALUE (x);
30249
30250 if (REAL_VALUE_NEGATIVE (*r)
30251 || REAL_VALUE_ISNAN (*r)
30252 || REAL_VALUE_ISINF (*r)
30253 || !real_isinteger (r, SFmode))
30254 return -1;
30255
30256 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30257
30258 /* The exact_log2 above will have returned -1 if this is
30259 not an exact log2. */
30260 if (!IN_RANGE (hwint, 1, 32))
30261 return -1;
30262
30263 return hwint;
30264 }
30265
30266 \f
30267 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30268
30269 static void
30270 arm_pre_atomic_barrier (enum memmodel model)
30271 {
30272 if (need_atomic_barrier_p (model, true))
30273 emit_insn (gen_memory_barrier ());
30274 }
30275
30276 static void
30277 arm_post_atomic_barrier (enum memmodel model)
30278 {
30279 if (need_atomic_barrier_p (model, false))
30280 emit_insn (gen_memory_barrier ());
30281 }
30282
30283 /* Emit the load-exclusive and store-exclusive instructions.
30284 Use acquire and release versions if necessary. */
30285
30286 static void
30287 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30288 {
30289 rtx (*gen) (rtx, rtx);
30290
30291 if (acq)
30292 {
30293 switch (mode)
30294 {
30295 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30296 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30297 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30298 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30299 default:
30300 gcc_unreachable ();
30301 }
30302 }
30303 else
30304 {
30305 switch (mode)
30306 {
30307 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30308 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30309 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30310 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30311 default:
30312 gcc_unreachable ();
30313 }
30314 }
30315
30316 emit_insn (gen (rval, mem));
30317 }
30318
30319 static void
30320 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30321 rtx mem, bool rel)
30322 {
30323 rtx (*gen) (rtx, rtx, rtx);
30324
30325 if (rel)
30326 {
30327 switch (mode)
30328 {
30329 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30330 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30331 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30332 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30333 default:
30334 gcc_unreachable ();
30335 }
30336 }
30337 else
30338 {
30339 switch (mode)
30340 {
30341 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30342 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30343 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30344 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30345 default:
30346 gcc_unreachable ();
30347 }
30348 }
30349
30350 emit_insn (gen (bval, rval, mem));
30351 }
30352
30353 /* Mark the previous jump instruction as unlikely. */
30354
30355 static void
30356 emit_unlikely_jump (rtx insn)
30357 {
30358 rtx_insn *jump = emit_jump_insn (insn);
30359 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30360 }
30361
30362 /* Expand a compare and swap pattern. */
30363
30364 void
30365 arm_expand_compare_and_swap (rtx operands[])
30366 {
30367 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30368 machine_mode mode, cmp_mode;
30369
30370 bval = operands[0];
30371 rval = operands[1];
30372 mem = operands[2];
30373 oldval = operands[3];
30374 newval = operands[4];
30375 is_weak = operands[5];
30376 mod_s = operands[6];
30377 mod_f = operands[7];
30378 mode = GET_MODE (mem);
30379
30380 /* Normally the succ memory model must be stronger than fail, but in the
30381 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30382 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30383
30384 if (TARGET_HAVE_LDACQ
30385 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30386 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30387 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30388
30389 switch (mode)
30390 {
30391 case E_QImode:
30392 case E_HImode:
30393 /* For narrow modes, we're going to perform the comparison in SImode,
30394 so do the zero-extension now. */
30395 rval = gen_reg_rtx (SImode);
30396 oldval = convert_modes (SImode, mode, oldval, true);
30397 /* FALLTHRU */
30398
30399 case E_SImode:
30400 /* Force the value into a register if needed. We waited until after
30401 the zero-extension above to do this properly. */
30402 if (!arm_add_operand (oldval, SImode))
30403 oldval = force_reg (SImode, oldval);
30404 break;
30405
30406 case E_DImode:
30407 if (!cmpdi_operand (oldval, mode))
30408 oldval = force_reg (mode, oldval);
30409 break;
30410
30411 default:
30412 gcc_unreachable ();
30413 }
30414
30415 if (TARGET_THUMB1)
30416 cmp_mode = E_SImode;
30417 else
30418 cmp_mode = CC_Zmode;
30419
30420 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30421 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30422 oldval, newval, is_weak, mod_s, mod_f));
30423
30424 if (mode == QImode || mode == HImode)
30425 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30426
30427 /* In all cases, we arrange for success to be signaled by Z set.
30428 This arrangement allows for the boolean result to be used directly
30429 in a subsequent branch, post optimization. For Thumb-1 targets, the
30430 boolean negation of the result is also stored in bval, because the
30431 Thumb-1 backend lacks dependency tracking for the CC flag: flag setting
30432 is not represented at the RTL level. */
30433 if (TARGET_THUMB1)
30434 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30435 else
30436 {
30437 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30438 emit_insn (gen_rtx_SET (bval, x));
30439 }
30440 }
30441
30442 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30443 another memory store between the load-exclusive and store-exclusive can
30444 reset the monitor from Exclusive to Open state. This means we must wait
30445 until after reload to split the pattern, lest we get a register spill in
30446 the middle of the atomic sequence. Success of the compare and swap is
30447 indicated by the Z flag being set for 32-bit targets and by neg_bval being
30448 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
30449 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
30450
30451 void
30452 arm_split_compare_and_swap (rtx operands[])
30453 {
30454 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30455 machine_mode mode;
30456 enum memmodel mod_s, mod_f;
30457 bool is_weak;
30458 rtx_code_label *label1, *label2;
30459 rtx x, cond;
30460
30461 rval = operands[1];
30462 mem = operands[2];
30463 oldval = operands[3];
30464 newval = operands[4];
30465 is_weak = (operands[5] != const0_rtx);
30466 mod_s_rtx = operands[6];
30467 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
30468 mod_f = memmodel_from_int (INTVAL (operands[7]));
30469 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
30470 mode = GET_MODE (mem);
30471
30472 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
30473
30474 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
30475 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
30476
30477 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
30478 a full barrier is emitted after the store-release. */
30479 if (is_armv8_sync)
30480 use_acquire = false;
30481
30482 /* Checks whether a barrier is needed and emits one accordingly. */
30483 if (!(use_acquire || use_release))
30484 arm_pre_atomic_barrier (mod_s);
30485
30486 label1 = NULL;
30487 if (!is_weak)
30488 {
30489 label1 = gen_label_rtx ();
30490 emit_label (label1);
30491 }
30492 label2 = gen_label_rtx ();
30493
30494 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30495
30496 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1)
30497 if oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
30498 if (TARGET_32BIT)
30499 {
30500 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
30501 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30502 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30503 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30504 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30505 }
30506 else
30507 {
30508 emit_move_insn (neg_bval, const1_rtx);
30509 cond = gen_rtx_NE (VOIDmode, rval, oldval);
30510 if (thumb1_cmpneg_operand (oldval, SImode))
30511 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
30512 label2, cond));
30513 else
30514 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
30515 }
30516
30517 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
30518
30519 /* Weak or strong, we want EQ to be true for success, so that we
30520 match the flags that we got from the compare above. */
30521 if (TARGET_32BIT)
30522 {
30523 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30524 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
30525 emit_insn (gen_rtx_SET (cond, x));
30526 }
30527
30528 if (!is_weak)
30529 {
30530 /* Z is set to boolean value of !neg_bval, as required to communicate
30531 with arm_expand_compare_and_swap. */
30532 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
30533 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
30534 }
30535
30536 if (!is_mm_relaxed (mod_f))
30537 emit_label (label2);
30538
30539 /* Checks whether a barrier is needed and emits one accordingly. */
30540 if (is_armv8_sync
30541 || !(use_acquire || use_release))
30542 arm_post_atomic_barrier (mod_s);
30543
30544 if (is_mm_relaxed (mod_f))
30545 emit_label (label2);
30546 }
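/* Illustrative sketch only: for a strong compare-and-swap on a 32-bit
   target with relaxed ordering, the splitter above emits roughly

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f			@ Z clear signals failure
		strex	neg_bval, newval, [mem]
		cmp	neg_bval, #0		@ Z set iff the store succeeded
		bne	1b			@ strong CAS retries the loop
	2:

   with ldaex/stlex used instead, and barriers added, for the stronger
   memory models.  */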
30547
30548 /* Split an atomic operation pattern. Operation is given by CODE and is one
30549 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
30550 operation). Operation is performed on the content at MEM and on VALUE
30551 following the memory model MODEL_RTX. The content at MEM before and after
30552 the operation is returned in OLD_OUT and NEW_OUT respectively while the
30553 success of the operation is returned in COND. Using a scratch register or
30554 an operand register for these determines what result is returned for that
30555 pattern. */
30556
30557 void
30558 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30559 rtx value, rtx model_rtx, rtx cond)
30560 {
30561 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
30562 machine_mode mode = GET_MODE (mem);
30563 machine_mode wmode = (mode == DImode ? DImode : SImode);
30564 rtx_code_label *label;
30565 bool all_low_regs, bind_old_new;
30566 rtx x;
30567
30568 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
30569
30570 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
30571 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
30572
30573 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
30574 a full barrier is emitted after the store-release. */
30575 if (is_armv8_sync)
30576 use_acquire = false;
30577
30578 /* Checks whether a barrier is needed and emits one accordingly. */
30579 if (!(use_acquire || use_release))
30580 arm_pre_atomic_barrier (model);
30581
30582 label = gen_label_rtx ();
30583 emit_label (label);
30584
30585 if (new_out)
30586 new_out = gen_lowpart (wmode, new_out);
30587 if (old_out)
30588 old_out = gen_lowpart (wmode, old_out);
30589 else
30590 old_out = new_out;
30591 value = simplify_gen_subreg (wmode, value, mode, 0);
30592
30593 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30594
30595 /* Does the operation require destination and first operand to use the same
30596 register? This is decided by register constraints of relevant insn
30597 patterns in thumb1.md. */
30598 gcc_assert (!new_out || REG_P (new_out));
30599 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
30600 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
30601 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
30602 bind_old_new =
30603 (TARGET_THUMB1
30604 && code != SET
30605 && code != MINUS
30606 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
30607
30608 /* We want to return the old value while putting the result of the operation
30609 in the same register as the old value, so copy the old value over to the
30610 destination register and use that register for the operation. */
30611 if (old_out && bind_old_new)
30612 {
30613 emit_move_insn (new_out, old_out);
30614 old_out = new_out;
30615 }
30616
30617 switch (code)
30618 {
30619 case SET:
30620 new_out = value;
30621 break;
30622
30623 case NOT:
30624 x = gen_rtx_AND (wmode, old_out, value);
30625 emit_insn (gen_rtx_SET (new_out, x));
30626 x = gen_rtx_NOT (wmode, new_out);
30627 emit_insn (gen_rtx_SET (new_out, x));
30628 break;
30629
30630 case MINUS:
30631 if (CONST_INT_P (value))
30632 {
30633 value = GEN_INT (-INTVAL (value));
30634 code = PLUS;
30635 }
30636 /* FALLTHRU */
30637
30638 case PLUS:
30639 if (mode == DImode)
30640 {
30641 /* DImode plus/minus need to clobber flags. */
30642 /* The adddi3 and subdi3 patterns are incorrectly written so that
30643 they require matching operands, even when we could easily support
30644 three operands. Thankfully, this can be fixed up post-splitting,
30645 as the individual add+adc patterns do accept three operands and
30646 post-reload cprop can make these moves go away. */
30647 emit_move_insn (new_out, old_out);
30648 if (code == PLUS)
30649 x = gen_adddi3 (new_out, new_out, value);
30650 else
30651 x = gen_subdi3 (new_out, new_out, value);
30652 emit_insn (x);
30653 break;
30654 }
30655 /* FALLTHRU */
30656
30657 default:
30658 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30659 emit_insn (gen_rtx_SET (new_out, x));
30660 break;
30661 }
30662
30663 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30664 use_release);
30665
30666 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30667 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30668
30669 /* Checks whether a barrier is needed and emits one accordingly. */
30670 if (is_armv8_sync
30671 || !(use_acquire || use_release))
30672 arm_post_atomic_barrier (model);
30673 }
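/* Illustrative sketch only: for a relaxed SImode atomic fetch-and-add,
   the code emitted above is essentially the usual exclusive retry loop

	1:	ldrex	old_out, [mem]
		add	new_out, old_out, value
		strex	cond, new_out, [mem]
		cmp	cond, #0
		bne	1b

   with acquire/release variants and barriers added as MODEL requires.  */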
30674 \f
30675 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
30676 If CAN_INVERT, store either the result or its inverse in TARGET
30677 and return true if TARGET contains the inverse. If !CAN_INVERT,
30678 always store the result in TARGET, never its inverse.
30679
30680 Note that the handling of floating-point comparisons is not
30681 IEEE compliant. */
30682
30683 bool
30684 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
30685 bool can_invert)
30686 {
30687 machine_mode cmp_result_mode = GET_MODE (target);
30688 machine_mode cmp_mode = GET_MODE (op0);
30689
30690 bool inverted;
30691 switch (code)
30692 {
30693 /* For these we need to compute the inverse of the requested
30694 comparison. */
30695 case UNORDERED:
30696 case UNLT:
30697 case UNLE:
30698 case UNGT:
30699 case UNGE:
30700 case UNEQ:
30701 case NE:
30702 code = reverse_condition_maybe_unordered (code);
30703 if (!can_invert)
30704 {
30705 /* Recursively emit the inverted comparison into a temporary
30706 and then store its inverse in TARGET. This avoids reusing
30707 TARGET (which for integer NE could be one of the inputs). */
30708 rtx tmp = gen_reg_rtx (cmp_result_mode);
30709 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
30710 gcc_unreachable ();
30711 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
30712 return false;
30713 }
30714 inverted = true;
30715 break;
30716
30717 default:
30718 inverted = false;
30719 break;
30720 }
30721
30722 switch (code)
30723 {
30724 /* These are natively supported for zero comparisons, but otherwise
30725 require the operands to be swapped. */
30726 case LE:
30727 case LT:
30728 if (op1 != CONST0_RTX (cmp_mode))
30729 {
30730 code = swap_condition (code);
30731 std::swap (op0, op1);
30732 }
30733 /* Fall through. */
30734
30735 /* These are natively supported for both register and zero operands. */
30736 case EQ:
30737 case GE:
30738 case GT:
30739 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
30740 return inverted;
30741
30742 /* These are natively supported for register operands only.
30743 Comparisons with zero aren't useful and should be folded
30744 or canonicalized by target-independent code. */
30745 case GEU:
30746 case GTU:
30747 emit_insn (gen_neon_vc (code, cmp_mode, target,
30748 op0, force_reg (cmp_mode, op1)));
30749 return inverted;
30750
30751 /* These require the operands to be swapped and likewise do not
30752 support comparisons with zero. */
30753 case LEU:
30754 case LTU:
30755 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
30756 target, force_reg (cmp_mode, op1), op0));
30757 return inverted;
30758
30759 /* These need a combination of two comparisons. */
30760 case LTGT:
30761 case ORDERED:
30762 {
30763 /* Operands are LTGT iff (a > b || a < b).
30764 Operands are ORDERED iff (a > b || a <= b). */
30765 rtx gt_res = gen_reg_rtx (cmp_result_mode);
30766 rtx alt_res = gen_reg_rtx (cmp_result_mode);
30767 rtx_code alt_code = (code == LTGT ? LT : LE);
30768 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
30769 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
30770 gcc_unreachable ();
30771 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
30772 gt_res, alt_res)));
30773 return inverted;
30774 }
30775
30776 default:
30777 gcc_unreachable ();
30778 }
30779 }
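/* For illustration: an NE comparison is reversed to EQ above and emitted
   as a single VCEQ; if CAN_INVERT the inverted result is returned for the
   caller to handle, otherwise a VMVN is appended.  LTGT is built from two
   native comparisons, roughly

	vcgt.f32  q2, q0, q1
	vcgt.f32  q3, q1, q0
	vorr      q2, q2, q3

   (sketch only; the exact instructions depend on the element type).  */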
30780
30781 /* Expand a vcond or vcondu pattern with operands OPERANDS.
30782 CMP_RESULT_MODE is the mode of the comparison result. */
30783
30784 void
30785 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
30786 {
30787 rtx mask = gen_reg_rtx (cmp_result_mode);
30788 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
30789 operands[4], operands[5], true);
30790 if (inverted)
30791 std::swap (operands[1], operands[2]);
30792 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
30793 mask, operands[1], operands[2]));
30794 }
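/* For example (sketch only), a V4SF vcond computing "x < y ? a : b"
   first emits VCGT on the swapped operands (x < y is y > x) to form the
   mask, then a bitwise select that picks A where the mask is all-ones
   and B where it is all-zeros.  A comparison that had to be inverted
   (UNGE, say) instead swaps the A and B select operands rather than
   negating the mask.  */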
30795 \f
30796 #define MAX_VECT_LEN 16
30797
30798 struct expand_vec_perm_d
30799 {
30800 rtx target, op0, op1;
30801 vec_perm_indices perm;
30802 machine_mode vmode;
30803 bool one_vector_p;
30804 bool testing_p;
30805 };
30806
30807 /* Generate a variable permutation. */
30808
30809 static void
30810 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30811 {
30812 machine_mode vmode = GET_MODE (target);
30813 bool one_vector_p = rtx_equal_p (op0, op1);
30814
30815 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30816 gcc_checking_assert (GET_MODE (op0) == vmode);
30817 gcc_checking_assert (GET_MODE (op1) == vmode);
30818 gcc_checking_assert (GET_MODE (sel) == vmode);
30819 gcc_checking_assert (TARGET_NEON);
30820
30821 if (one_vector_p)
30822 {
30823 if (vmode == V8QImode)
30824 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30825 else
30826 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30827 }
30828 else
30829 {
30830 rtx pair;
30831
30832 if (vmode == V8QImode)
30833 {
30834 pair = gen_reg_rtx (V16QImode);
30835 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30836 pair = gen_lowpart (TImode, pair);
30837 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30838 }
30839 else
30840 {
30841 pair = gen_reg_rtx (OImode);
30842 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30843 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30844 }
30845 }
30846 }
30847
30848 void
30849 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30850 {
30851 machine_mode vmode = GET_MODE (target);
30852 unsigned int nelt = GET_MODE_NUNITS (vmode);
30853 bool one_vector_p = rtx_equal_p (op0, op1);
30854 rtx mask;
30855
30856 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30857 numbering of elements for big-endian, we must reverse the order. */
30858 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30859
30860 /* The VTBL instruction does not use a modulo index, so we must take care
30861 of that ourselves. */
30862 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30863 mask = gen_const_vec_duplicate (vmode, mask);
30864 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30865
30866 arm_expand_vec_perm_1 (target, op0, op1, sel);
30867 }
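/* For example, for a two-operand V16QImode permutation each selector
   byte is ANDed with 31 here, so an out-of-range index such as 40
   selects lane 8 as VEC_PERM_EXPR requires, rather than producing the
   zero that a raw VTBL lookup would give.  */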
30868
30869 /* Map lane ordering between architectural lane order and GCC lane order,
30870 taking the ABI into account. See comment above output_move_neon for details. */
30871
30872 static int
30873 neon_endian_lane_map (machine_mode mode, int lane)
30874 {
30875 if (BYTES_BIG_ENDIAN)
30876 {
30877 int nelems = GET_MODE_NUNITS (mode);
30878 /* Reverse lane order. */
30879 lane = (nelems - 1 - lane);
30880 /* Reverse D register order, to match ABI. */
30881 if (GET_MODE_SIZE (mode) == 16)
30882 lane = lane ^ (nelems / 2);
30883 }
30884 return lane;
30885 }
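/* For example, on a big-endian target V2SImode lanes simply map 0 <-> 1,
   while for V4SImode (a Q register) architectural lanes {0,1,2,3} map to
   GCC lanes {1,0,3,2}: lanes are reversed within each D register, but
   the two D registers keep their positions to match the ABI.  */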
30886
30887 /* Some permutations index into pairs of vectors; this is a helper function
30888 to map indexes into those pairs of vectors. */
30889
30890 static int
30891 neon_pair_endian_lane_map (machine_mode mode, int lane)
30892 {
30893 int nelem = GET_MODE_NUNITS (mode);
30894 if (BYTES_BIG_ENDIAN)
30895 lane =
30896 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
30897 return lane;
30898 }
30899
30900 /* Generate or test for an insn that supports a constant permutation. */
30901
30902 /* Recognize patterns for the VUZP insns. */
30903
30904 static bool
30905 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30906 {
30907 unsigned int i, odd, mask, nelt = d->perm.length ();
30908 rtx out0, out1, in0, in1;
30909 int first_elem;
30910 int swap_nelt;
30911
30912 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30913 return false;
30914
30915 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
30916 big-endian pattern on 64-bit vectors, so we correct for that. */
30917 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
30918 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
30919
30920 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
30921
30922 if (first_elem == neon_endian_lane_map (d->vmode, 0))
30923 odd = 0;
30924 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
30925 odd = 1;
30926 else
30927 return false;
30928 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30929
30930 for (i = 0; i < nelt; i++)
30931 {
30932 unsigned elt =
30933 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
30934 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
30935 return false;
30936 }
30937
30938 /* Success! */
30939 if (d->testing_p)
30940 return true;
30941
30942 in0 = d->op0;
30943 in1 = d->op1;
30944 if (swap_nelt != 0)
30945 std::swap (in0, in1);
30946
30947 out0 = d->target;
30948 out1 = gen_reg_rtx (d->vmode);
30949 if (odd)
30950 std::swap (out0, out1);
30951
30952 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
30953 return true;
30954 }
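/* For example (little-endian), the two-operand V8QImode selector
   {0,2,4,6,8,10,12,14} is matched here with odd == 0: it picks the
   even-indexed bytes of the concatenated inputs, which is exactly what
   the first output of VUZP.8 holds.  */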
30955
30956 /* Recognize patterns for the VZIP insns. */
30957
30958 static bool
30959 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30960 {
30961 unsigned int i, high, mask, nelt = d->perm.length ();
30962 rtx out0, out1, in0, in1;
30963 int first_elem;
30964 bool is_swapped;
30965
30966 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30967 return false;
30968
30969 is_swapped = BYTES_BIG_ENDIAN;
30970
30971 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
30972
30973 high = nelt / 2;
30974 if (first_elem == neon_endian_lane_map (d->vmode, high))
30975 ;
30976 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
30977 high = 0;
30978 else
30979 return false;
30980 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30981
30982 for (i = 0; i < nelt / 2; i++)
30983 {
30984 unsigned elt =
30985 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
30986 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
30987 != elt)
30988 return false;
30989 elt =
30990 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
30991 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
30992 != elt)
30993 return false;
30994 }
30995
30996 /* Success! */
30997 if (d->testing_p)
30998 return true;
30999
31000 in0 = d->op0;
31001 in1 = d->op1;
31002 if (is_swapped)
31003 std::swap (in0, in1);
31004
31005 out0 = d->target;
31006 out1 = gen_reg_rtx (d->vmode);
31007 if (high)
31008 std::swap (out0, out1);
31009
31010 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31011 return true;
31012 }
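/* For example (little-endian), the two-operand V8QImode selector
   {0,8,1,9,2,10,3,11} is matched here with high == 0: it interleaves the
   low halves of the two inputs, which is what the first output of
   VZIP.8 holds.  */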
31013
31014 /* Recognize patterns for the VREV insns. */
31015 static bool
31016 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31017 {
31018 unsigned int i, j, diff, nelt = d->perm.length ();
31019 rtx (*gen) (machine_mode, rtx, rtx);
31020
31021 if (!d->one_vector_p)
31022 return false;
31023
31024 diff = d->perm[0];
31025 switch (diff)
31026 {
31027 case 7:
31028 switch (d->vmode)
31029 {
31030 case E_V16QImode:
31031 case E_V8QImode:
31032 gen = gen_neon_vrev64;
31033 break;
31034 default:
31035 return false;
31036 }
31037 break;
31038 case 3:
31039 switch (d->vmode)
31040 {
31041 case E_V16QImode:
31042 case E_V8QImode:
31043 gen = gen_neon_vrev32;
31044 break;
31045 case E_V8HImode:
31046 case E_V4HImode:
31047 case E_V8HFmode:
31048 case E_V4HFmode:
31049 gen = gen_neon_vrev64;
31050 break;
31051 default:
31052 return false;
31053 }
31054 break;
31055 case 1:
31056 switch (d->vmode)
31057 {
31058 case E_V16QImode:
31059 case E_V8QImode:
31060 gen = gen_neon_vrev16;
31061 break;
31062 case E_V8HImode:
31063 case E_V4HImode:
31064 gen = gen_neon_vrev32;
31065 break;
31066 case E_V4SImode:
31067 case E_V2SImode:
31068 case E_V4SFmode:
31069 case E_V2SFmode:
31070 gen = gen_neon_vrev64;
31071 break;
31072 default:
31073 return false;
31074 }
31075 break;
31076 default:
31077 return false;
31078 }
31079
31080 for (i = 0; i < nelt ; i += diff + 1)
31081 for (j = 0; j <= diff; j += 1)
31082 {
31083 /* This is guaranteed to be true, as diff is one of
31084 7, 3 or 1 and we should have enough elements in the
31085 queue to generate this. Getting a vector mask with a
31086 diff value other than one of these implies that
31087 something has gone wrong by the time we get here. */
31088 gcc_assert (i + j < nelt);
31089 if (d->perm[i + j] != i + diff - j)
31090 return false;
31091 }
31092
31093 /* Success! */
31094 if (d->testing_p)
31095 return true;
31096
31097 emit_insn (gen (d->vmode, d->target, d->op0));
31098 return true;
31099 }
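/* For example, a V8QImode selector of {3,2,1,0,7,6,5,4} has diff == 3
   and is matched above as VREV32.8 (reverse the bytes within each 32-bit
   word), while {1,0,3,2,5,4,7,6} has diff == 1 and becomes VREV16.8.  */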
31100
31101 /* Recognize patterns for the VTRN insns. */
31102
31103 static bool
31104 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31105 {
31106 unsigned int i, odd, mask, nelt = d->perm.length ();
31107 rtx out0, out1, in0, in1;
31108
31109 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31110 return false;
31111
31112 /* Note that these are little-endian tests. Adjust for big-endian later. */
31113 if (d->perm[0] == 0)
31114 odd = 0;
31115 else if (d->perm[0] == 1)
31116 odd = 1;
31117 else
31118 return false;
31119 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31120
31121 for (i = 0; i < nelt; i += 2)
31122 {
31123 if (d->perm[i] != i + odd)
31124 return false;
31125 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31126 return false;
31127 }
31128
31129 /* Success! */
31130 if (d->testing_p)
31131 return true;
31132
31133 in0 = d->op0;
31134 in1 = d->op1;
31135 if (BYTES_BIG_ENDIAN)
31136 {
31137 std::swap (in0, in1);
31138 odd = !odd;
31139 }
31140
31141 out0 = d->target;
31142 out1 = gen_reg_rtx (d->vmode);
31143 if (odd)
31144 std::swap (out0, out1);
31145
31146 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31147 return true;
31148 }
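/* For example (little-endian), the two-operand V8QImode selector
   {0,8,2,10,4,12,6,14} is matched here with odd == 0: the even lanes of
   the first input interleaved with the even lanes of the second, i.e.
   the first output of VTRN.8.  */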
31149
31150 /* Recognize patterns for the VEXT insns. */
31151
31152 static bool
31153 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31154 {
31155 unsigned int i, nelt = d->perm.length ();
31156 rtx offset;
31157
31158 unsigned int location;
31159
31160 unsigned int next = d->perm[0] + 1;
31161
31162 /* TODO: Handle GCC's numbering of elements for big-endian. */
31163 if (BYTES_BIG_ENDIAN)
31164 return false;
31165
31166 /* Check if the extracted indexes are increasing by one. */
31167 for (i = 1; i < nelt; next++, i++)
31168 {
31169 /* If we hit the most significant element of the 2nd vector in
31170 the previous iteration, no need to test further. */
31171 if (next == 2 * nelt)
31172 return false;
31173
31174 /* If we are operating on only one vector, it could be a
31175 rotation. If there are only two elements of size < 64, let
31176 arm_evpc_neon_vrev catch it. */
31177 if (d->one_vector_p && (next == nelt))
31178 {
31179 if ((nelt == 2) && (d->vmode != V2DImode))
31180 return false;
31181 else
31182 next = 0;
31183 }
31184
31185 if (d->perm[i] != next)
31186 return false;
31187 }
31188
31189 location = d->perm[0];
31190
31191 /* Success! */
31192 if (d->testing_p)
31193 return true;
31194
31195 offset = GEN_INT (location);
31196
31197 if (d->vmode == E_DImode)
31198 return false;
31199
31200 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31201 return true;
31202 }
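/* For example, the two-operand V8QImode selector {3,4,5,6,7,8,9,10}
   increases by one from an initial index of 3, so it is matched above
   and emitted as VEXT.8 with an immediate offset of 3.  */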
31203
31204 /* The NEON VTBL instruction is a fully variable permutation that's even
31205 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31206 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31207 can do slightly better by expanding this as a constant where we don't
31208 have to apply a mask. */
31209
31210 static bool
31211 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31212 {
31213 rtx rperm[MAX_VECT_LEN], sel;
31214 machine_mode vmode = d->vmode;
31215 unsigned int i, nelt = d->perm.length ();
31216
31217 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31218 numbering of elements for big-endian, we must reverse the order. */
31219 if (BYTES_BIG_ENDIAN)
31220 return false;
31221
31222 if (d->testing_p)
31223 return true;
31224
31225 /* Generic code will try constant permutation twice. Once with the
31226 original mode and again with the elements lowered to QImode.
31227 So wait and don't do the selector expansion ourselves. */
31228 if (vmode != V8QImode && vmode != V16QImode)
31229 return false;
31230
31231 for (i = 0; i < nelt; ++i)
31232 rperm[i] = GEN_INT (d->perm[i]);
31233 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31234 sel = force_reg (vmode, sel);
31235
31236 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31237 return true;
31238 }
31239
31240 static bool
31241 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31242 {
31243 /* Check if the input mask matches vext before reordering the
31244 operands. */
31245 if (TARGET_NEON)
31246 if (arm_evpc_neon_vext (d))
31247 return true;
31248
31249 /* The pattern matching functions above are written to look for a small
31250 number to begin the sequence (0, 1, N/2). If we begin with an index
31251 from the second operand, we can swap the operands. */
31252 unsigned int nelt = d->perm.length ();
31253 if (d->perm[0] >= nelt)
31254 {
31255 d->perm.rotate_inputs (1);
31256 std::swap (d->op0, d->op1);
31257 }
31258
31259 if (TARGET_NEON)
31260 {
31261 if (arm_evpc_neon_vuzp (d))
31262 return true;
31263 if (arm_evpc_neon_vzip (d))
31264 return true;
31265 if (arm_evpc_neon_vrev (d))
31266 return true;
31267 if (arm_evpc_neon_vtrn (d))
31268 return true;
31269 return arm_evpc_neon_vtbl (d);
31270 }
31271 return false;
31272 }
31273
31274 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31275
31276 static bool
31277 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
31278 const vec_perm_indices &sel)
31279 {
31280 struct expand_vec_perm_d d;
31281 int i, nelt, which;
31282
31283 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31284 return false;
31285
31286 d.target = target;
31287 d.op0 = op0;
31288 d.op1 = op1;
31289
31290 d.vmode = vmode;
31291 gcc_assert (VECTOR_MODE_P (d.vmode));
31292 d.testing_p = !target;
31293
31294 nelt = GET_MODE_NUNITS (d.vmode);
31295 for (i = which = 0; i < nelt; ++i)
31296 {
31297 int ei = sel[i] & (2 * nelt - 1);
31298 which |= (ei < nelt ? 1 : 2);
31299 }
31300
31301 switch (which)
31302 {
31303 default:
31304 gcc_unreachable();
31305
31306 case 3:
31307 d.one_vector_p = false;
31308 if (d.testing_p || !rtx_equal_p (op0, op1))
31309 break;
31310
31311 /* The elements of PERM do not suggest that only the first operand
31312 is used, but both operands are identical. Allow easier matching
31313 of the permutation by folding the permutation into the single
31314 input vector. */
31315 /* FALLTHRU */
31316 case 2:
31317 d.op0 = op1;
31318 d.one_vector_p = true;
31319 break;
31320
31321 case 1:
31322 d.op1 = op0;
31323 d.one_vector_p = true;
31324 break;
31325 }
31326
31327 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31328
31329 if (!d.testing_p)
31330 return arm_expand_vec_perm_const_1 (&d);
31331
31332 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31333 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31334 if (!d.one_vector_p)
31335 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31336
31337 start_sequence ();
31338 bool ret = arm_expand_vec_perm_const_1 (&d);
31339 end_sequence ();
31340
31341 return ret;
31342 }
31343
31344 bool
31345 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31346 {
31347 /* If we are soft float then all auto-increment forms are ok, provided
31348 we either have ldrd or the access is no wider than a word. */
31349 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31350 return true;
31351
31352 switch (code)
31353 {
31354 /* Post-increment and pre-decrement are supported for all
31355 instruction forms except for vector forms. */
31356 case ARM_POST_INC:
31357 case ARM_PRE_DEC:
31358 if (VECTOR_MODE_P (mode))
31359 {
31360 if (code != ARM_PRE_DEC)
31361 return true;
31362 else
31363 return false;
31364 }
31365
31366 return true;
31367
31368 case ARM_POST_DEC:
31369 case ARM_PRE_INC:
31370 /* Without LDRD and mode size greater than
31371 word size, there is no point in auto-incrementing
31372 because ldm and stm will not have these forms. */
31373 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31374 return false;
31375
31376 /* Vector and floating point modes do not support
31377 these auto increment forms. */
31378 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31379 return false;
31380
31381 return true;
31382
31383 default:
31384 return false;
31385
31386 }
31387
31388 return false;
31389 }
31390
31391 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31392 on ARM, since we know that shifts by negative amounts are no-ops.
31393 Additionally, the default expansion code is not available or suitable
31394 for post-reload insn splits (this can occur when the register allocator
31395 chooses not to do a shift in NEON).
31396
31397 This function is used in both initial expand and post-reload splits, and
31398 handles all kinds of 64-bit shifts.
31399
31400 Input requirements:
31401 - It is safe for the input and output to be the same register, but
31402 early-clobber rules apply for the shift amount and scratch registers.
31403 - Shift by register requires both scratch registers. In all other cases
31404 the scratch registers may be NULL.
31405 - Ashiftrt by a register also clobbers the CC register. */
31406 void
31407 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31408 rtx amount, rtx scratch1, rtx scratch2)
31409 {
31410 rtx out_high = gen_highpart (SImode, out);
31411 rtx out_low = gen_lowpart (SImode, out);
31412 rtx in_high = gen_highpart (SImode, in);
31413 rtx in_low = gen_lowpart (SImode, in);
31414
31415 /* Terminology:
31416 in = the register pair containing the input value.
31417 out = the destination register pair.
31418 up = the high- or low-part of each pair.
31419 down = the opposite part to "up".
31420 In a shift, we can consider bits to shift from "up"-stream to
31421 "down"-stream, so in a left-shift "up" is the low-part and "down"
31422 is the high-part of each register pair. */
31423
31424 rtx out_up = code == ASHIFT ? out_low : out_high;
31425 rtx out_down = code == ASHIFT ? out_high : out_low;
31426 rtx in_up = code == ASHIFT ? in_low : in_high;
31427 rtx in_down = code == ASHIFT ? in_high : in_low;
31428
31429 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31430 gcc_assert (out
31431 && (REG_P (out) || GET_CODE (out) == SUBREG)
31432 && GET_MODE (out) == DImode);
31433 gcc_assert (in
31434 && (REG_P (in) || GET_CODE (in) == SUBREG)
31435 && GET_MODE (in) == DImode);
31436 gcc_assert (amount
31437 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31438 && GET_MODE (amount) == SImode)
31439 || CONST_INT_P (amount)));
31440 gcc_assert (scratch1 == NULL
31441 || (GET_CODE (scratch1) == SCRATCH)
31442 || (GET_MODE (scratch1) == SImode
31443 && REG_P (scratch1)));
31444 gcc_assert (scratch2 == NULL
31445 || (GET_CODE (scratch2) == SCRATCH)
31446 || (GET_MODE (scratch2) == SImode
31447 && REG_P (scratch2)));
31448 gcc_assert (!REG_P (out) || !REG_P (amount)
31449 || !HARD_REGISTER_P (out)
31450 || (REGNO (out) != REGNO (amount)
31451 && REGNO (out) + 1 != REGNO (amount)));
31452
31453 /* Macros to make following code more readable. */
31454 #define SUB_32(DEST,SRC) \
31455 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31456 #define RSB_32(DEST,SRC) \
31457 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31458 #define SUB_S_32(DEST,SRC) \
31459 gen_addsi3_compare0 ((DEST), (SRC), \
31460 GEN_INT (-32))
31461 #define SET(DEST,SRC) \
31462 gen_rtx_SET ((DEST), (SRC))
31463 #define SHIFT(CODE,SRC,AMOUNT) \
31464 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31465 #define LSHIFT(CODE,SRC,AMOUNT) \
31466 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31467 SImode, (SRC), (AMOUNT))
31468 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31469 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31470 SImode, (SRC), (AMOUNT))
31471 #define ORR(A,B) \
31472 gen_rtx_IOR (SImode, (A), (B))
31473 #define BRANCH(COND,LABEL) \
31474 gen_arm_cond_branch ((LABEL), \
31475 gen_rtx_ ## COND (CCmode, cc_reg, \
31476 const0_rtx), \
31477 cc_reg)
31478
31479 /* Shifts by register and shifts by constant are handled separately. */
31480 if (CONST_INT_P (amount))
31481 {
31482 /* We have a shift-by-constant. */
31483
31484 /* First, handle out-of-range shift amounts.
31485 In both cases we try to match the result that an ARM instruction in a
31486 shift-by-register would give. This helps reduce execution
31487 differences between optimization levels, but it won't stop other
31488 parts of the compiler doing different things. This is "undefined
31489 behavior", in any case. */
31490 if (INTVAL (amount) <= 0)
31491 emit_insn (gen_movdi (out, in));
31492 else if (INTVAL (amount) >= 64)
31493 {
31494 if (code == ASHIFTRT)
31495 {
31496 rtx const31_rtx = GEN_INT (31);
31497 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31498 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31499 }
31500 else
31501 emit_insn (gen_movdi (out, const0_rtx));
31502 }
31503
31504 /* Now handle valid shifts. */
31505 else if (INTVAL (amount) < 32)
31506 {
31507 /* Shifts by a constant less than 32. */
31508 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31509
31510 /* Clearing the out register in DImode first avoids lots
31511 of spilling and results in less stack usage.
31512 Later this redundant insn is completely removed.
31513 Do that only if "in" and "out" are different registers. */
31514 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31515 emit_insn (SET (out, const0_rtx));
31516 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31517 emit_insn (SET (out_down,
31518 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31519 out_down)));
31520 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31521 }
31522 else
31523 {
31524 /* Shifts by a constant greater than 31. */
31525 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31526
31527 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31528 emit_insn (SET (out, const0_rtx));
31529 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31530 if (code == ASHIFTRT)
31531 emit_insn (gen_ashrsi3 (out_up, in_up,
31532 GEN_INT (31)));
31533 else
31534 emit_insn (SET (out_up, const0_rtx));
31535 }
31536 }
31537 else
31538 {
31539 /* We have a shift-by-register. */
31540 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
31541
31542 /* This alternative requires the scratch registers. */
31543 gcc_assert (scratch1 && REG_P (scratch1));
31544 gcc_assert (scratch2 && REG_P (scratch2));
31545
31546 /* We will need the values "amount-32" and "32-amount" later.
31547 Swapping them around now allows the later code to be more general. */
31548 switch (code)
31549 {
31550 case ASHIFT:
31551 emit_insn (SUB_32 (scratch1, amount));
31552 emit_insn (RSB_32 (scratch2, amount));
31553 break;
31554 case ASHIFTRT:
31555 emit_insn (RSB_32 (scratch1, amount));
31556 /* Also set CC = amount > 32. */
31557 emit_insn (SUB_S_32 (scratch2, amount));
31558 break;
31559 case LSHIFTRT:
31560 emit_insn (RSB_32 (scratch1, amount));
31561 emit_insn (SUB_32 (scratch2, amount));
31562 break;
31563 default:
31564 gcc_unreachable ();
31565 }
31566
31567 /* Emit code like this:
31568
31569 arithmetic-left:
31570 out_down = in_down << amount;
31571 out_down = (in_up << (amount - 32)) | out_down;
31572 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31573 out_up = in_up << amount;
31574
31575 arithmetic-right:
31576 out_down = in_down >> amount;
31577 out_down = (in_up << (32 - amount)) | out_down;
31578 if (amount < 32)
31579 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31580 out_up = in_up >> amount;
31581
31582 logical-right:
31583 out_down = in_down >> amount;
31584 out_down = (in_up << (32 - amount)) | out_down;
31585 if (amount < 32)
31586 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31587 out_up = in_up >> amount;
31588
31589 The ARM and Thumb2 variants are the same but implemented slightly
31590 differently. If this were only called during expand we could just
31591 use the Thumb2 case and let combine do the right thing, but this
31592 can also be called from post-reload splitters. */
31593
31594 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31595
31596 if (!TARGET_THUMB2)
31597 {
31598 /* Emit code for ARM mode. */
31599 emit_insn (SET (out_down,
31600 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31601 if (code == ASHIFTRT)
31602 {
31603 rtx_code_label *done_label = gen_label_rtx ();
31604 emit_jump_insn (BRANCH (LT, done_label));
31605 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31606 out_down)));
31607 emit_label (done_label);
31608 }
31609 else
31610 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31611 out_down)));
31612 }
31613 else
31614 {
31615 /* Emit code for Thumb2 mode.
31616 Thumb2 can't do shift and or in one insn. */
31617 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31618 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31619
31620 if (code == ASHIFTRT)
31621 {
31622 rtx_code_label *done_label = gen_label_rtx ();
31623 emit_jump_insn (BRANCH (LT, done_label));
31624 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31625 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31626 emit_label (done_label);
31627 }
31628 else
31629 {
31630 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31631 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31632 }
31633 }
31634
31635 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31636 }
31637
31638 #undef SUB_32
31639 #undef RSB_32
31640 #undef SUB_S_32
31641 #undef SET
31642 #undef SHIFT
31643 #undef LSHIFT
31644 #undef REV_LSHIFT
31645 #undef ORR
31646 #undef BRANCH
31647 }
31648
31649 /* Returns true if the pattern is a valid symbolic address, which is either a
31650 symbol_ref or (symbol_ref + addend).
31651
31652 According to the ARM ELF ABI, the initial addend of REL-type relocations
31653 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
31654 literal field of the instruction as a 16-bit signed value in the range
31655 -32768 <= A < 32768. */
31656
31657 bool
31658 arm_valid_symbolic_address_p (rtx addr)
31659 {
31660 rtx xop0, xop1 = NULL_RTX;
31661 rtx tmp = addr;
31662
31663 if (target_word_relocations)
31664 return false;
31665
31666 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
31667 return true;
31668
31669 /* (const (plus: symbol_ref const_int)) */
31670 if (GET_CODE (addr) == CONST)
31671 tmp = XEXP (addr, 0);
31672
31673 if (GET_CODE (tmp) == PLUS)
31674 {
31675 xop0 = XEXP (tmp, 0);
31676 xop1 = XEXP (tmp, 1);
31677
31678 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
31679 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
31680 }
31681
31682 return false;
31683 }
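/* For example, (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 0x7fff))) are accepted,
   whereas an addend of 0x8000 is rejected because it cannot be encoded
   as the signed 16-bit initial addend of a REL-type MOVW/MOVT
   relocation.  */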
31684
31685 /* Returns true if *COMPARISON is a valid comparison operation, putting
31686 the operands into a form that is valid for it. */
31687 bool
31688 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31689 {
31690 enum rtx_code code = GET_CODE (*comparison);
31691 int code_int;
31692 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31693 ? GET_MODE (*op2) : GET_MODE (*op1);
31694
31695 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31696
31697 if (code == UNEQ || code == LTGT)
31698 return false;
31699
31700 code_int = (int)code;
31701 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31702 PUT_CODE (*comparison, (enum rtx_code)code_int);
31703
31704 switch (mode)
31705 {
31706 case E_SImode:
31707 if (!arm_add_operand (*op1, mode))
31708 *op1 = force_reg (mode, *op1);
31709 if (!arm_add_operand (*op2, mode))
31710 *op2 = force_reg (mode, *op2);
31711 return true;
31712
31713 case E_DImode:
31714 /* gen_compare_reg() will sort out any invalid operands. */
31715 return true;
31716
31717 case E_HFmode:
31718 if (!TARGET_VFP_FP16INST)
31719 break;
31720 /* FP16 comparisons are done in SF mode. */
31721 mode = SFmode;
31722 *op1 = convert_to_mode (mode, *op1, 1);
31723 *op2 = convert_to_mode (mode, *op2, 1);
31724 /* Fall through. */
31725 case E_SFmode:
31726 case E_DFmode:
31727 if (!vfp_compare_operand (*op1, mode))
31728 *op1 = force_reg (mode, *op1);
31729 if (!vfp_compare_operand (*op2, mode))
31730 *op2 = force_reg (mode, *op2);
31731 return true;
31732 default:
31733 break;
31734 }
31735
31736 return false;
31737
31738 }
31739
31740 /* Maximum number of instructions to set block of memory. */
31741 static int
31742 arm_block_set_max_insns (void)
31743 {
31744 if (optimize_function_for_size_p (cfun))
31745 return 4;
31746 else
31747 return current_tune->max_insns_inline_memset;
31748 }
31749
31750 /* Return TRUE if it's profitable to set block of memory for
31751 non-vectorized case. VAL is the value to set the memory
31752 with. LENGTH is the number of bytes to set. ALIGN is the
31753 alignment of the destination memory in bytes. UNALIGNED_P
31754 is TRUE if we can only set the memory with instructions
31755 meeting alignment requirements. USE_STRD_P is TRUE if we
31756 can use strd to set the memory. */
31757 static bool
31758 arm_block_set_non_vect_profit_p (rtx val,
31759 unsigned HOST_WIDE_INT length,
31760 unsigned HOST_WIDE_INT align,
31761 bool unaligned_p, bool use_strd_p)
31762 {
31763 int num = 0;
31764 /* For a leftover of 0-7 bytes, this gives the minimum number of
31765 strb/strh/str instructions needed to set it. */
31766 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31767
31768 if (unaligned_p)
31769 {
31770 num = arm_const_inline_cost (SET, val);
31771 num += length / align + length % align;
31772 }
31773 else if (use_strd_p)
31774 {
31775 num = arm_const_double_inline_cost (val);
31776 num += (length >> 3) + leftover[length & 7];
31777 }
31778 else
31779 {
31780 num = arm_const_inline_cost (SET, val);
31781 num += (length >> 2) + leftover[length & 3];
31782 }
31783
31784 /* We may be able to combine last pair STRH/STRB into a single STR
31785 by shifting one byte back. */
31786 if (unaligned_access && length > 3 && (length & 3) == 3)
31787 num--;
31788
31789 return (num <= arm_block_set_max_insns ());
31790 }
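/* Worked example (a sketch, assuming the constant itself takes a single
   instruction to build): for length == 15 with a word-aligned destination
   and neither strd nor the unaligned-only path, num = 1 + (15 >> 2)
   + leftover[3] = 6, reduced to 5 when unaligned access allows the final
   STRH/STRB pair to be merged into one STR; the block is inlined only if
   that count does not exceed arm_block_set_max_insns ().  */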
31791
31792 /* Return TRUE if it's profitable to set block of memory for
31793 vectorized case. LENGTH is the number of bytes to set.
31794 ALIGN is the alignment of destination memory in bytes.
31795 MODE is the vector mode used to set the memory. */
31796 static bool
31797 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31798 unsigned HOST_WIDE_INT align,
31799 machine_mode mode)
31800 {
31801 int num;
31802 bool unaligned_p = ((align & 3) != 0);
31803 unsigned int nelt = GET_MODE_NUNITS (mode);
31804
31805 /* Instruction loading constant value. */
31806 num = 1;
31807 /* Instructions storing the memory. */
31808 num += (length + nelt - 1) / nelt;
31809 /* Instructions adjusting the address expression. We only need to
31810 adjust the address expression if it's 4-byte aligned and the leftover
31811 bytes can only be stored with a misaligned store instruction. */
31812 if (!unaligned_p && (length & 3) != 0)
31813 num++;
31814
31815 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31816 if (!unaligned_p && mode == V16QImode)
31817 num--;
31818
31819 return (num <= arm_block_set_max_insns ());
31820 }
31821
31822 /* Set a block of memory using vectorization instructions for the
31823 unaligned case. We fill the first LENGTH bytes of the memory
31824 area starting from DSTBASE with byte constant VALUE. ALIGN is
31825 the alignment requirement of memory. Return TRUE if succeeded. */
31826 static bool
31827 arm_block_set_unaligned_vect (rtx dstbase,
31828 unsigned HOST_WIDE_INT length,
31829 unsigned HOST_WIDE_INT value,
31830 unsigned HOST_WIDE_INT align)
31831 {
31832 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
31833 rtx dst, mem;
31834 rtx val_vec, reg;
31835 rtx (*gen_func) (rtx, rtx);
31836 machine_mode mode;
31837 unsigned HOST_WIDE_INT v = value;
31838 unsigned int offset = 0;
31839 gcc_assert ((align & 0x3) != 0);
31840 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31841 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31842 if (length >= nelt_v16)
31843 {
31844 mode = V16QImode;
31845 gen_func = gen_movmisalignv16qi;
31846 }
31847 else
31848 {
31849 mode = V8QImode;
31850 gen_func = gen_movmisalignv8qi;
31851 }
31852 nelt_mode = GET_MODE_NUNITS (mode);
31853 gcc_assert (length >= nelt_mode);
31854 /* Skip if it isn't profitable. */
31855 if (!arm_block_set_vect_profit_p (length, align, mode))
31856 return false;
31857
31858 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31859 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31860
31861 v = sext_hwi (v, BITS_PER_WORD);
31862
31863 reg = gen_reg_rtx (mode);
31864 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
31865 /* Emit instruction loading the constant value. */
31866 emit_move_insn (reg, val_vec);
31867
31868 /* Handle nelt_mode bytes in a vector. */
31869 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31870 {
31871 emit_insn ((*gen_func) (mem, reg));
31872 if (i + 2 * nelt_mode <= length)
31873 {
31874 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31875 offset += nelt_mode;
31876 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31877 }
31878 }
31879
31880 /* If at least nelt_v8 bytes are left over, we must be in
31881 V16QI mode. */
31882 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31883
31884 /* Handle (8, 16) bytes leftover. */
31885 if (i + nelt_v8 < length)
31886 {
31887 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31888 offset += length - i;
31889 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31890
31891 /* We are shifting bytes back, set the alignment accordingly. */
31892 if ((length & 1) != 0 && align >= 2)
31893 set_mem_align (mem, BITS_PER_UNIT);
31894
31895 emit_insn (gen_movmisalignv16qi (mem, reg));
31896 }
31897 /* Handle (0, 8] bytes leftover. */
31898 else if (i < length && i + nelt_v8 >= length)
31899 {
31900 if (mode == V16QImode)
31901 reg = gen_lowpart (V8QImode, reg);
31902
31903 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31904 + (nelt_mode - nelt_v8))));
31905 offset += (length - i) + (nelt_mode - nelt_v8);
31906 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
31907
31908 /* We are shifting bytes back, set the alignment accordingly. */
31909 if ((length & 1) != 0 && align >= 2)
31910 set_mem_align (mem, BITS_PER_UNIT);
31911
31912 emit_insn (gen_movmisalignv8qi (mem, reg));
31913 }
31914
31915 return true;
31916 }
31917
31918 /* Set a block of memory using vectorization instructions for the
31919 aligned case. We fill the first LENGTH bytes of the memory area
31920 starting from DSTBASE with byte constant VALUE. ALIGN is the
31921 alignment requirement of memory. Return TRUE if succeeded. */
31922 static bool
31923 arm_block_set_aligned_vect (rtx dstbase,
31924 unsigned HOST_WIDE_INT length,
31925 unsigned HOST_WIDE_INT value,
31926 unsigned HOST_WIDE_INT align)
31927 {
31928 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
31929 rtx dst, addr, mem;
31930 rtx val_vec, reg;
31931 machine_mode mode;
31932 unsigned int offset = 0;
31933
31934 gcc_assert ((align & 0x3) == 0);
31935 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31936 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31937 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31938 mode = V16QImode;
31939 else
31940 mode = V8QImode;
31941
31942 nelt_mode = GET_MODE_NUNITS (mode);
31943 gcc_assert (length >= nelt_mode);
31944 /* Skip if it isn't profitable. */
31945 if (!arm_block_set_vect_profit_p (length, align, mode))
31946 return false;
31947
31948 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31949
31950 reg = gen_reg_rtx (mode);
31951 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
31952 /* Emit instruction loading the constant value. */
31953 emit_move_insn (reg, val_vec);
31954
31955 i = 0;
31956 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31957 if (mode == V16QImode)
31958 {
31959 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31960 emit_insn (gen_movmisalignv16qi (mem, reg));
31961 i += nelt_mode;
31962 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31963 if (i + nelt_v8 < length && i + nelt_v16 > length)
31964 {
31965 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31966 offset += length - nelt_mode;
31967 mem = adjust_automodify_address (dstbase, mode, dst, offset);
31968 /* We are shifting bytes back, set the alignment accordingly. */
31969 if ((length & 0x3) == 0)
31970 set_mem_align (mem, BITS_PER_UNIT * 4);
31971 else if ((length & 0x1) == 0)
31972 set_mem_align (mem, BITS_PER_UNIT * 2);
31973 else
31974 set_mem_align (mem, BITS_PER_UNIT);
31975
31976 emit_insn (gen_movmisalignv16qi (mem, reg));
31977 return true;
31978 }
31979 /* Fall through for bytes leftover. */
31980 mode = V8QImode;
31981 nelt_mode = GET_MODE_NUNITS (mode);
31982 reg = gen_lowpart (V8QImode, reg);
31983 }
31984
31985 /* Handle 8 bytes in a vector. */
31986 for (; (i + nelt_mode <= length); i += nelt_mode)
31987 {
31988 addr = plus_constant (Pmode, dst, i);
31989 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
31990 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
31991 emit_move_insn (mem, reg);
31992 else
31993 emit_insn (gen_unaligned_storev8qi (mem, reg));
31994 }
31995
31996 /* Handle single word leftover by shifting 4 bytes back. We can
31997 use aligned access for this case. */
31998 if (i + UNITS_PER_WORD == length)
31999 {
32000 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32001 offset += i - UNITS_PER_WORD;
32002 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32003 /* We are shifting 4 bytes back, set the alignment accordingly. */
32004 if (align > UNITS_PER_WORD)
32005 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32006
32007 emit_insn (gen_unaligned_storev8qi (mem, reg));
32008 }
32009 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32010 We have to use unaligned access for this case. */
32011 else if (i < length)
32012 {
32013 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32014 offset += length - nelt_mode;
32015 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32016 /* We are shifting bytes back, set the alignment accordingly. */
32017 if ((length & 1) == 0)
32018 set_mem_align (mem, BITS_PER_UNIT * 2);
32019 else
32020 set_mem_align (mem, BITS_PER_UNIT);
32021
32022 emit_insn (gen_movmisalignv8qi (mem, reg));
32023 }
32024
32025 return true;
32026 }
32027
32028 /* Set a block of memory using plain strh/strb instructions, only
32029 using instructions allowed by ALIGN on processor. We fill the
32030 first LENGTH bytes of the memory area starting from DSTBASE
32031 with byte constant VALUE. ALIGN is the alignment requirement
32032 of memory. */
32033 static bool
32034 arm_block_set_unaligned_non_vect (rtx dstbase,
32035 unsigned HOST_WIDE_INT length,
32036 unsigned HOST_WIDE_INT value,
32037 unsigned HOST_WIDE_INT align)
32038 {
32039 unsigned int i;
32040 rtx dst, addr, mem;
32041 rtx val_exp, val_reg, reg;
32042 machine_mode mode;
32043 HOST_WIDE_INT v = value;
32044
32045 gcc_assert (align == 1 || align == 2);
32046
32047 if (align == 2)
32048 v |= (value << BITS_PER_UNIT);
32049
32050 v = sext_hwi (v, BITS_PER_WORD);
32051 val_exp = GEN_INT (v);
32052 /* Skip if it isn't profitable. */
32053 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32054 align, true, false))
32055 return false;
32056
32057 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32058 mode = (align == 2 ? HImode : QImode);
32059 val_reg = force_reg (SImode, val_exp);
32060 reg = gen_lowpart (mode, val_reg);
32061
32062 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32063 {
32064 addr = plus_constant (Pmode, dst, i);
32065 mem = adjust_automodify_address (dstbase, mode, addr, i);
32066 emit_move_insn (mem, reg);
32067 }
32068
32069 /* Handle single byte leftover. */
32070 if (i + 1 == length)
32071 {
32072 reg = gen_lowpart (QImode, val_reg);
32073 addr = plus_constant (Pmode, dst, i);
32074 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32075 emit_move_insn (mem, reg);
32076 i++;
32077 }
32078
32079 gcc_assert (i == length);
32080 return true;
32081 }
32082
32083 /* Set a block of memory using plain strd/str/strh/strb instructions,
32084 to permit unaligned copies on processors which support unaligned
32085 semantics for those instructions. We fill the first LENGTH bytes
32086 of the memory area starting from DSTBASE with byte constant VALUE.
32087 ALIGN is the alignment requirement of memory. */
32088 static bool
32089 arm_block_set_aligned_non_vect (rtx dstbase,
32090 unsigned HOST_WIDE_INT length,
32091 unsigned HOST_WIDE_INT value,
32092 unsigned HOST_WIDE_INT align)
32093 {
32094 unsigned int i;
32095 rtx dst, addr, mem;
32096 rtx val_exp, val_reg, reg;
32097 unsigned HOST_WIDE_INT v;
32098 bool use_strd_p;
32099
32100 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32101 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32102
32103 v = (value | (value << 8) | (value << 16) | (value << 24));
32104 if (length < UNITS_PER_WORD)
32105 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32106
32107 if (use_strd_p)
32108 v |= (v << BITS_PER_WORD);
32109 else
32110 v = sext_hwi (v, BITS_PER_WORD);
32111
32112 val_exp = GEN_INT (v);
32113 /* Skip if it isn't profitable. */
32114 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32115 align, false, use_strd_p))
32116 {
32117 if (!use_strd_p)
32118 return false;
32119
32120 /* Try without strd. */
32121 v = (v >> BITS_PER_WORD);
32122 v = sext_hwi (v, BITS_PER_WORD);
32123 val_exp = GEN_INT (v);
32124 use_strd_p = false;
32125 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32126 align, false, use_strd_p))
32127 return false;
32128 }
32129
32130 i = 0;
32131 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32132 /* Handle double words using strd if possible. */
32133 if (use_strd_p)
32134 {
32135 val_reg = force_reg (DImode, val_exp);
32136 reg = val_reg;
32137 for (; (i + 8 <= length); i += 8)
32138 {
32139 addr = plus_constant (Pmode, dst, i);
32140 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32141 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32142 emit_move_insn (mem, reg);
32143 else
32144 emit_insn (gen_unaligned_storedi (mem, reg));
32145 }
32146 }
32147 else
32148 val_reg = force_reg (SImode, val_exp);
32149
32150 /* Handle words. */
32151 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32152 for (; (i + 4 <= length); i += 4)
32153 {
32154 addr = plus_constant (Pmode, dst, i);
32155 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32156 if ((align & 3) == 0)
32157 emit_move_insn (mem, reg);
32158 else
32159 emit_insn (gen_unaligned_storesi (mem, reg));
32160 }
32161
32162 /* Merge last pair of STRH and STRB into a STR if possible. */
32163 if (unaligned_access && i > 0 && (i + 3) == length)
32164 {
32165 addr = plus_constant (Pmode, dst, i - 1);
32166 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32167 /* We are shifting one byte back, set the alignment accordingly. */
32168 if ((align & 1) == 0)
32169 set_mem_align (mem, BITS_PER_UNIT);
32170
32171 /* Most likely this is an unaligned access, and we can't tell at
32172 compilation time. */
32173 emit_insn (gen_unaligned_storesi (mem, reg));
32174 return true;
32175 }
32176
32177 /* Handle half word leftover. */
32178 if (i + 2 <= length)
32179 {
32180 reg = gen_lowpart (HImode, val_reg);
32181 addr = plus_constant (Pmode, dst, i);
32182 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32183 if ((align & 1) == 0)
32184 emit_move_insn (mem, reg);
32185 else
32186 emit_insn (gen_unaligned_storehi (mem, reg));
32187
32188 i += 2;
32189 }
32190
32191 /* Handle single byte leftover. */
32192 if (i + 1 == length)
32193 {
32194 reg = gen_lowpart (QImode, val_reg);
32195 addr = plus_constant (Pmode, dst, i);
32196 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32197 emit_move_insn (mem, reg);
32198 }
32199
32200 return true;
32201 }
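
/* Rough example of the routine above (hypothetical values and registers):
   for LENGTH == 15, VALUE == 0, ALIGN == 4 on a target that prefers
   ldrd/strd, the expansion is roughly

     strd  Rv, Rv+1, [Rd]      @ bytes 0-7
     str   Rv, [Rd, #8]        @ bytes 8-11
     str   Rv, [Rd, #11]       @ bytes 11-14, overlapping store instead
                               @ of a trailing strh + strb pair

   The final store relies on unaligned access support, hence the "merge"
   comment in the code above.  */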
32202
32203 /* Set a block of memory using vectorization instructions for both
32204 aligned and unaligned cases. We fill the first LENGTH bytes of
32205 the memory area starting from DSTBASE with byte constant VALUE.
32206 ALIGN is the alignment requirement of memory. */
32207 static bool
32208 arm_block_set_vect (rtx dstbase,
32209 unsigned HOST_WIDE_INT length,
32210 unsigned HOST_WIDE_INT value,
32211 unsigned HOST_WIDE_INT align)
32212 {
32213 /* Check whether we need to use unaligned store instruction. */
32214 if (((align & 3) != 0 || (length & 3) != 0)
32215 /* Check whether unaligned store instruction is available. */
32216 && (!unaligned_access || BYTES_BIG_ENDIAN))
32217 return false;
32218
32219 if ((align & 3) == 0)
32220 return arm_block_set_aligned_vect (dstbase, length, value, align);
32221 else
32222 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32223 }
32224
32225 /* Expand a string store (memset) operation. First we try to do it using
32226 vector instructions, then with ARM unaligned access and double-word
32227 stores if that is profitable. OPERANDS[0] is the destination,
32228 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32229 initialize the memory, OPERANDS[3] is the known alignment of the
32230 destination. */
32231 bool
32232 arm_gen_setmem (rtx *operands)
32233 {
32234 rtx dstbase = operands[0];
32235 unsigned HOST_WIDE_INT length;
32236 unsigned HOST_WIDE_INT value;
32237 unsigned HOST_WIDE_INT align;
32238
32239 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32240 return false;
32241
32242 length = UINTVAL (operands[1]);
32243 if (length > 64)
32244 return false;
32245
32246 value = (UINTVAL (operands[2]) & 0xFF);
32247 align = UINTVAL (operands[3]);
32248 if (TARGET_NEON && length >= 8
32249 && current_tune->string_ops_prefer_neon
32250 && arm_block_set_vect (dstbase, length, value, align))
32251 return true;
32252
32253 if (!unaligned_access && (align & 3) != 0)
32254 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32255
32256 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32257 }
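
/* Sketch of the dispatch above for a call such as
   __builtin_memset (p, 0, 32) (a hypothetical example): operands[1] is 32
   and operands[2] is 0, so on a NEON target whose tuning prefers NEON for
   string operations the vector expander is tried first; otherwise the
   expansion falls back to the aligned or unaligned scalar routines,
   depending on whether unaligned access is available and on the known
   alignment of the destination.  */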
32258
32259
32260 static bool
32261 arm_macro_fusion_p (void)
32262 {
32263 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32264 }
32265
32266 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32267 for MOVW / MOVT macro fusion. */
32268
32269 static bool
32270 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32271 {
32272 /* We are trying to fuse
32273 movw imm / movt imm
32274 instructions as a group that gets scheduled together. */
32275
32276 rtx set_dest = SET_DEST (curr_set);
32277
32278 if (GET_MODE (set_dest) != SImode)
32279 return false;
32280
32281 /* We are trying to match:
32282 prev (movw) == (set (reg r0) (const_int imm16))
32283 curr (movt) == (set (zero_extract (reg r0)
32284 (const_int 16)
32285 (const_int 16))
32286 (const_int imm16_1))
32287 or
32288 prev (movw) == (set (reg r1)
32289 (high (symbol_ref ("SYM"))))
32290 curr (movt) == (set (reg r0)
32291 (lo_sum (reg r1)
32292 (symbol_ref ("SYM")))) */
32293
32294 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32295 {
32296 if (CONST_INT_P (SET_SRC (curr_set))
32297 && CONST_INT_P (SET_SRC (prev_set))
32298 && REG_P (XEXP (set_dest, 0))
32299 && REG_P (SET_DEST (prev_set))
32300 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32301 return true;
32302
32303 }
32304 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32305 && REG_P (SET_DEST (curr_set))
32306 && REG_P (SET_DEST (prev_set))
32307 && GET_CODE (SET_SRC (prev_set)) == HIGH
32308 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32309 return true;
32310
32311 return false;
32312 }
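
/* For illustration, a fusible pair as it appears in assembly looks
   roughly like (SYM is a placeholder symbol):

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   Both writes target the same destination register, which is what the
   register-number checks above enforce.  */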
32313
32314 static bool
32315 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32316 {
32317 rtx prev_set = single_set (prev);
32318 rtx curr_set = single_set (curr);
32319
32320 if (!prev_set
32321 || !curr_set)
32322 return false;
32323
32324 if (any_condjump_p (curr))
32325 return false;
32326
32327 if (!arm_macro_fusion_p ())
32328 return false;
32329
32330 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32331 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32332 return true;
32333
32334 return false;
32335 }
32336
32337 /* Return true iff the instruction fusion described by OP is enabled. */
32338 bool
32339 arm_fusion_enabled_p (tune_params::fuse_ops op)
32340 {
32341 return current_tune->fusible_ops & op;
32342 }
32343
32344 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
32345 scheduled for speculative execution. Reject the long-running division
32346 and square-root instructions. */
32347
32348 static bool
32349 arm_sched_can_speculate_insn (rtx_insn *insn)
32350 {
32351 switch (get_attr_type (insn))
32352 {
32353 case TYPE_SDIV:
32354 case TYPE_UDIV:
32355 case TYPE_FDIVS:
32356 case TYPE_FDIVD:
32357 case TYPE_FSQRTS:
32358 case TYPE_FSQRTD:
32359 case TYPE_NEON_FP_SQRT_S:
32360 case TYPE_NEON_FP_SQRT_D:
32361 case TYPE_NEON_FP_SQRT_S_Q:
32362 case TYPE_NEON_FP_SQRT_D_Q:
32363 case TYPE_NEON_FP_DIV_S:
32364 case TYPE_NEON_FP_DIV_D:
32365 case TYPE_NEON_FP_DIV_S_Q:
32366 case TYPE_NEON_FP_DIV_D_Q:
32367 return false;
32368 default:
32369 return true;
32370 }
32371 }
32372
32373 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32374
32375 static unsigned HOST_WIDE_INT
32376 arm_asan_shadow_offset (void)
32377 {
32378 return HOST_WIDE_INT_1U << 29;
32379 }
32380
32381
32382 /* This is a temporary fix for PR60655. Ideally we need
32383 to handle most of these cases in the generic part but
32384 currently we reject minus (..) (sym_ref). We try to
32385 ameliorate the case with minus (sym_ref1) (sym_ref2)
32386 where they are in the same section. */
32387
32388 static bool
32389 arm_const_not_ok_for_debug_p (rtx p)
32390 {
32391 tree decl_op0 = NULL;
32392 tree decl_op1 = NULL;
32393
32394 if (GET_CODE (p) == UNSPEC)
32395 return true;
32396 if (GET_CODE (p) == MINUS)
32397 {
32398 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32399 {
32400 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32401 if (decl_op1
32402 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32403 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32404 {
32405 if ((VAR_P (decl_op1)
32406 || TREE_CODE (decl_op1) == CONST_DECL)
32407 && (VAR_P (decl_op0)
32408 || TREE_CODE (decl_op0) == CONST_DECL))
32409 return (get_variable_section (decl_op1, false)
32410 != get_variable_section (decl_op0, false));
32411
32412 if (TREE_CODE (decl_op1) == LABEL_DECL
32413 && TREE_CODE (decl_op0) == LABEL_DECL)
32414 return (DECL_CONTEXT (decl_op1)
32415 != DECL_CONTEXT (decl_op0));
32416 }
32417
32418 return true;
32419 }
32420 }
32421
32422 return false;
32423 }
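
/* For example (hypothetical symbols), a difference such as
   (minus (symbol_ref "a") (symbol_ref "b")) is allowed in debug info
   only when both symbols resolve to declarations placed in the same
   section (or to labels within the same function); if the second operand
   is a symbol but that cannot be established, the constant is rejected
   by returning true.  */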
32424
32425 /* Return TRUE if X is a reference to a value in a constant pool. */
32426 extern bool
32427 arm_is_constant_pool_ref (rtx x)
32428 {
32429 return (MEM_P (x)
32430 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32431 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32432 }
32433
32434 /* Remember the last target of arm_set_current_function. */
32435 static GTY(()) tree arm_previous_fndecl;
32436
32437 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
32438
32439 void
32440 save_restore_target_globals (tree new_tree)
32441 {
32442 /* If we have a previous state, use it. */
32443 if (TREE_TARGET_GLOBALS (new_tree))
32444 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32445 else if (new_tree == target_option_default_node)
32446 restore_target_globals (&default_target_globals);
32447 else
32448 {
32449 /* Call target_reinit and save the state for TARGET_GLOBALS. */
32450 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
32451 }
32452
32453 arm_option_params_internal ();
32454 }
32455
32456 /* Invalidate arm_previous_fndecl. */
32457
32458 void
32459 arm_reset_previous_fndecl (void)
32460 {
32461 arm_previous_fndecl = NULL_TREE;
32462 }
32463
32464 /* Establish appropriate back-end context for processing the function
32465 FNDECL. The argument might be NULL to indicate processing at top
32466 level, outside of any function scope. */
32467
32468 static void
32469 arm_set_current_function (tree fndecl)
32470 {
32471 if (!fndecl || fndecl == arm_previous_fndecl)
32472 return;
32473
32474 tree old_tree = (arm_previous_fndecl
32475 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
32476 : NULL_TREE);
32477
32478 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32479
32480 /* If the current function has no attributes but the previous one did,
32481 use the default node. */
32482 if (! new_tree && old_tree)
32483 new_tree = target_option_default_node;
32484
32485 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop
32486 to the default has been handled by save_restore_target_globals from
32487 arm_pragma_target_parse. */
32488 if (old_tree == new_tree)
32489 return;
32490
32491 arm_previous_fndecl = fndecl;
32492
32493 /* First set the target options. */
32494 cl_target_option_restore (&global_options, &global_options_set,
32495 TREE_TARGET_OPTION (new_tree));
32496
32497 save_restore_target_globals (new_tree);
32498
32499 arm_override_options_after_change_1 (&global_options, &global_options_set);
32500 }
32501
32502 /* Implement TARGET_OPTION_PRINT. */
32503
32504 static void
32505 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
32506 {
32507 int flags = ptr->x_target_flags;
32508 const char *fpu_name;
32509
32510 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
32511 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
32512
32513 fprintf (file, "%*sselected isa %s\n", indent, "",
32514 TARGET_THUMB2_P (flags) ? "thumb2" :
32515 TARGET_THUMB_P (flags) ? "thumb1" :
32516 "arm");
32517
32518 if (ptr->x_arm_arch_string)
32519 fprintf (file, "%*sselected architecture %s\n", indent, "",
32520 ptr->x_arm_arch_string);
32521
32522 if (ptr->x_arm_cpu_string)
32523 fprintf (file, "%*sselected CPU %s\n", indent, "",
32524 ptr->x_arm_cpu_string);
32525
32526 if (ptr->x_arm_tune_string)
32527 fprintf (file, "%*sselected tune %s\n", indent, "",
32528 ptr->x_arm_tune_string);
32529
32530 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
32531 }
32532
32533 /* Hook to determine if one function can safely inline another. */
32534
32535 static bool
32536 arm_can_inline_p (tree caller, tree callee)
32537 {
32538 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32539 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32540 bool can_inline = true;
32541
32542 struct cl_target_option *caller_opts
32543 = TREE_TARGET_OPTION (caller_tree ? caller_tree
32544 : target_option_default_node);
32545
32546 struct cl_target_option *callee_opts
32547 = TREE_TARGET_OPTION (callee_tree ? callee_tree
32548 : target_option_default_node);
32549
32550 if (callee_opts == caller_opts)
32551 return true;
32552
32553 /* Callee's ISA features should be a subset of the caller's. */
32554 struct arm_build_target caller_target;
32555 struct arm_build_target callee_target;
32556 caller_target.isa = sbitmap_alloc (isa_num_bits);
32557 callee_target.isa = sbitmap_alloc (isa_num_bits);
32558
32559 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
32560 false);
32561 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
32562 false);
32563 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
32564 can_inline = false;
32565
32566 sbitmap_free (caller_target.isa);
32567 sbitmap_free (callee_target.isa);
32568
32569 /* OK to inline between different modes.
32570 Functions with mode-specific instructions, e.g. those using asm,
32571 must be explicitly protected with noinline. */
32572 return can_inline;
32573 }
32574
32575 /* Hook to fix a function's alignment when it is affected by the target attribute. */
32576
32577 static void
32578 arm_relayout_function (tree fndecl)
32579 {
32580 if (DECL_USER_ALIGN (fndecl))
32581 return;
32582
32583 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32584
32585 if (!callee_tree)
32586 callee_tree = target_option_default_node;
32587
32588 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
32589 SET_DECL_ALIGN
32590 (fndecl,
32591 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
32592 }
32593
32594 /* Inner function to process the attribute((target(...))); take an argument and
32595 set the current options from that argument. If we have a list, recursively
32596 process each element of the list. */
32597
32598 static bool
32599 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
32600 {
32601 if (TREE_CODE (args) == TREE_LIST)
32602 {
32603 bool ret = true;
32604
32605 for (; args; args = TREE_CHAIN (args))
32606 if (TREE_VALUE (args)
32607 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
32608 ret = false;
32609 return ret;
32610 }
32611
32612 else if (TREE_CODE (args) != STRING_CST)
32613 {
32614 error ("attribute %<target%> argument not a string");
32615 return false;
32616 }
32617
32618 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
32619 char *q;
32620
32621 while ((q = strtok (argstr, ",")) != NULL)
32622 {
32623 argstr = NULL;
32624 if (!strcmp (q, "thumb"))
32625 {
32626 opts->x_target_flags |= MASK_THUMB;
32627 if (TARGET_FDPIC && !arm_arch_thumb2)
32628 sorry ("FDPIC mode is not supported in Thumb-1 mode");
32629 }
32630
32631 else if (!strcmp (q, "arm"))
32632 opts->x_target_flags &= ~MASK_THUMB;
32633
32634 else if (!strcmp (q, "general-regs-only"))
32635 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
32636
32637 else if (!strncmp (q, "fpu=", 4))
32638 {
32639 int fpu_index;
32640 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
32641 &fpu_index, CL_TARGET))
32642 {
32643 error ("invalid fpu for target attribute or pragma %qs", q);
32644 return false;
32645 }
32646 if (fpu_index == TARGET_FPU_auto)
32647 {
32648 /* This doesn't really make sense until we support
32649 general dynamic selection of the architecture and all
32650 sub-features. */
32651 sorry ("auto fpu selection not currently permitted here");
32652 return false;
32653 }
32654 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
32655 }
32656 else if (!strncmp (q, "arch=", 5))
32657 {
32658 char *arch = q + 5;
32659 const arch_option *arm_selected_arch
32660 = arm_parse_arch_option_name (all_architectures, "arch", arch);
32661
32662 if (!arm_selected_arch)
32663 {
32664 error ("invalid architecture for target attribute or pragma %qs",
32665 q);
32666 return false;
32667 }
32668
32669 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
32670 }
32671 else if (q[0] == '+')
32672 {
32673 opts->x_arm_arch_string
32674 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
32675 }
32676 else
32677 {
32678 error ("unknown target attribute or pragma %qs", q);
32679 return false;
32680 }
32681 }
32682
32683 return true;
32684 }
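
/* For illustration, attribute strings handled above include forms such
   as (hypothetical user code):

     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arm,fpu=vfpv4")))
     __attribute__ ((target ("arch=armv7-a")))

   Comma-separated items are processed one strtok token at a time, and an
   item starting with '+' is appended to the current architecture
   string.  */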
32685
32686 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
32687
32688 tree
32689 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
32690 struct gcc_options *opts_set)
32691 {
32692 struct cl_target_option cl_opts;
32693
32694 if (!arm_valid_target_attribute_rec (args, opts))
32695 return NULL_TREE;
32696
32697 cl_target_option_save (&cl_opts, opts, opts_set);
32698 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
32699 arm_option_check_internal (opts);
32700 /* Do any overrides, such as global options arch=xxx.
32701 We do this since arm_active_target was overridden. */
32702 arm_option_reconfigure_globals ();
32703 arm_options_perform_arch_sanity_checks ();
32704 arm_option_override_internal (opts, opts_set);
32705
32706 return build_target_option_node (opts, opts_set);
32707 }
32708
32709 static void
32710 add_attribute (const char * mode, tree *attributes)
32711 {
32712 size_t len = strlen (mode);
32713 tree value = build_string (len, mode);
32714
32715 TREE_TYPE (value) = build_array_type (char_type_node,
32716 build_index_type (size_int (len)));
32717
32718 *attributes = tree_cons (get_identifier ("target"),
32719 build_tree_list (NULL_TREE, value),
32720 *attributes);
32721 }
32722
32723 /* For testing. Insert thumb or arm modes alternately on functions. */
32724
32725 static void
32726 arm_insert_attributes (tree fndecl, tree * attributes)
32727 {
32728 const char *mode;
32729
32730 if (! TARGET_FLIP_THUMB)
32731 return;
32732
32733 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
32734 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
32735 return;
32736
32737 /* Nested definitions must inherit mode. */
32738 if (current_function_decl)
32739 {
32740 mode = TARGET_THUMB ? "thumb" : "arm";
32741 add_attribute (mode, attributes);
32742 return;
32743 }
32744
32745 /* If there is already a setting don't change it. */
32746 if (lookup_attribute ("target", *attributes) != NULL)
32747 return;
32748
32749 mode = thumb_flipper ? "thumb" : "arm";
32750 add_attribute (mode, attributes);
32751
32752 thumb_flipper = !thumb_flipper;
32753 }
32754
32755 /* Hook to validate attribute((target("string"))). */
32756
32757 static bool
32758 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
32759 tree args, int ARG_UNUSED (flags))
32760 {
32761 bool ret = true;
32762 struct gcc_options func_options, func_options_set;
32763 tree cur_tree, new_optimize;
32764 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32765
32766 /* Get the optimization options of the current function. */
32767 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32768
32769 /* If the function changed the optimization levels as well as setting target
32770 options, start with the optimizations specified. */
32771 if (!func_optimize)
32772 func_optimize = optimization_default_node;
32773
32774 /* Init func_options. */
32775 memset (&func_options, 0, sizeof (func_options));
32776 init_options_struct (&func_options, NULL);
32777 lang_hooks.init_options_struct (&func_options);
32778 memset (&func_options_set, 0, sizeof (func_options_set));
32779
32780 /* Initialize func_options to the defaults. */
32781 cl_optimization_restore (&func_options, &func_options_set,
32782 TREE_OPTIMIZATION (func_optimize));
32783
32784 cl_target_option_restore (&func_options, &func_options_set,
32785 TREE_TARGET_OPTION (target_option_default_node));
32786
32787 /* Set func_options flags with new target mode. */
32788 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
32789 &func_options_set);
32790
32791 if (cur_tree == NULL_TREE)
32792 ret = false;
32793
32794 new_optimize = build_optimization_node (&func_options, &func_options_set);
32795
32796 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
32797
32798 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32799
32800 return ret;
32801 }
32802
32803 /* Match an ISA feature bitmap to a named FPU. We always use the
32804 first entry that exactly matches the feature set, so that we
32805 effectively canonicalize the FPU name for the assembler. */
32806 static const char*
32807 arm_identify_fpu_from_isa (sbitmap isa)
32808 {
32809 auto_sbitmap fpubits (isa_num_bits);
32810 auto_sbitmap cand_fpubits (isa_num_bits);
32811
32812 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
32813
32814 /* If there are no ISA feature bits relating to the FPU, we must be
32815 doing soft-float. */
32816 if (bitmap_empty_p (fpubits))
32817 return "softvfp";
32818
32819 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
32820 {
32821 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
32822 if (bitmap_equal_p (fpubits, cand_fpubits))
32823 return all_fpus[i].name;
32824 }
32825 /* We must find an entry, or things have gone wrong. */
32826 gcc_unreachable ();
32827 }
32828
32829 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
32830 by the function DECL. */
32831 void
32832 arm_declare_function_name (FILE *stream, const char *name, tree decl)
32833 {
32834 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
32835
32836 struct cl_target_option *targ_options;
32837 if (target_parts)
32838 targ_options = TREE_TARGET_OPTION (target_parts);
32839 else
32840 targ_options = TREE_TARGET_OPTION (target_option_current_node);
32841 gcc_assert (targ_options);
32842
32843 /* Only update the assembler .arch string if it is distinct from the last
32844 such string we printed. arch_to_print is set conditionally in case
32845 targ_options->x_arm_arch_string is NULL, which can be the case
32846 when cc1 is invoked directly without passing the -march option. */
32847 std::string arch_to_print;
32848 if (targ_options->x_arm_arch_string)
32849 arch_to_print = targ_options->x_arm_arch_string;
32850
32851 if (arch_to_print != arm_last_printed_arch_string)
32852 {
32853 std::string arch_name
32854 = arch_to_print.substr (0, arch_to_print.find ("+"));
32855 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
32856 const arch_option *arch
32857 = arm_parse_arch_option_name (all_architectures, "-march",
32858 targ_options->x_arm_arch_string);
32859 auto_sbitmap opt_bits (isa_num_bits);
32860
32861 gcc_assert (arch);
32862 if (arch->common.extensions)
32863 {
32864 for (const struct cpu_arch_extension *opt = arch->common.extensions;
32865 opt->name != NULL;
32866 opt++)
32867 {
32868 if (!opt->remove)
32869 {
32870 arm_initialize_isa (opt_bits, opt->isa_bits);
32871 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft"
32872 and "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and
32873 MVE with floating-point instructions are disabled. So the
32874 following check restricts the printing of ".arch_extension
32875 mve" and ".arch_extension fp" (for mve.fp) in the assembly
32876 file. MVE needs this special behaviour because the
32877 feature bits "mve" and "mve_float" are not part of the
32878 "fpu bits", so they are not cleared when -mfloat-abi=soft
32879 (i.e. nofp) is used, but the macros TARGET_HAVE_MVE and
32880 TARGET_HAVE_MVE_FLOAT are disabled. */
32881 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
32882 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
32883 && !TARGET_HAVE_MVE_FLOAT))
32884 continue;
32885 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
32886 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
32887 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
32888 opt->name);
32889 }
32890 }
32891 }
32892
32893 arm_last_printed_arch_string = arch_to_print;
32894 }
32895
32896 fprintf (stream, "\t.syntax unified\n");
32897
32898 if (TARGET_THUMB)
32899 {
32900 if (is_called_in_ARM_mode (decl)
32901 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
32902 && cfun->is_thunk))
32903 fprintf (stream, "\t.code 32\n");
32904 else if (TARGET_THUMB1)
32905 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
32906 else
32907 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
32908 }
32909 else
32910 fprintf (stream, "\t.arm\n");
32911
32912 std::string fpu_to_print
32913 = TARGET_SOFT_FLOAT
32914 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
32915
32916 if (!(!strcmp (fpu_to_print.c_str (), "softvfp") && TARGET_VFP_BASE)
32917 && (fpu_to_print != arm_last_printed_fpu_string))
32918 {
32919 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
32920 arm_last_printed_fpu_string = fpu_to_print;
32921 }
32922
32923 if (TARGET_POKE_FUNCTION_NAME)
32924 arm_poke_function_name (stream, (const char *) name);
32925 }
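
/* A typical (illustrative) assembly preamble produced by the function
   above for a Thumb-2 function might look like:

     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv3-d16

   with the .arch and .fpu lines omitted when they match what was last
   printed.  */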
32926
32927 /* If MEM is in the form [base+offset], extract the two parts of the
32928 address into BASE and OFFSET; otherwise return false after
32929 clearing BASE and OFFSET. */
32930
32931 static bool
32932 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
32933 {
32934 rtx addr;
32935
32936 gcc_assert (MEM_P (mem));
32937
32938 addr = XEXP (mem, 0);
32939
32940 /* Strip off const from addresses like (const (addr)). */
32941 if (GET_CODE (addr) == CONST)
32942 addr = XEXP (addr, 0);
32943
32944 if (GET_CODE (addr) == REG)
32945 {
32946 *base = addr;
32947 *offset = const0_rtx;
32948 return true;
32949 }
32950
32951 if (GET_CODE (addr) == PLUS
32952 && GET_CODE (XEXP (addr, 0)) == REG
32953 && CONST_INT_P (XEXP (addr, 1)))
32954 {
32955 *base = XEXP (addr, 0);
32956 *offset = XEXP (addr, 1);
32957 return true;
32958 }
32959
32960 *base = NULL_RTX;
32961 *offset = NULL_RTX;
32962
32963 return false;
32964 }
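
/* Examples of addresses recognised above (register numbers are
   arbitrary); a (const ...) wrapper around the address is stripped first:
     (mem (reg r4))                        -> base r4, offset 0
     (mem (plus (reg r4) (const_int 8)))   -> base r4, offset 8
   Anything else, e.g. a pre/post-modify address, makes the function
   return false.  */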
32965
32966 /* If INSN is a load or store whose address has the form [base+offset],
32967 extract the two parts into BASE and OFFSET. IS_LOAD is set
32968 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
32969 otherwise return FALSE. */
32970
32971 static bool
32972 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
32973 {
32974 rtx x, dest, src;
32975
32976 gcc_assert (INSN_P (insn));
32977 x = PATTERN (insn);
32978 if (GET_CODE (x) != SET)
32979 return false;
32980
32981 src = SET_SRC (x);
32982 dest = SET_DEST (x);
32983 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
32984 {
32985 *is_load = false;
32986 extract_base_offset_in_addr (dest, base, offset);
32987 }
32988 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
32989 {
32990 *is_load = true;
32991 extract_base_offset_in_addr (src, base, offset);
32992 }
32993 else
32994 return false;
32995
32996 return (*base != NULL_RTX && *offset != NULL_RTX);
32997 }
32998
32999 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33000
33001 Currently we only support fusing ldr or str instructions, so FUSION_PRI
33002 and PRI are only calculated for these instructions. For other instructions,
33003 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33004 instruction fusion can be supported by returning different priorities.
33005
33006 It's important that irrelevant instructions get the largest FUSION_PRI. */
33007
33008 static void
33009 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33010 int *fusion_pri, int *pri)
33011 {
33012 int tmp, off_val;
33013 bool is_load;
33014 rtx base, offset;
33015
33016 gcc_assert (INSN_P (insn));
33017
33018 tmp = max_pri - 1;
33019 if (!fusion_load_store (insn, &base, &offset, &is_load))
33020 {
33021 *pri = tmp;
33022 *fusion_pri = tmp;
33023 return;
33024 }
33025
33026 /* Load goes first. */
33027 if (is_load)
33028 *fusion_pri = tmp - 1;
33029 else
33030 *fusion_pri = tmp - 2;
33031
33032 tmp /= 2;
33033
33034 /* INSN with smaller base register goes first. */
33035 tmp -= ((REGNO (base) & 0xff) << 20);
33036
33037 /* INSN with smaller offset goes first. */
33038 off_val = (int)(INTVAL (offset));
33039 if (off_val >= 0)
33040 tmp -= (off_val & 0xfffff);
33041 else
33042 tmp += ((- off_val) & 0xfffff);
33043
33044 *pri = tmp;
33045 return;
33046 }
33047
33048
33049 /* Construct and return a PARALLEL RTX vector with elements numbering the
33050 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33051 the vector - from the perspective of the architecture. This does not
33052 line up with GCC's perspective on lane numbers, so we end up with
33053 different masks depending on our target endian-ness. The diagram
33054 below may help. We must draw the distinction when building masks
33055 which select one half of the vector. An instruction selecting
33056 architectural low-lanes for a big-endian target, must be described using
33057 a mask selecting GCC high-lanes.
33058
33059 Big-Endian Little-Endian
33060
33061 GCC 0 1 2 3 3 2 1 0
33062 | x | x | x | x | | x | x | x | x |
33063 Architecture 3 2 1 0 3 2 1 0
33064
33065 Low Mask: { 2, 3 } { 0, 1 }
33066 High Mask: { 0, 1 } { 2, 3 }
33067 */
33068
33069 rtx
33070 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33071 {
33072 int nunits = GET_MODE_NUNITS (mode);
33073 rtvec v = rtvec_alloc (nunits / 2);
33074 int high_base = nunits / 2;
33075 int low_base = 0;
33076 int base;
33077 rtx t1;
33078 int i;
33079
33080 if (BYTES_BIG_ENDIAN)
33081 base = high ? low_base : high_base;
33082 else
33083 base = high ? high_base : low_base;
33084
33085 for (i = 0; i < nunits / 2; i++)
33086 RTVEC_ELT (v, i) = GEN_INT (base + i);
33087
33088 t1 = gen_rtx_PARALLEL (mode, v);
33089 return t1;
33090 }
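
/* Worked example: for V4SImode with HIGH == true this yields
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target and
   (parallel [(const_int 0) (const_int 1)]) on a big-endian one, matching
   the mask table in the comment above.  */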
33091
33092 /* Check OP for validity as a PARALLEL RTX vector with elements
33093 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33094 from the perspective of the architecture. See the diagram above
33095 arm_simd_vect_par_cnst_half for more details. */
33096
33097 bool
33098 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33099 bool high)
33100 {
33101 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33102 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33103 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33104 int i = 0;
33105
33106 if (!VECTOR_MODE_P (mode))
33107 return false;
33108
33109 if (count_op != count_ideal)
33110 return false;
33111
33112 for (i = 0; i < count_ideal; i++)
33113 {
33114 rtx elt_op = XVECEXP (op, 0, i);
33115 rtx elt_ideal = XVECEXP (ideal, 0, i);
33116
33117 if (!CONST_INT_P (elt_op)
33118 || INTVAL (elt_ideal) != INTVAL (elt_op))
33119 return false;
33120 }
33121 return true;
33122 }
33123
33124 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33125 in Thumb1. */
33126 static bool
33127 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33128 const_tree)
33129 {
33130 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33131 if (vcall_offset && TARGET_THUMB1)
33132 return false;
33133
33134 /* Otherwise ok. */
33135 return true;
33136 }
33137
33138 /* Generate RTL for a conditional branch with rtx comparison CODE in
33139 mode CC_MODE. The destination of the unlikely conditional branch
33140 is LABEL_REF. */
33141
33142 void
33143 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33144 rtx label_ref)
33145 {
33146 rtx x;
33147 x = gen_rtx_fmt_ee (code, VOIDmode,
33148 gen_rtx_REG (cc_mode, CC_REGNUM),
33149 const0_rtx);
33150
33151 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33152 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33153 pc_rtx);
33154 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33155 }
33156
33157 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33158
33159 For pure-code sections there is no letter code for this attribute, so
33160 output all the section flags numerically when this is needed. */
33161
33162 static bool
33163 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33164 {
33165
33166 if (flags & SECTION_ARM_PURECODE)
33167 {
33168 *num = 0x20000000;
33169
33170 if (!(flags & SECTION_DEBUG))
33171 *num |= 0x2;
33172 if (flags & SECTION_EXCLUDE)
33173 *num |= 0x80000000;
33174 if (flags & SECTION_WRITE)
33175 *num |= 0x1;
33176 if (flags & SECTION_CODE)
33177 *num |= 0x4;
33178 if (flags & SECTION_MERGE)
33179 *num |= 0x10;
33180 if (flags & SECTION_STRINGS)
33181 *num |= 0x20;
33182 if (flags & SECTION_TLS)
33183 *num |= 0x400;
33184 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33185 *num |= 0x200;
33186
33187 return true;
33188 }
33189
33190 return false;
33191 }
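
/* Worked example: a non-debug, executable pure-code section
   (SECTION_ARM_PURECODE | SECTION_CODE) is encoded above as
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE plus the
   ELF SHF_ALLOC and SHF_EXECINSTR bits.  */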
33192
33193 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33194
33195 If pure-code is passed as an option, make sure all functions are in
33196 sections that have the SHF_ARM_PURECODE attribute. */
33197
33198 static section *
33199 arm_function_section (tree decl, enum node_frequency freq,
33200 bool startup, bool exit)
33201 {
33202 const char * section_name;
33203 section * sec;
33204
33205 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33206 return default_function_section (decl, freq, startup, exit);
33207
33208 if (!target_pure_code)
33209 return default_function_section (decl, freq, startup, exit);
33210
33211
33212 section_name = DECL_SECTION_NAME (decl);
33213
33214 /* If a function is not in a named section then it falls under the 'default'
33215 text section, also known as '.text'. We can preserve previous behavior as
33216 the default text section already has the SHF_ARM_PURECODE section
33217 attribute. */
33218 if (!section_name)
33219 {
33220 section *default_sec = default_function_section (decl, freq, startup,
33221 exit);
33222
33223 /* If default_sec is not null, then it must be a special section like for
33224 example .text.startup. We set the pure-code attribute and return the
33225 same section to preserve existing behavior. */
33226 if (default_sec)
33227 default_sec->common.flags |= SECTION_ARM_PURECODE;
33228 return default_sec;
33229 }
33230
33231 /* Otherwise look whether a section has already been created with
33232 'section_name'. */
33233 sec = get_named_section (decl, section_name, 0);
33234 if (!sec)
33235 /* If that is not the case, passing NULL as the section's name to
33236 'get_named_section' will create a section with the declaration's
33237 section name. */
33238 sec = get_named_section (decl, NULL, 0);
33239
33240 /* Set the SHF_ARM_PURECODE attribute. */
33241 sec->common.flags |= SECTION_ARM_PURECODE;
33242
33243 return sec;
33244 }
33245
33246 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
33247
33248 If DECL is a function declaration and pure-code is passed as an option
33249 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33250 section's name and RELOC indicates whether the declaration's initializer may
33251 contain runtime relocations. */
33252
33253 static unsigned int
33254 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33255 {
33256 unsigned int flags = default_section_type_flags (decl, name, reloc);
33257
33258 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33259 flags |= SECTION_ARM_PURECODE;
33260
33261 return flags;
33262 }
33263
33264 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33265
33266 static void
33267 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33268 rtx op0, rtx op1,
33269 rtx *quot_p, rtx *rem_p)
33270 {
33271 if (mode == SImode)
33272 gcc_assert (!TARGET_IDIV);
33273
33274 scalar_int_mode libval_mode
33275 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33276
33277 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33278 libval_mode, op0, mode, op1, mode);
33279
33280 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33281 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33282 GET_MODE_SIZE (mode));
33283
33284 gcc_assert (quotient);
33285 gcc_assert (remainder);
33286
33287 *quot_p = quotient;
33288 *rem_p = remainder;
33289 }
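
/* For example, for SImode operands LIBVAL_MODE above is DImode: the AEABI
   __aeabi_idivmod/__aeabi_uidivmod helpers return quotient and remainder
   as a register pair, modelled here as a single DImode value from which
   the two simplify_gen_subreg calls extract the quotient (at byte offset
   0) and the remainder (at byte offset GET_MODE_SIZE (SImode)).  */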
33290
33291 /* This function checks for the availability of the coprocessor builtin passed
33292 in BUILTIN for the current target. Returns true if it is available and
33293 false otherwise. If a BUILTIN is passed for which this function has not
33294 been implemented, it will cause an internal compiler error (gcc_unreachable). */
33295
33296 bool
33297 arm_coproc_builtin_available (enum unspecv builtin)
33298 {
33299 /* None of these builtins are available in Thumb mode if the target only
33300 supports Thumb-1. */
33301 if (TARGET_THUMB1)
33302 return false;
33303
33304 switch (builtin)
33305 {
33306 case VUNSPEC_CDP:
33307 case VUNSPEC_LDC:
33308 case VUNSPEC_LDCL:
33309 case VUNSPEC_STC:
33310 case VUNSPEC_STCL:
33311 case VUNSPEC_MCR:
33312 case VUNSPEC_MRC:
33313 if (arm_arch4)
33314 return true;
33315 break;
33316 case VUNSPEC_CDP2:
33317 case VUNSPEC_LDC2:
33318 case VUNSPEC_LDC2L:
33319 case VUNSPEC_STC2:
33320 case VUNSPEC_STC2L:
33321 case VUNSPEC_MCR2:
33322 case VUNSPEC_MRC2:
33323 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33324 ARMv8-{A,M}. */
33325 if (arm_arch5t)
33326 return true;
33327 break;
33328 case VUNSPEC_MCRR:
33329 case VUNSPEC_MRRC:
33330 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33331 ARMv8-{A,M}. */
33332 if (arm_arch6 || arm_arch5te)
33333 return true;
33334 break;
33335 case VUNSPEC_MCRR2:
33336 case VUNSPEC_MRRC2:
33337 if (arm_arch6)
33338 return true;
33339 break;
33340 default:
33341 gcc_unreachable ();
33342 }
33343 return false;
33344 }
33345
33346 /* This function returns true if OP is a valid memory operand for the ldc and
33347 stc coprocessor instructions and false otherwise. */
33348
33349 bool
33350 arm_coproc_ldc_stc_legitimate_address (rtx op)
33351 {
33352 HOST_WIDE_INT range;
33353 /* Has to be a memory operand. */
33354 if (!MEM_P (op))
33355 return false;
33356
33357 op = XEXP (op, 0);
33358
33359 /* We accept registers. */
33360 if (REG_P (op))
33361 return true;
33362
33363 switch (GET_CODE (op))
33364 {
33365 case PLUS:
33366 {
33367 /* Or registers with an offset. */
33368 if (!REG_P (XEXP (op, 0)))
33369 return false;
33370
33371 op = XEXP (op, 1);
33372
33373 /* The offset must be an immediate though. */
33374 if (!CONST_INT_P (op))
33375 return false;
33376
33377 range = INTVAL (op);
33378
33379 /* Within the range of [-1020,1020]. */
33380 if (!IN_RANGE (range, -1020, 1020))
33381 return false;
33382
33383 /* And a multiple of 4. */
33384 return (range % 4) == 0;
33385 }
33386 case PRE_INC:
33387 case POST_INC:
33388 case PRE_DEC:
33389 case POST_DEC:
33390 return REG_P (XEXP (op, 0));
33391 default:
33392 gcc_unreachable ();
33393 }
33394 return false;
33395 }
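
/* Examples (hypothetical operands): addresses such as (mem (reg r1)) and
   (mem (plus (reg r1) (const_int 8))) are accepted above, while an offset
   of 6 (not a multiple of 4) or 1024 (outside [-1020, 1020]) is
   rejected.  */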
33396
33397 /* Return the diagnostic message string if conversion from FROMTYPE to
33398 TOTYPE is not allowed, NULL otherwise. */
33399
33400 static const char *
33401 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33402 {
33403 if (element_mode (fromtype) != element_mode (totype))
33404 {
33405 /* Do not allow conversions to/from BFmode scalar types. */
33406 if (TYPE_MODE (fromtype) == BFmode)
33407 return N_("invalid conversion from type %<bfloat16_t%>");
33408 if (TYPE_MODE (totype) == BFmode)
33409 return N_("invalid conversion to type %<bfloat16_t%>");
33410 }
33411
33412 /* Conversion allowed. */
33413 return NULL;
33414 }
33415
33416 /* Return the diagnostic message string if the unary operation OP is
33417 not permitted on TYPE, NULL otherwise. */
33418
33419 static const char *
33420 arm_invalid_unary_op (int op, const_tree type)
33421 {
33422 /* Reject all single-operand operations on BFmode except for &. */
33423 if (element_mode (type) == BFmode && op != ADDR_EXPR)
33424 return N_("operation not permitted on type %<bfloat16_t%>");
33425
33426 /* Operation allowed. */
33427 return NULL;
33428 }
33429
33430 /* Return the diagnostic message string if the binary operation OP is
33431 not permitted on TYPE1 and TYPE2, NULL otherwise. */
33432
33433 static const char *
33434 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
33435 const_tree type2)
33436 {
33437 /* Reject all 2-operand operations on BFmode. */
33438 if (element_mode (type1) == BFmode
33439 || element_mode (type2) == BFmode)
33440 return N_("operation not permitted on type %<bfloat16_t%>");
33441
33442 /* Operation allowed. */
33443 return NULL;
33444 }
33445
33446 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
33447
33448 In VFPv1, VFP registers could only be accessed in the mode they were
33449 set, so subregs would be invalid there. However, we don't support
33450 VFPv1 at the moment, and the restriction was lifted in VFPv2.
33451
33452 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
33453 VFP registers in little-endian order. We can't describe that accurately to
33454 GCC, so avoid taking subregs of such values.
33455
33456 The only exception is going from a 128-bit to a 64-bit type. In that
33457 case the data layout happens to be consistent for big-endian, so we
33458 explicitly allow that case. */
33459
33460 static bool
33461 arm_can_change_mode_class (machine_mode from, machine_mode to,
33462 reg_class_t rclass)
33463 {
33464 if (TARGET_BIG_END
33465 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
33466 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
33467 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
33468 && reg_classes_intersect_p (VFP_REGS, rclass))
33469 return false;
33470 return true;
33471 }
33472
33473 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
33474 strcpy from constants will be faster. */
33475
33476 static HOST_WIDE_INT
33477 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33478 {
33479 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
33480 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
33481 return MAX (align, BITS_PER_WORD * factor);
33482 return align;
33483 }
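
/* Worked example: when tuning for XScale in ARM mode the factor above is
   2, so a string constant that would otherwise be 8-bit aligned gets
   MAX (8, 32 * 2) == 64-bit alignment when not optimising for size.  */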
33484
33485 /* Emit a speculation barrier on target architectures that do not have
33486 DSB/ISB directly. Such systems probably don't need a barrier
33487 themselves, but if the code is ever run on a later architecture, it
33488 might become a problem. */
33489 void
33490 arm_emit_speculation_barrier_function ()
33491 {
33492 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
33493 }
33494
33495 /* Have we recorded an explicit access to the Q bit of APSR? */
33496 bool
33497 arm_q_bit_access (void)
33498 {
33499 if (cfun && cfun->decl)
33500 return lookup_attribute ("acle qbit",
33501 DECL_ATTRIBUTES (cfun->decl));
33502 return true;
33503 }
33504
33505 /* Have we recorded an explicit access to the GE bits of PSTATE? */
33506 bool
33507 arm_ge_bits_access (void)
33508 {
33509 if (cfun && cfun->decl)
33510 return lookup_attribute ("acle gebits",
33511 DECL_ATTRIBUTES (cfun->decl));
33512 return true;
33513 }
33514
33515 /* Return NULL if insn INSN is valid within a low-overhead loop.
33516 Otherwise return a string explaining why doloop cannot be applied. */
33517
33518 static const char *
33519 arm_invalid_within_doloop (const rtx_insn *insn)
33520 {
33521 if (!TARGET_HAVE_LOB)
33522 return default_invalid_within_doloop (insn);
33523
33524 if (CALL_P (insn))
33525 return "Function call in the loop.";
33526
33527 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
33528 return "LR is used inside loop.";
33529
33530 return NULL;
33531 }
33532
33533 bool
33534 arm_target_insn_ok_for_lob (rtx insn)
33535 {
33536 basic_block bb = BLOCK_FOR_INSN (insn);
33537 /* Make sure the basic block of the target insn is a simple latch
33538 whose single predecessor and successor are the body of the loop
33539 itself. Only simple loops with a single basic block as the body are
33540 supported for 'low-overhead loops', which ensures that the LE target is
33541 above the LE instruction itself in the generated code. */
33542
33543 return single_succ_p (bb)
33544 && single_pred_p (bb)
33545 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
33546 && contains_no_active_insn_p (bb);
33547 }
33548
33549 #if CHECKING_P
33550 namespace selftest {
33551
33552 /* Scan the static data tables generated by parsecpu.awk looking for
33553 potential issues with the data. We primarily check for
33554 inconsistencies in the option extensions at present (extensions
33555 that duplicate others but aren't marked as aliases). Furthermore,
33556 for correct canonicalization later options must never be a subset
33557 of an earlier option. Any extension should also only specify other
33558 feature bits and never an architecture bit. The architecture is inferred
33559 from the declaration of the extension. */
33560 static void
33561 arm_test_cpu_arch_data (void)
33562 {
33563 const arch_option *arch;
33564 const cpu_option *cpu;
33565 auto_sbitmap target_isa (isa_num_bits);
33566 auto_sbitmap isa1 (isa_num_bits);
33567 auto_sbitmap isa2 (isa_num_bits);
33568
33569 for (arch = all_architectures; arch->common.name != NULL; ++arch)
33570 {
33571 const cpu_arch_extension *ext1, *ext2;
33572
33573 if (arch->common.extensions == NULL)
33574 continue;
33575
33576 arm_initialize_isa (target_isa, arch->common.isa_bits);
33577
33578 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
33579 {
33580 if (ext1->alias)
33581 continue;
33582
33583 arm_initialize_isa (isa1, ext1->isa_bits);
33584 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33585 {
33586 if (ext2->alias || ext1->remove != ext2->remove)
33587 continue;
33588
33589 arm_initialize_isa (isa2, ext2->isa_bits);
33590 /* If the option is a subset of the parent option, it doesn't
33591 add anything and so isn't useful. */
33592 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33593
33594 /* If the extension specifies any architectural bits then
33595 disallow it. Extensions should only specify feature bits. */
33596 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33597 }
33598 }
33599 }
33600
33601 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
33602 {
33603 const cpu_arch_extension *ext1, *ext2;
33604
33605 if (cpu->common.extensions == NULL)
33606 continue;
33607
33608 arm_initialize_isa (target_isa, arch->common.isa_bits);
33609
33610 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
33611 {
33612 if (ext1->alias)
33613 continue;
33614
33615 arm_initialize_isa (isa1, ext1->isa_bits);
33616 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33617 {
33618 if (ext2->alias || ext1->remove != ext2->remove)
33619 continue;
33620
33621 arm_initialize_isa (isa2, ext2->isa_bits);
33622 /* If the option is a subset of the parent option, it doesn't
33623 add anything and so isn't useful. */
33624 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33625
33626 /* If the extension specifies any architectural bits then
33627 disallow it. Extensions should only specify feature bits. */
33628 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33629 }
33630 }
33631 }
33632 }
33633
33634 /* Scan the static data tables generated by parsecpu.awk looking for
33635 potential issues with the data. Here we check for consistency between the
33636 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
33637 a feature bit that is not defined by any FPU flag. */
33638 static void
33639 arm_test_fpu_data (void)
33640 {
33641 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
33642 auto_sbitmap fpubits (isa_num_bits);
33643 auto_sbitmap tmpset (isa_num_bits);
33644
33645 static const enum isa_feature fpu_bitlist_internal[]
33646 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
33647 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
33648
33649 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33650 {
33651 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
33652 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
33653 bitmap_clear (isa_all_fpubits_internal);
33654 bitmap_copy (isa_all_fpubits_internal, tmpset);
33655 }
33656
33657 if (!bitmap_empty_p (isa_all_fpubits_internal))
33658 {
33659 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
33660 " group that are not defined by any FPU.\n"
33661 " Check your arm-cpus.in.\n");
33662 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
33663 }
33664 }
33665
33666 static void
33667 arm_run_selftests (void)
33668 {
33669 arm_test_cpu_arch_data ();
33670 arm_test_fpu_data ();
33671 }
33672 } /* Namespace selftest. */
33673
33674 #undef TARGET_RUN_TARGET_SELFTESTS
33675 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
33676 #endif /* CHECKING_P */
33677
33678 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
33679 Unlike the arm version, we do NOT implement asm flag outputs. */
33680
33681 rtx_insn *
33682 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
33683 vec<const char *> &constraints,
33684 vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
33685 {
33686 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
33687 if (strncmp (constraints[i], "=@cc", 4) == 0)
33688 {
33689 sorry ("asm flags not supported in thumb1 mode");
33690 break;
33691 }
33692 return NULL;
33693 }
33694
33695 /* Generate code to enable conditional branches in functions over 1 MiB.
33696 Parameters are:
33697 operands: is the operands list of the asm insn (see arm_cond_branch or
33698 arm_cond_branch_reversed).
33699 pos_label: is an index into the operands array where operands[pos_label] is
33700 the asm label of the final jump destination.
33701 dest: is a string which is used to generate the asm label of the intermediate
33702 destination.
33703 branch_format: is a string denoting the intermediate branch format, e.g.
33704 "beq", "bne", etc. */
33705
33706 const char *
33707 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
33708 const char * branch_format)
33709 {
33710 rtx_code_label * tmp_label = gen_label_rtx ();
33711 char label_buf[256];
33712 char buffer[128];
33713 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
33714 CODE_LABEL_NUMBER (tmp_label));
33715 const char *label_ptr = arm_strip_name_encoding (label_buf);
33716 rtx dest_label = operands[pos_label];
33717 operands[pos_label] = tmp_label;
33718
33719 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
33720 output_asm_insn (buffer, operands);
33721
33722 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
33723 operands[pos_label] = dest_label;
33724 output_asm_insn (buffer, operands);
33725 return "";
33726 }
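
/* Illustrative output of the routine above (label names and the far
   destination are hypothetical): with branch_format "beq\t" and dest
   "LCB" the emitted sequence is roughly

     beq   .LCB42          @ short-range conditional branch
     b     .L_far_dest     @ unconditional branch with a long range
   .LCB42:

   so only the unconditional branch has to reach the distant label.  Note
   that the short conditional branch jumps over the unconditional one, so
   the caller chooses branch_format accordingly.  */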
33727
33728 /* If the given mode matches, restrict the base register for loads to LO_REGS
33729 (i.e. [Rn], Rn <= LO_REGS). */
33730 enum reg_class
33731 arm_mode_base_reg_class (machine_mode mode)
33732 {
33733 if (TARGET_HAVE_MVE
33734 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
33735 return LO_REGS;
33736
33737 return MODE_BASE_REG_REG_CLASS (mode);
33738 }
33739
33740 struct gcc_target targetm = TARGET_INITIALIZER;
33741
33742 #include "gt-arm.h"