Remove DImode expansions for 1-bit shifts
[gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
370 \f
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
376
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
390
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
414
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
420
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
426
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
429
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
432
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
459
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
472
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
481
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
487
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
490
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
503
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
515
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
525
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
595
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
615
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
618
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621
622 #endif /* ARM_UNWIND_INFO */
623
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
637
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
649
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is -4088 + 1 + 4095 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
655
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
662
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
666
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
669
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
684
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
693
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
714
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
718
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
725
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
729
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
733
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
737
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
747
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
753
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
793
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
796
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
799
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
802 \f
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
806
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
810
811 extern FILE * asm_out_file;
812
813 /* True if we are currently building a constant table. */
814 int making_const_table;
815
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
818
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
821
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
824
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
828
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
832
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
836
837 /* Active target architecture and tuning. */
838
839 struct arm_build_target arm_active_target;
840
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
843
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
864
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
867
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
870
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
873
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
876
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
879
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
882
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
885
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
888
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
894
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
898
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
901
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
904
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
910
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
913
914 /* Nonzero if tuning for XScale */
915 int arm_tune_xscale = 0;
916
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
920
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
923
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack, it's intended to help work around
927 problems in GLD which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
930
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
933
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
936
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
940
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
943
944 /* Nonzero if we should use Neon to handle 64-bits operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
947
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
950
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
953
954 enum arm_pcs arm_pcs_default;
955
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
960
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
970
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
973
974 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
975 int arm_arch_dotprod = 0;
976
977 /* Nonzero if chip supports the ARMv8-M security extensions. */
978 int arm_arch_cmse = 0;
979
980 /* Nonzero if the core has a very small, high-latency, multiply unit. */
981 int arm_m_profile_small_mul = 0;
982
983 /* The condition codes of the ARM, and the inverse function. */
984 static const char * const arm_condition_codes[] =
985 {
986 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
987 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
988 };
989
990 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
991 int arm_regs_in_sequence[] =
992 {
993 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
994 };
995
996 #define ARM_LSL_NAME "lsl"
997 #define streq(string1, string2) (strcmp (string1, string2) == 0)
998
999 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1000 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1001 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1002 \f
1003 /* Initialization code. */
1004
1005 struct cpu_tune
1006 {
1007 enum processor_type scheduler;
1008 unsigned int tune_flags;
1009 const struct tune_params *tune;
1010 };
1011
1012 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1013 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1014 { \
1015 num_slots, \
1016 l1_size, \
1017 l1_line_size \
1018 }
1019
1020 /* arm generic vectorizer costs. */
1021 static const
1022 struct cpu_vec_costs arm_default_vec_cost = {
1023 1, /* scalar_stmt_cost. */
1024 1, /* scalar load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 1, /* vec_unalign_load_cost. */
1031 1, /* vec_unalign_store_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1035 };
1036
1037 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1038 #include "aarch-cost-tables.h"
1039
1040
1041
1042 const struct cpu_cost_table cortexa9_extra_costs =
1043 {
1044 /* ALU */
1045 {
1046 0, /* arith. */
1047 0, /* logical. */
1048 0, /* shift. */
1049 COSTS_N_INSNS (1), /* shift_reg. */
1050 COSTS_N_INSNS (1), /* arith_shift. */
1051 COSTS_N_INSNS (2), /* arith_shift_reg. */
1052 0, /* log_shift. */
1053 COSTS_N_INSNS (1), /* log_shift_reg. */
1054 COSTS_N_INSNS (1), /* extend. */
1055 COSTS_N_INSNS (2), /* extend_arith. */
1056 COSTS_N_INSNS (1), /* bfi. */
1057 COSTS_N_INSNS (1), /* bfx. */
1058 0, /* clz. */
1059 0, /* rev. */
1060 0, /* non_exec. */
1061 true /* non_exec_costs_exec. */
1062 },
1063 {
1064 /* MULT SImode */
1065 {
1066 COSTS_N_INSNS (3), /* simple. */
1067 COSTS_N_INSNS (3), /* flag_setting. */
1068 COSTS_N_INSNS (2), /* extend. */
1069 COSTS_N_INSNS (3), /* add. */
1070 COSTS_N_INSNS (2), /* extend_add. */
1071 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1072 },
1073 /* MULT DImode */
1074 {
1075 0, /* simple (N/A). */
1076 0, /* flag_setting (N/A). */
1077 COSTS_N_INSNS (4), /* extend. */
1078 0, /* add (N/A). */
1079 COSTS_N_INSNS (4), /* extend_add. */
1080 0 /* idiv (N/A). */
1081 }
1082 },
1083 /* LD/ST */
1084 {
1085 COSTS_N_INSNS (2), /* load. */
1086 COSTS_N_INSNS (2), /* load_sign_extend. */
1087 COSTS_N_INSNS (2), /* ldrd. */
1088 COSTS_N_INSNS (2), /* ldm_1st. */
1089 1, /* ldm_regs_per_insn_1st. */
1090 2, /* ldm_regs_per_insn_subsequent. */
1091 COSTS_N_INSNS (5), /* loadf. */
1092 COSTS_N_INSNS (5), /* loadd. */
1093 COSTS_N_INSNS (1), /* load_unaligned. */
1094 COSTS_N_INSNS (2), /* store. */
1095 COSTS_N_INSNS (2), /* strd. */
1096 COSTS_N_INSNS (2), /* stm_1st. */
1097 1, /* stm_regs_per_insn_1st. */
1098 2, /* stm_regs_per_insn_subsequent. */
1099 COSTS_N_INSNS (1), /* storef. */
1100 COSTS_N_INSNS (1), /* stored. */
1101 COSTS_N_INSNS (1), /* store_unaligned. */
1102 COSTS_N_INSNS (1), /* loadv. */
1103 COSTS_N_INSNS (1) /* storev. */
1104 },
1105 {
1106 /* FP SFmode */
1107 {
1108 COSTS_N_INSNS (14), /* div. */
1109 COSTS_N_INSNS (4), /* mult. */
1110 COSTS_N_INSNS (7), /* mult_addsub. */
1111 COSTS_N_INSNS (30), /* fma. */
1112 COSTS_N_INSNS (3), /* addsub. */
1113 COSTS_N_INSNS (1), /* fpconst. */
1114 COSTS_N_INSNS (1), /* neg. */
1115 COSTS_N_INSNS (3), /* compare. */
1116 COSTS_N_INSNS (3), /* widen. */
1117 COSTS_N_INSNS (3), /* narrow. */
1118 COSTS_N_INSNS (3), /* toint. */
1119 COSTS_N_INSNS (3), /* fromint. */
1120 COSTS_N_INSNS (3) /* roundint. */
1121 },
1122 /* FP DFmode */
1123 {
1124 COSTS_N_INSNS (24), /* div. */
1125 COSTS_N_INSNS (5), /* mult. */
1126 COSTS_N_INSNS (8), /* mult_addsub. */
1127 COSTS_N_INSNS (30), /* fma. */
1128 COSTS_N_INSNS (3), /* addsub. */
1129 COSTS_N_INSNS (1), /* fpconst. */
1130 COSTS_N_INSNS (1), /* neg. */
1131 COSTS_N_INSNS (3), /* compare. */
1132 COSTS_N_INSNS (3), /* widen. */
1133 COSTS_N_INSNS (3), /* narrow. */
1134 COSTS_N_INSNS (3), /* toint. */
1135 COSTS_N_INSNS (3), /* fromint. */
1136 COSTS_N_INSNS (3) /* roundint. */
1137 }
1138 },
1139 /* Vector */
1140 {
1141 COSTS_N_INSNS (1) /* alu. */
1142 }
1143 };
1144
1145 const struct cpu_cost_table cortexa8_extra_costs =
1146 {
1147 /* ALU */
1148 {
1149 0, /* arith. */
1150 0, /* logical. */
1151 COSTS_N_INSNS (1), /* shift. */
1152 0, /* shift_reg. */
1153 COSTS_N_INSNS (1), /* arith_shift. */
1154 0, /* arith_shift_reg. */
1155 COSTS_N_INSNS (1), /* log_shift. */
1156 0, /* log_shift_reg. */
1157 0, /* extend. */
1158 0, /* extend_arith. */
1159 0, /* bfi. */
1160 0, /* bfx. */
1161 0, /* clz. */
1162 0, /* rev. */
1163 0, /* non_exec. */
1164 true /* non_exec_costs_exec. */
1165 },
1166 {
1167 /* MULT SImode */
1168 {
1169 COSTS_N_INSNS (1), /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1175 },
1176 /* MULT DImode */
1177 {
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (2), /* extend. */
1181 0, /* add (N/A). */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1184 }
1185 },
1186 /* LD/ST */
1187 {
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (1), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (1), /* loadf. */
1195 COSTS_N_INSNS (1), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (1), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (1), /* storef. */
1203 COSTS_N_INSNS (1), /* stored. */
1204 COSTS_N_INSNS (1), /* store_unaligned. */
1205 COSTS_N_INSNS (1), /* loadv. */
1206 COSTS_N_INSNS (1) /* storev. */
1207 },
1208 {
1209 /* FP SFmode */
1210 {
1211 COSTS_N_INSNS (36), /* div. */
1212 COSTS_N_INSNS (11), /* mult. */
1213 COSTS_N_INSNS (20), /* mult_addsub. */
1214 COSTS_N_INSNS (30), /* fma. */
1215 COSTS_N_INSNS (9), /* addsub. */
1216 COSTS_N_INSNS (3), /* fpconst. */
1217 COSTS_N_INSNS (3), /* neg. */
1218 COSTS_N_INSNS (6), /* compare. */
1219 COSTS_N_INSNS (4), /* widen. */
1220 COSTS_N_INSNS (4), /* narrow. */
1221 COSTS_N_INSNS (8), /* toint. */
1222 COSTS_N_INSNS (8), /* fromint. */
1223 COSTS_N_INSNS (8) /* roundint. */
1224 },
1225 /* FP DFmode */
1226 {
1227 COSTS_N_INSNS (64), /* div. */
1228 COSTS_N_INSNS (16), /* mult. */
1229 COSTS_N_INSNS (25), /* mult_addsub. */
1230 COSTS_N_INSNS (30), /* fma. */
1231 COSTS_N_INSNS (9), /* addsub. */
1232 COSTS_N_INSNS (3), /* fpconst. */
1233 COSTS_N_INSNS (3), /* neg. */
1234 COSTS_N_INSNS (6), /* compare. */
1235 COSTS_N_INSNS (6), /* widen. */
1236 COSTS_N_INSNS (6), /* narrow. */
1237 COSTS_N_INSNS (8), /* toint. */
1238 COSTS_N_INSNS (8), /* fromint. */
1239 COSTS_N_INSNS (8) /* roundint. */
1240 }
1241 },
1242 /* Vector */
1243 {
1244 COSTS_N_INSNS (1) /* alu. */
1245 }
1246 };
1247
1248 const struct cpu_cost_table cortexa5_extra_costs =
1249 {
1250 /* ALU */
1251 {
1252 0, /* arith. */
1253 0, /* logical. */
1254 COSTS_N_INSNS (1), /* shift. */
1255 COSTS_N_INSNS (1), /* shift_reg. */
1256 COSTS_N_INSNS (1), /* arith_shift. */
1257 COSTS_N_INSNS (1), /* arith_shift_reg. */
1258 COSTS_N_INSNS (1), /* log_shift. */
1259 COSTS_N_INSNS (1), /* log_shift_reg. */
1260 COSTS_N_INSNS (1), /* extend. */
1261 COSTS_N_INSNS (1), /* extend_arith. */
1262 COSTS_N_INSNS (1), /* bfi. */
1263 COSTS_N_INSNS (1), /* bfx. */
1264 COSTS_N_INSNS (1), /* clz. */
1265 COSTS_N_INSNS (1), /* rev. */
1266 0, /* non_exec. */
1267 true /* non_exec_costs_exec. */
1268 },
1269
1270 {
1271 /* MULT SImode */
1272 {
1273 0, /* simple. */
1274 COSTS_N_INSNS (1), /* flag_setting. */
1275 COSTS_N_INSNS (1), /* extend. */
1276 COSTS_N_INSNS (1), /* add. */
1277 COSTS_N_INSNS (1), /* extend_add. */
1278 COSTS_N_INSNS (7) /* idiv. */
1279 },
1280 /* MULT DImode */
1281 {
1282 0, /* simple (N/A). */
1283 0, /* flag_setting (N/A). */
1284 COSTS_N_INSNS (1), /* extend. */
1285 0, /* add. */
1286 COSTS_N_INSNS (2), /* extend_add. */
1287 0 /* idiv (N/A). */
1288 }
1289 },
1290 /* LD/ST */
1291 {
1292 COSTS_N_INSNS (1), /* load. */
1293 COSTS_N_INSNS (1), /* load_sign_extend. */
1294 COSTS_N_INSNS (6), /* ldrd. */
1295 COSTS_N_INSNS (1), /* ldm_1st. */
1296 1, /* ldm_regs_per_insn_1st. */
1297 2, /* ldm_regs_per_insn_subsequent. */
1298 COSTS_N_INSNS (2), /* loadf. */
1299 COSTS_N_INSNS (4), /* loadd. */
1300 COSTS_N_INSNS (1), /* load_unaligned. */
1301 COSTS_N_INSNS (1), /* store. */
1302 COSTS_N_INSNS (3), /* strd. */
1303 COSTS_N_INSNS (1), /* stm_1st. */
1304 1, /* stm_regs_per_insn_1st. */
1305 2, /* stm_regs_per_insn_subsequent. */
1306 COSTS_N_INSNS (2), /* storef. */
1307 COSTS_N_INSNS (2), /* stored. */
1308 COSTS_N_INSNS (1), /* store_unaligned. */
1309 COSTS_N_INSNS (1), /* loadv. */
1310 COSTS_N_INSNS (1) /* storev. */
1311 },
1312 {
1313 /* FP SFmode */
1314 {
1315 COSTS_N_INSNS (15), /* div. */
1316 COSTS_N_INSNS (3), /* mult. */
1317 COSTS_N_INSNS (7), /* mult_addsub. */
1318 COSTS_N_INSNS (7), /* fma. */
1319 COSTS_N_INSNS (3), /* addsub. */
1320 COSTS_N_INSNS (3), /* fpconst. */
1321 COSTS_N_INSNS (3), /* neg. */
1322 COSTS_N_INSNS (3), /* compare. */
1323 COSTS_N_INSNS (3), /* widen. */
1324 COSTS_N_INSNS (3), /* narrow. */
1325 COSTS_N_INSNS (3), /* toint. */
1326 COSTS_N_INSNS (3), /* fromint. */
1327 COSTS_N_INSNS (3) /* roundint. */
1328 },
1329 /* FP DFmode */
1330 {
1331 COSTS_N_INSNS (30), /* div. */
1332 COSTS_N_INSNS (6), /* mult. */
1333 COSTS_N_INSNS (10), /* mult_addsub. */
1334 COSTS_N_INSNS (7), /* fma. */
1335 COSTS_N_INSNS (3), /* addsub. */
1336 COSTS_N_INSNS (3), /* fpconst. */
1337 COSTS_N_INSNS (3), /* neg. */
1338 COSTS_N_INSNS (3), /* compare. */
1339 COSTS_N_INSNS (3), /* widen. */
1340 COSTS_N_INSNS (3), /* narrow. */
1341 COSTS_N_INSNS (3), /* toint. */
1342 COSTS_N_INSNS (3), /* fromint. */
1343 COSTS_N_INSNS (3) /* roundint. */
1344 }
1345 },
1346 /* Vector */
1347 {
1348 COSTS_N_INSNS (1) /* alu. */
1349 }
1350 };
1351
1352
1353 const struct cpu_cost_table cortexa7_extra_costs =
1354 {
1355 /* ALU */
1356 {
1357 0, /* arith. */
1358 0, /* logical. */
1359 COSTS_N_INSNS (1), /* shift. */
1360 COSTS_N_INSNS (1), /* shift_reg. */
1361 COSTS_N_INSNS (1), /* arith_shift. */
1362 COSTS_N_INSNS (1), /* arith_shift_reg. */
1363 COSTS_N_INSNS (1), /* log_shift. */
1364 COSTS_N_INSNS (1), /* log_shift_reg. */
1365 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (1), /* extend_arith. */
1367 COSTS_N_INSNS (1), /* bfi. */
1368 COSTS_N_INSNS (1), /* bfx. */
1369 COSTS_N_INSNS (1), /* clz. */
1370 COSTS_N_INSNS (1), /* rev. */
1371 0, /* non_exec. */
1372 true /* non_exec_costs_exec. */
1373 },
1374
1375 {
1376 /* MULT SImode */
1377 {
1378 0, /* simple. */
1379 COSTS_N_INSNS (1), /* flag_setting. */
1380 COSTS_N_INSNS (1), /* extend. */
1381 COSTS_N_INSNS (1), /* add. */
1382 COSTS_N_INSNS (1), /* extend_add. */
1383 COSTS_N_INSNS (7) /* idiv. */
1384 },
1385 /* MULT DImode */
1386 {
1387 0, /* simple (N/A). */
1388 0, /* flag_setting (N/A). */
1389 COSTS_N_INSNS (1), /* extend. */
1390 0, /* add. */
1391 COSTS_N_INSNS (2), /* extend_add. */
1392 0 /* idiv (N/A). */
1393 }
1394 },
1395 /* LD/ST */
1396 {
1397 COSTS_N_INSNS (1), /* load. */
1398 COSTS_N_INSNS (1), /* load_sign_extend. */
1399 COSTS_N_INSNS (3), /* ldrd. */
1400 COSTS_N_INSNS (1), /* ldm_1st. */
1401 1, /* ldm_regs_per_insn_1st. */
1402 2, /* ldm_regs_per_insn_subsequent. */
1403 COSTS_N_INSNS (2), /* loadf. */
1404 COSTS_N_INSNS (2), /* loadd. */
1405 COSTS_N_INSNS (1), /* load_unaligned. */
1406 COSTS_N_INSNS (1), /* store. */
1407 COSTS_N_INSNS (3), /* strd. */
1408 COSTS_N_INSNS (1), /* stm_1st. */
1409 1, /* stm_regs_per_insn_1st. */
1410 2, /* stm_regs_per_insn_subsequent. */
1411 COSTS_N_INSNS (2), /* storef. */
1412 COSTS_N_INSNS (2), /* stored. */
1413 COSTS_N_INSNS (1), /* store_unaligned. */
1414 COSTS_N_INSNS (1), /* loadv. */
1415 COSTS_N_INSNS (1) /* storev. */
1416 },
1417 {
1418 /* FP SFmode */
1419 {
1420 COSTS_N_INSNS (15), /* div. */
1421 COSTS_N_INSNS (3), /* mult. */
1422 COSTS_N_INSNS (7), /* mult_addsub. */
1423 COSTS_N_INSNS (7), /* fma. */
1424 COSTS_N_INSNS (3), /* addsub. */
1425 COSTS_N_INSNS (3), /* fpconst. */
1426 COSTS_N_INSNS (3), /* neg. */
1427 COSTS_N_INSNS (3), /* compare. */
1428 COSTS_N_INSNS (3), /* widen. */
1429 COSTS_N_INSNS (3), /* narrow. */
1430 COSTS_N_INSNS (3), /* toint. */
1431 COSTS_N_INSNS (3), /* fromint. */
1432 COSTS_N_INSNS (3) /* roundint. */
1433 },
1434 /* FP DFmode */
1435 {
1436 COSTS_N_INSNS (30), /* div. */
1437 COSTS_N_INSNS (6), /* mult. */
1438 COSTS_N_INSNS (10), /* mult_addsub. */
1439 COSTS_N_INSNS (7), /* fma. */
1440 COSTS_N_INSNS (3), /* addsub. */
1441 COSTS_N_INSNS (3), /* fpconst. */
1442 COSTS_N_INSNS (3), /* neg. */
1443 COSTS_N_INSNS (3), /* compare. */
1444 COSTS_N_INSNS (3), /* widen. */
1445 COSTS_N_INSNS (3), /* narrow. */
1446 COSTS_N_INSNS (3), /* toint. */
1447 COSTS_N_INSNS (3), /* fromint. */
1448 COSTS_N_INSNS (3) /* roundint. */
1449 }
1450 },
1451 /* Vector */
1452 {
1453 COSTS_N_INSNS (1) /* alu. */
1454 }
1455 };
1456
1457 const struct cpu_cost_table cortexa12_extra_costs =
1458 {
1459 /* ALU */
1460 {
1461 0, /* arith. */
1462 0, /* logical. */
1463 0, /* shift. */
1464 COSTS_N_INSNS (1), /* shift_reg. */
1465 COSTS_N_INSNS (1), /* arith_shift. */
1466 COSTS_N_INSNS (1), /* arith_shift_reg. */
1467 COSTS_N_INSNS (1), /* log_shift. */
1468 COSTS_N_INSNS (1), /* log_shift_reg. */
1469 0, /* extend. */
1470 COSTS_N_INSNS (1), /* extend_arith. */
1471 0, /* bfi. */
1472 COSTS_N_INSNS (1), /* bfx. */
1473 COSTS_N_INSNS (1), /* clz. */
1474 COSTS_N_INSNS (1), /* rev. */
1475 0, /* non_exec. */
1476 true /* non_exec_costs_exec. */
1477 },
1478 /* MULT SImode */
1479 {
1480 {
1481 COSTS_N_INSNS (2), /* simple. */
1482 COSTS_N_INSNS (3), /* flag_setting. */
1483 COSTS_N_INSNS (2), /* extend. */
1484 COSTS_N_INSNS (3), /* add. */
1485 COSTS_N_INSNS (2), /* extend_add. */
1486 COSTS_N_INSNS (18) /* idiv. */
1487 },
1488 /* MULT DImode */
1489 {
1490 0, /* simple (N/A). */
1491 0, /* flag_setting (N/A). */
1492 COSTS_N_INSNS (3), /* extend. */
1493 0, /* add (N/A). */
1494 COSTS_N_INSNS (3), /* extend_add. */
1495 0 /* idiv (N/A). */
1496 }
1497 },
1498 /* LD/ST */
1499 {
1500 COSTS_N_INSNS (3), /* load. */
1501 COSTS_N_INSNS (3), /* load_sign_extend. */
1502 COSTS_N_INSNS (3), /* ldrd. */
1503 COSTS_N_INSNS (3), /* ldm_1st. */
1504 1, /* ldm_regs_per_insn_1st. */
1505 2, /* ldm_regs_per_insn_subsequent. */
1506 COSTS_N_INSNS (3), /* loadf. */
1507 COSTS_N_INSNS (3), /* loadd. */
1508 0, /* load_unaligned. */
1509 0, /* store. */
1510 0, /* strd. */
1511 0, /* stm_1st. */
1512 1, /* stm_regs_per_insn_1st. */
1513 2, /* stm_regs_per_insn_subsequent. */
1514 COSTS_N_INSNS (2), /* storef. */
1515 COSTS_N_INSNS (2), /* stored. */
1516 0, /* store_unaligned. */
1517 COSTS_N_INSNS (1), /* loadv. */
1518 COSTS_N_INSNS (1) /* storev. */
1519 },
1520 {
1521 /* FP SFmode */
1522 {
1523 COSTS_N_INSNS (17), /* div. */
1524 COSTS_N_INSNS (4), /* mult. */
1525 COSTS_N_INSNS (8), /* mult_addsub. */
1526 COSTS_N_INSNS (8), /* fma. */
1527 COSTS_N_INSNS (4), /* addsub. */
1528 COSTS_N_INSNS (2), /* fpconst. */
1529 COSTS_N_INSNS (2), /* neg. */
1530 COSTS_N_INSNS (2), /* compare. */
1531 COSTS_N_INSNS (4), /* widen. */
1532 COSTS_N_INSNS (4), /* narrow. */
1533 COSTS_N_INSNS (4), /* toint. */
1534 COSTS_N_INSNS (4), /* fromint. */
1535 COSTS_N_INSNS (4) /* roundint. */
1536 },
1537 /* FP DFmode */
1538 {
1539 COSTS_N_INSNS (31), /* div. */
1540 COSTS_N_INSNS (4), /* mult. */
1541 COSTS_N_INSNS (8), /* mult_addsub. */
1542 COSTS_N_INSNS (8), /* fma. */
1543 COSTS_N_INSNS (4), /* addsub. */
1544 COSTS_N_INSNS (2), /* fpconst. */
1545 COSTS_N_INSNS (2), /* neg. */
1546 COSTS_N_INSNS (2), /* compare. */
1547 COSTS_N_INSNS (4), /* widen. */
1548 COSTS_N_INSNS (4), /* narrow. */
1549 COSTS_N_INSNS (4), /* toint. */
1550 COSTS_N_INSNS (4), /* fromint. */
1551 COSTS_N_INSNS (4) /* roundint. */
1552 }
1553 },
1554 /* Vector */
1555 {
1556 COSTS_N_INSNS (1) /* alu. */
1557 }
1558 };
1559
1560 const struct cpu_cost_table cortexa15_extra_costs =
1561 {
1562 /* ALU */
1563 {
1564 0, /* arith. */
1565 0, /* logical. */
1566 0, /* shift. */
1567 0, /* shift_reg. */
1568 COSTS_N_INSNS (1), /* arith_shift. */
1569 COSTS_N_INSNS (1), /* arith_shift_reg. */
1570 COSTS_N_INSNS (1), /* log_shift. */
1571 COSTS_N_INSNS (1), /* log_shift_reg. */
1572 0, /* extend. */
1573 COSTS_N_INSNS (1), /* extend_arith. */
1574 COSTS_N_INSNS (1), /* bfi. */
1575 0, /* bfx. */
1576 0, /* clz. */
1577 0, /* rev. */
1578 0, /* non_exec. */
1579 true /* non_exec_costs_exec. */
1580 },
1581 /* MULT SImode */
1582 {
1583 {
1584 COSTS_N_INSNS (2), /* simple. */
1585 COSTS_N_INSNS (3), /* flag_setting. */
1586 COSTS_N_INSNS (2), /* extend. */
1587 COSTS_N_INSNS (2), /* add. */
1588 COSTS_N_INSNS (2), /* extend_add. */
1589 COSTS_N_INSNS (18) /* idiv. */
1590 },
1591 /* MULT DImode */
1592 {
1593 0, /* simple (N/A). */
1594 0, /* flag_setting (N/A). */
1595 COSTS_N_INSNS (3), /* extend. */
1596 0, /* add (N/A). */
1597 COSTS_N_INSNS (3), /* extend_add. */
1598 0 /* idiv (N/A). */
1599 }
1600 },
1601 /* LD/ST */
1602 {
1603 COSTS_N_INSNS (3), /* load. */
1604 COSTS_N_INSNS (3), /* load_sign_extend. */
1605 COSTS_N_INSNS (3), /* ldrd. */
1606 COSTS_N_INSNS (4), /* ldm_1st. */
1607 1, /* ldm_regs_per_insn_1st. */
1608 2, /* ldm_regs_per_insn_subsequent. */
1609 COSTS_N_INSNS (4), /* loadf. */
1610 COSTS_N_INSNS (4), /* loadd. */
1611 0, /* load_unaligned. */
1612 0, /* store. */
1613 0, /* strd. */
1614 COSTS_N_INSNS (1), /* stm_1st. */
1615 1, /* stm_regs_per_insn_1st. */
1616 2, /* stm_regs_per_insn_subsequent. */
1617 0, /* storef. */
1618 0, /* stored. */
1619 0, /* store_unaligned. */
1620 COSTS_N_INSNS (1), /* loadv. */
1621 COSTS_N_INSNS (1) /* storev. */
1622 },
1623 {
1624 /* FP SFmode */
1625 {
1626 COSTS_N_INSNS (17), /* div. */
1627 COSTS_N_INSNS (4), /* mult. */
1628 COSTS_N_INSNS (8), /* mult_addsub. */
1629 COSTS_N_INSNS (8), /* fma. */
1630 COSTS_N_INSNS (4), /* addsub. */
1631 COSTS_N_INSNS (2), /* fpconst. */
1632 COSTS_N_INSNS (2), /* neg. */
1633 COSTS_N_INSNS (5), /* compare. */
1634 COSTS_N_INSNS (4), /* widen. */
1635 COSTS_N_INSNS (4), /* narrow. */
1636 COSTS_N_INSNS (4), /* toint. */
1637 COSTS_N_INSNS (4), /* fromint. */
1638 COSTS_N_INSNS (4) /* roundint. */
1639 },
1640 /* FP DFmode */
1641 {
1642 COSTS_N_INSNS (31), /* div. */
1643 COSTS_N_INSNS (4), /* mult. */
1644 COSTS_N_INSNS (8), /* mult_addsub. */
1645 COSTS_N_INSNS (8), /* fma. */
1646 COSTS_N_INSNS (4), /* addsub. */
1647 COSTS_N_INSNS (2), /* fpconst. */
1648 COSTS_N_INSNS (2), /* neg. */
1649 COSTS_N_INSNS (2), /* compare. */
1650 COSTS_N_INSNS (4), /* widen. */
1651 COSTS_N_INSNS (4), /* narrow. */
1652 COSTS_N_INSNS (4), /* toint. */
1653 COSTS_N_INSNS (4), /* fromint. */
1654 COSTS_N_INSNS (4) /* roundint. */
1655 }
1656 },
1657 /* Vector */
1658 {
1659 COSTS_N_INSNS (1) /* alu. */
1660 }
1661 };
1662
1663 const struct cpu_cost_table v7m_extra_costs =
1664 {
1665 /* ALU */
1666 {
1667 0, /* arith. */
1668 0, /* logical. */
1669 0, /* shift. */
1670 0, /* shift_reg. */
1671 0, /* arith_shift. */
1672 COSTS_N_INSNS (1), /* arith_shift_reg. */
1673 0, /* log_shift. */
1674 COSTS_N_INSNS (1), /* log_shift_reg. */
1675 0, /* extend. */
1676 COSTS_N_INSNS (1), /* extend_arith. */
1677 0, /* bfi. */
1678 0, /* bfx. */
1679 0, /* clz. */
1680 0, /* rev. */
1681 COSTS_N_INSNS (1), /* non_exec. */
1682 false /* non_exec_costs_exec. */
1683 },
1684 {
1685 /* MULT SImode */
1686 {
1687 COSTS_N_INSNS (1), /* simple. */
1688 COSTS_N_INSNS (1), /* flag_setting. */
1689 COSTS_N_INSNS (2), /* extend. */
1690 COSTS_N_INSNS (1), /* add. */
1691 COSTS_N_INSNS (3), /* extend_add. */
1692 COSTS_N_INSNS (8) /* idiv. */
1693 },
1694 /* MULT DImode */
1695 {
1696 0, /* simple (N/A). */
1697 0, /* flag_setting (N/A). */
1698 COSTS_N_INSNS (2), /* extend. */
1699 0, /* add (N/A). */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 0 /* idiv (N/A). */
1702 }
1703 },
1704 /* LD/ST */
1705 {
1706 COSTS_N_INSNS (2), /* load. */
1707 0, /* load_sign_extend. */
1708 COSTS_N_INSNS (3), /* ldrd. */
1709 COSTS_N_INSNS (2), /* ldm_1st. */
1710 1, /* ldm_regs_per_insn_1st. */
1711 1, /* ldm_regs_per_insn_subsequent. */
1712 COSTS_N_INSNS (2), /* loadf. */
1713 COSTS_N_INSNS (3), /* loadd. */
1714 COSTS_N_INSNS (1), /* load_unaligned. */
1715 COSTS_N_INSNS (2), /* store. */
1716 COSTS_N_INSNS (3), /* strd. */
1717 COSTS_N_INSNS (2), /* stm_1st. */
1718 1, /* stm_regs_per_insn_1st. */
1719 1, /* stm_regs_per_insn_subsequent. */
1720 COSTS_N_INSNS (2), /* storef. */
1721 COSTS_N_INSNS (3), /* stored. */
1722 COSTS_N_INSNS (1), /* store_unaligned. */
1723 COSTS_N_INSNS (1), /* loadv. */
1724 COSTS_N_INSNS (1) /* storev. */
1725 },
1726 {
1727 /* FP SFmode */
1728 {
1729 COSTS_N_INSNS (7), /* div. */
1730 COSTS_N_INSNS (2), /* mult. */
1731 COSTS_N_INSNS (5), /* mult_addsub. */
1732 COSTS_N_INSNS (3), /* fma. */
1733 COSTS_N_INSNS (1), /* addsub. */
1734 0, /* fpconst. */
1735 0, /* neg. */
1736 0, /* compare. */
1737 0, /* widen. */
1738 0, /* narrow. */
1739 0, /* toint. */
1740 0, /* fromint. */
1741 0 /* roundint. */
1742 },
1743 /* FP DFmode */
1744 {
1745 COSTS_N_INSNS (15), /* div. */
1746 COSTS_N_INSNS (5), /* mult. */
1747 COSTS_N_INSNS (7), /* mult_addsub. */
1748 COSTS_N_INSNS (7), /* fma. */
1749 COSTS_N_INSNS (3), /* addsub. */
1750 0, /* fpconst. */
1751 0, /* neg. */
1752 0, /* compare. */
1753 0, /* widen. */
1754 0, /* narrow. */
1755 0, /* toint. */
1756 0, /* fromint. */
1757 0 /* roundint. */
1758 }
1759 },
1760 /* Vector */
1761 {
1762 COSTS_N_INSNS (1) /* alu. */
1763 }
1764 };
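
/* Illustrative note, not part of the tables themselves: COSTS_N_INSNS (N)
   expands to N * 4 "cost units" (see rtl.h), and every entry above is an
   *extra* cost added on top of the base cost the rtx-cost hooks compute
   for an insn.  For example, the SFmode divide entry in v7m_extra_costs,
   COSTS_N_INSNS (7), contributes 28 extra units, while a zero entry marks
   an operation considered no more expensive than a simple ALU insn.  */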
1765
1766 const struct tune_params arm_slowmul_tune =
1767 {
1768 &generic_extra_costs, /* Insn extra costs. */
1769 NULL, /* Sched adj cost. */
1770 arm_default_branch_cost,
1771 &arm_default_vec_cost,
1772 3, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 tune_params::PREF_CONST_POOL_TRUE,
1778 tune_params::PREF_LDRD_FALSE,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER,
1782 tune_params::PREF_NEON_64_FALSE,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE,
1784 tune_params::FUSE_NOTHING,
1785 tune_params::SCHED_AUTOPREF_OFF
1786 };
1787
1788 const struct tune_params arm_fastmul_tune =
1789 {
1790 &generic_extra_costs, /* Insn extra costs. */
1791 NULL, /* Sched adj cost. */
1792 arm_default_branch_cost,
1793 &arm_default_vec_cost,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL,
1799 tune_params::PREF_CONST_POOL_TRUE,
1800 tune_params::PREF_LDRD_FALSE,
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER,
1804 tune_params::PREF_NEON_64_FALSE,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE,
1806 tune_params::FUSE_NOTHING,
1807 tune_params::SCHED_AUTOPREF_OFF
1808 };
1809
1810 /* StrongARM has early execution of branches, so a sequence that is worth
1811 skipping is shorter. Set max_insns_skipped to a lower value. */
1812
1813 const struct tune_params arm_strongarm_tune =
1814 {
1815 &generic_extra_costs, /* Insn extra costs. */
1816 NULL, /* Sched adj cost. */
1817 arm_default_branch_cost,
1818 &arm_default_vec_cost,
1819 1, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 tune_params::PREF_CONST_POOL_TRUE,
1825 tune_params::PREF_LDRD_FALSE,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER,
1829 tune_params::PREF_NEON_64_FALSE,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE,
1831 tune_params::FUSE_NOTHING,
1832 tune_params::SCHED_AUTOPREF_OFF
1833 };
1834
1835 const struct tune_params arm_xscale_tune =
1836 {
1837 &generic_extra_costs, /* Insn extra costs. */
1838 xscale_sched_adjust_cost,
1839 arm_default_branch_cost,
1840 &arm_default_vec_cost,
1841 2, /* Constant limit. */
1842 3, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 tune_params::PREF_CONST_POOL_TRUE,
1847 tune_params::PREF_LDRD_FALSE,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER,
1851 tune_params::PREF_NEON_64_FALSE,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE,
1853 tune_params::FUSE_NOTHING,
1854 tune_params::SCHED_AUTOPREF_OFF
1855 };
1856
1857 const struct tune_params arm_9e_tune =
1858 {
1859 &generic_extra_costs, /* Insn extra costs. */
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 1, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_TRUE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1877 };
1878
1879 const struct tune_params arm_marvell_pj4_tune =
1880 {
1881 &generic_extra_costs, /* Insn extra costs. */
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_TRUE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1899 };
1900
1901 const struct tune_params arm_v6t2_tune =
1902 {
1903 &generic_extra_costs, /* Insn extra costs. */
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 1, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1921 };
1922
1923
1924 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1925 const struct tune_params arm_cortex_tune =
1926 {
1927 &generic_extra_costs,
1928 NULL, /* Sched adj cost. */
1929 arm_default_branch_cost,
1930 &arm_default_vec_cost,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL,
1936 tune_params::PREF_CONST_POOL_FALSE,
1937 tune_params::PREF_LDRD_FALSE,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER,
1941 tune_params::PREF_NEON_64_FALSE,
1942 tune_params::PREF_NEON_STRINGOPS_FALSE,
1943 tune_params::FUSE_NOTHING,
1944 tune_params::SCHED_AUTOPREF_OFF
1945 };
1946
1947 const struct tune_params arm_cortex_a8_tune =
1948 {
1949 &cortexa8_extra_costs,
1950 NULL, /* Sched adj cost. */
1951 arm_default_branch_cost,
1952 &arm_default_vec_cost,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL,
1958 tune_params::PREF_CONST_POOL_FALSE,
1959 tune_params::PREF_LDRD_FALSE,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER,
1963 tune_params::PREF_NEON_64_FALSE,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE,
1965 tune_params::FUSE_NOTHING,
1966 tune_params::SCHED_AUTOPREF_OFF
1967 };
1968
1969 const struct tune_params arm_cortex_a7_tune =
1970 {
1971 &cortexa7_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 arm_default_branch_cost,
1974 &arm_default_vec_cost,
1975 1, /* Constant limit. */
1976 5, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 2, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 tune_params::PREF_CONST_POOL_FALSE,
1981 tune_params::PREF_LDRD_FALSE,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_NEITHER,
1985 tune_params::PREF_NEON_64_FALSE,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE,
1987 tune_params::FUSE_NOTHING,
1988 tune_params::SCHED_AUTOPREF_OFF
1989 };
1990
1991 const struct tune_params arm_cortex_a15_tune =
1992 {
1993 &cortexa15_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 3, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_TRUE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE,
2009 tune_params::FUSE_NOTHING,
2010 tune_params::SCHED_AUTOPREF_FULL
2011 };
2012
2013 const struct tune_params arm_cortex_a35_tune =
2014 {
2015 &cortexa53_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 arm_default_branch_cost,
2018 &arm_default_vec_cost,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 1, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL,
2024 tune_params::PREF_CONST_POOL_FALSE,
2025 tune_params::PREF_LDRD_FALSE,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER,
2029 tune_params::PREF_NEON_64_FALSE,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2032 tune_params::SCHED_AUTOPREF_OFF
2033 };
2034
2035 const struct tune_params arm_cortex_a53_tune =
2036 {
2037 &cortexa53_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_64_FALSE,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2054 tune_params::SCHED_AUTOPREF_OFF
2055 };
2056
2057 const struct tune_params arm_cortex_a57_tune =
2058 {
2059 &cortexa57_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_TRUE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2076 tune_params::SCHED_AUTOPREF_FULL
2077 };
2078
2079 const struct tune_params arm_exynosm1_tune =
2080 {
2081 &exynosm1_extra_costs,
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 3, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_TRUE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_OFF
2099 };
2100
2101 const struct tune_params arm_xgene1_tune =
2102 {
2103 &xgene1_extra_costs,
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 32, /* Memset max inline. */
2110 4, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_64_FALSE,
2118 tune_params::PREF_NEON_STRINGOPS_FALSE,
2119 tune_params::FUSE_NOTHING,
2120 tune_params::SCHED_AUTOPREF_OFF
2121 };
2122
2123 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2124 less appealing. Set max_insns_skipped to a low value. */
2125
2126 const struct tune_params arm_cortex_a5_tune =
2127 {
2128 &cortexa5_extra_costs,
2129 NULL, /* Sched adj cost. */
2130 arm_cortex_a5_branch_cost,
2131 &arm_default_vec_cost,
2132 1, /* Constant limit. */
2133 1, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_NOT_BENEFICIAL,
2137 tune_params::PREF_CONST_POOL_FALSE,
2138 tune_params::PREF_LDRD_FALSE,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER,
2142 tune_params::PREF_NEON_64_FALSE,
2143 tune_params::PREF_NEON_STRINGOPS_TRUE,
2144 tune_params::FUSE_NOTHING,
2145 tune_params::SCHED_AUTOPREF_OFF
2146 };
2147
2148 const struct tune_params arm_cortex_a9_tune =
2149 {
2150 &cortexa9_extra_costs,
2151 cortex_a9_sched_adjust_cost,
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 5, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_BENEFICIAL(4,32,32),
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_FALSE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_NEITHER,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2168 };
2169
2170 const struct tune_params arm_cortex_a12_tune =
2171 {
2172 &cortexa12_extra_costs,
2173 NULL, /* Sched adj cost. */
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_TRUE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2189 tune_params::SCHED_AUTOPREF_OFF
2190 };
2191
2192 const struct tune_params arm_cortex_a73_tune =
2193 {
2194 &cortexa57_extra_costs,
2195 NULL, /* Sched adj cost. */
2196 arm_default_branch_cost,
2197 &arm_default_vec_cost, /* Vectorizer costs. */
2198 1, /* Constant limit. */
2199 2, /* Max cond insns. */
2200 8, /* Memset max inline. */
2201 2, /* Issue rate. */
2202 ARM_PREFETCH_NOT_BENEFICIAL,
2203 tune_params::PREF_CONST_POOL_FALSE,
2204 tune_params::PREF_LDRD_TRUE,
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2207 tune_params::DISPARAGE_FLAGS_ALL,
2208 tune_params::PREF_NEON_64_FALSE,
2209 tune_params::PREF_NEON_STRINGOPS_TRUE,
2210 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2211 tune_params::SCHED_AUTOPREF_FULL
2212 };
2213
2214 /* armv7m tuning. On Cortex-M4 cores for example, MOVW and MOVT each take a
2215 single cycle, so a MOVW/MOVT pair costs two cycles. An LDR from the constant
2216 pool also takes two cycles to execute, but mildly increases pipelining
2217 opportunity (consecutive loads/stores can be pipelined together, saving one
2218 cycle), and may also improve icache utilisation. Hence we prefer the
2219 constant pool for such processors. */
2220
2221 const struct tune_params arm_v7m_tune =
2222 {
2223 &v7m_extra_costs,
2224 NULL, /* Sched adj cost. */
2225 arm_cortex_m_branch_cost,
2226 &arm_default_vec_cost,
2227 1, /* Constant limit. */
2228 2, /* Max cond insns. */
2229 8, /* Memset max inline. */
2230 1, /* Issue rate. */
2231 ARM_PREFETCH_NOT_BENEFICIAL,
2232 tune_params::PREF_CONST_POOL_TRUE,
2233 tune_params::PREF_LDRD_FALSE,
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2235 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2236 tune_params::DISPARAGE_FLAGS_NEITHER,
2237 tune_params::PREF_NEON_64_FALSE,
2238 tune_params::PREF_NEON_STRINGOPS_FALSE,
2239 tune_params::FUSE_NOTHING,
2240 tune_params::SCHED_AUTOPREF_OFF
2241 };
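
/* Illustrative sketch of the trade-off described in the comment above
   (hypothetical register and label names, Armv7-M assumed):

     movw r0, #:lower16:some_const   @ 1 cycle
     movt r0, #:upper16:some_const   @ 1 cycle; the pair totals 2 cycles

   versus a constant-pool load:

     ldr  r0, .Lpool_entry           @ 2 cycles, but back-to-back pool
                                     @ loads can pipeline and the pool
                                     @ may improve icache utilisation.

   Hence PREF_CONST_POOL_TRUE in arm_v7m_tune above.  */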
2242
2243 /* Cortex-M7 tuning. */
2244
2245 const struct tune_params arm_cortex_m7_tune =
2246 {
2247 &v7m_extra_costs,
2248 NULL, /* Sched adj cost. */
2249 arm_cortex_m7_branch_cost,
2250 &arm_default_vec_cost,
2251 0, /* Constant limit. */
2252 1, /* Max cond insns. */
2253 8, /* Memset max inline. */
2254 2, /* Issue rate. */
2255 ARM_PREFETCH_NOT_BENEFICIAL,
2256 tune_params::PREF_CONST_POOL_TRUE,
2257 tune_params::PREF_LDRD_FALSE,
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2260 tune_params::DISPARAGE_FLAGS_NEITHER,
2261 tune_params::PREF_NEON_64_FALSE,
2262 tune_params::PREF_NEON_STRINGOPS_FALSE,
2263 tune_params::FUSE_NOTHING,
2264 tune_params::SCHED_AUTOPREF_OFF
2265 };
2266
2267 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2268 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2269 cortex-m23. */
2270 const struct tune_params arm_v6m_tune =
2271 {
2272 &generic_extra_costs, /* Insn extra costs. */
2273 NULL, /* Sched adj cost. */
2274 arm_default_branch_cost,
2275 &arm_default_vec_cost, /* Vectorizer costs. */
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 1, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL,
2281 tune_params::PREF_CONST_POOL_FALSE,
2282 tune_params::PREF_LDRD_FALSE,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER,
2286 tune_params::PREF_NEON_64_FALSE,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_fa726te_tune =
2293 {
2294 &generic_extra_costs, /* Insn extra costs. */
2295 fa726te_sched_adjust_cost,
2296 arm_default_branch_cost,
2297 &arm_default_vec_cost,
2298 1, /* Constant limit. */
2299 5, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* Auto-generated CPU, FPU and architecture tables. */
2315 #include "arm-cpu-data.h"
2316
2317 /* The name of the preprocessor macro to define for this architecture. PROFILE
2318 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2319 is thus chosen to be big enough to hold the longest architecture name. */
2320
2321 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2322
2323 /* Supported TLS relocations. */
2324
2325 enum tls_reloc {
2326 TLS_GD32,
2327 TLS_LDM32,
2328 TLS_LDO32,
2329 TLS_IE32,
2330 TLS_LE32,
2331 TLS_DESCSEQ /* GNU scheme */
2332 };
2333
2334 /* The maximum number of insns to be used when loading a constant. */
2335 inline static int
2336 arm_constant_limit (bool size_p)
2337 {
2338 return size_p ? 1 : current_tune->constant_limit;
2339 }
2340
2341 /* Emit an insn that's a simple single-set. Both the operands must be known
2342 to be valid. */
2343 inline static rtx_insn *
2344 emit_set_insn (rtx x, rtx y)
2345 {
2346 return emit_insn (gen_rtx_SET (x, y));
2347 }
2348
2349 /* Return the number of bits set in VALUE. */
2350 static unsigned
2351 bit_count (unsigned long value)
2352 {
2353 unsigned long count = 0;
2354
2355 while (value)
2356 {
2357 count++;
2358 value &= value - 1; /* Clear the least-significant set bit. */
2359 }
2360
2361 return count;
2362 }
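
#if 0
/* Illustrative sketch only, never compiled as part of this file: a
   self-contained demo of the "v &= v - 1" trick used by bit_count above.
   Each iteration clears exactly one set bit, so the loop body runs once
   per set bit.  */
#include <assert.h>

static unsigned
sketch_popcount (unsigned long v)
{
  unsigned n = 0;
  for (; v != 0; v &= v - 1)	/* Clear the least-significant set bit.  */
    n++;
  return n;
}

int
main (void)
{
  assert (sketch_popcount (0x0UL) == 0);
  assert (sketch_popcount (0xb1UL) == 4);	/* 1011 0001 has four set bits.  */
  return 0;
}
#endif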
2363
2364 /* Return the number of bits set in BMAP. */
2365 static unsigned
2366 bitmap_popcount (const sbitmap bmap)
2367 {
2368 unsigned int count = 0;
2369 unsigned int n = 0;
2370 sbitmap_iterator sbi;
2371
2372 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2373 count++;
2374 return count;
2375 }
2376
2377 typedef struct
2378 {
2379 machine_mode mode;
2380 const char *name;
2381 } arm_fixed_mode_set;
2382
2383 /* A small helper for setting fixed-point library libfuncs. */
2384
2385 static void
2386 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2387 const char *funcname, const char *modename,
2388 int num_suffix)
2389 {
2390 char buffer[50];
2391
2392 if (num_suffix == 0)
2393 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2394 else
2395 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2396
2397 set_optab_libfunc (optable, mode, buffer);
2398 }
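
/* For example, with the mode tables used further below, the call
   arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3"; a num_suffix of zero would
   simply omit the trailing digit.  */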
2399
2400 static void
2401 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2402 machine_mode from, const char *funcname,
2403 const char *toname, const char *fromname)
2404 {
2405 char buffer[50];
2406 const char *maybe_suffix_2 = "";
2407
2408 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2409 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2410 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2411 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2412 maybe_suffix_2 = "2";
2413
2414 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2415 maybe_suffix_2);
2416
2417 set_conv_libfunc (optable, to, from, buffer);
2418 }
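
/* For example, a signed fract-to-fract conversion such as QQmode to
   HQmode picks up the "2" suffix and registers "__gnu_fractqqhq2",
   whereas a conversion involving a non-fixed-point mode, such as SQmode
   to DFmode, registers plain "__gnu_fractsqdf".  */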
2419
2420 /* Set up library functions unique to ARM. */
2421
2422 static void
2423 arm_init_libfuncs (void)
2424 {
2425 /* For Linux, we have access to kernel support for atomic operations. */
2426 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2427 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2428
2429 /* There are no special library functions unless we are using the
2430 ARM BPABI. */
2431 if (!TARGET_BPABI)
2432 return;
2433
2434 /* The functions below are described in Section 4 of the "Run-Time
2435 ABI for the ARM architecture", Version 1.0. */
2436
2437 /* Double-precision floating-point arithmetic. Table 2. */
2438 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2439 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2440 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2441 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2442 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2443
2444 /* Double-precision comparisons. Table 3. */
2445 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2446 set_optab_libfunc (ne_optab, DFmode, NULL);
2447 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2448 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2449 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2450 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2451 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2452
2453 /* Single-precision floating-point arithmetic. Table 4. */
2454 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2455 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2456 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2457 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2458 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2459
2460 /* Single-precision comparisons. Table 5. */
2461 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2462 set_optab_libfunc (ne_optab, SFmode, NULL);
2463 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2464 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2465 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2466 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2467 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2468
2469 /* Floating-point to integer conversions. Table 6. */
2470 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2471 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2472 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2473 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2474 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2475 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2476 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2477 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2478
2479 /* Conversions between floating types. Table 7. */
2480 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2481 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2482
2483 /* Integer to floating-point conversions. Table 8. */
2484 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2486 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2487 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2488 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2490 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2491 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2492
2493 /* Long long. Table 9. */
2494 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2495 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2496 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2497 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2498 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2499 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2500 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2501 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2502
2503 /* Integer (32/32->32) division. \S 4.3.1. */
2504 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2505 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2506
2507 /* The divmod functions are designed so that they can be used for
2508 plain division, even though they return both the quotient and the
2509 remainder. The quotient is returned in the usual location (i.e.,
2510 r0 for SImode, {r0, r1} for DImode), just as would be expected
2511 for an ordinary division routine. Because the AAPCS calling
2512 conventions specify that all of { r0, r1, r2, r3 } are
2513 call-clobbered registers, there is no need to tell the compiler
2514 explicitly that those registers are clobbered by these
2515 routines. */
2516 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2517 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2518
2519 /* For SImode division the ABI provides div-without-mod routines,
2520 which are faster. */
2521 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2522 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2523
2524 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2525 divmod libcalls instead. */
2526 set_optab_libfunc (smod_optab, DImode, NULL);
2527 set_optab_libfunc (umod_optab, DImode, NULL);
2528 set_optab_libfunc (smod_optab, SImode, NULL);
2529 set_optab_libfunc (umod_optab, SImode, NULL);
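
/* Illustrative note: per the Run-time ABI, __aeabi_idivmod returns the
   quotient in r0 and the remainder in r1 (and __aeabi_uldivmod returns
   them in {r0, r1} and {r2, r3} respectively), so with the mod optabs
   cleared above a plain "a % b" expands through the divmod libcall and
   the remainder is taken from the second return location.  */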
2530
2531 /* Half-precision float operations. The compiler handles all operations
2532 with NULL libfuncs by converting to SFmode. */
2533 switch (arm_fp16_format)
2534 {
2535 case ARM_FP16_FORMAT_IEEE:
2536 case ARM_FP16_FORMAT_ALTERNATIVE:
2537
2538 /* Conversions. */
2539 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_f2h_ieee"
2542 : "__gnu_f2h_alternative"));
2543 set_conv_libfunc (sext_optab, SFmode, HFmode,
2544 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2545 ? "__gnu_h2f_ieee"
2546 : "__gnu_h2f_alternative"));
2547
2548 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2549 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2550 ? "__gnu_d2h_ieee"
2551 : "__gnu_d2h_alternative"));
2552
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, HFmode, NULL);
2555 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2556 set_optab_libfunc (smul_optab, HFmode, NULL);
2557 set_optab_libfunc (neg_optab, HFmode, NULL);
2558 set_optab_libfunc (sub_optab, HFmode, NULL);
2559
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, HFmode, NULL);
2562 set_optab_libfunc (ne_optab, HFmode, NULL);
2563 set_optab_libfunc (lt_optab, HFmode, NULL);
2564 set_optab_libfunc (le_optab, HFmode, NULL);
2565 set_optab_libfunc (ge_optab, HFmode, NULL);
2566 set_optab_libfunc (gt_optab, HFmode, NULL);
2567 set_optab_libfunc (unord_optab, HFmode, NULL);
2568 break;
2569
2570 default:
2571 break;
2572 }
2573
2574 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2575 {
2576 const arm_fixed_mode_set fixed_arith_modes[] =
2577 {
2578 { E_QQmode, "qq" },
2579 { E_UQQmode, "uqq" },
2580 { E_HQmode, "hq" },
2581 { E_UHQmode, "uhq" },
2582 { E_SQmode, "sq" },
2583 { E_USQmode, "usq" },
2584 { E_DQmode, "dq" },
2585 { E_UDQmode, "udq" },
2586 { E_TQmode, "tq" },
2587 { E_UTQmode, "utq" },
2588 { E_HAmode, "ha" },
2589 { E_UHAmode, "uha" },
2590 { E_SAmode, "sa" },
2591 { E_USAmode, "usa" },
2592 { E_DAmode, "da" },
2593 { E_UDAmode, "uda" },
2594 { E_TAmode, "ta" },
2595 { E_UTAmode, "uta" }
2596 };
2597 const arm_fixed_mode_set fixed_conv_modes[] =
2598 {
2599 { E_QQmode, "qq" },
2600 { E_UQQmode, "uqq" },
2601 { E_HQmode, "hq" },
2602 { E_UHQmode, "uhq" },
2603 { E_SQmode, "sq" },
2604 { E_USQmode, "usq" },
2605 { E_DQmode, "dq" },
2606 { E_UDQmode, "udq" },
2607 { E_TQmode, "tq" },
2608 { E_UTQmode, "utq" },
2609 { E_HAmode, "ha" },
2610 { E_UHAmode, "uha" },
2611 { E_SAmode, "sa" },
2612 { E_USAmode, "usa" },
2613 { E_DAmode, "da" },
2614 { E_UDAmode, "uda" },
2615 { E_TAmode, "ta" },
2616 { E_UTAmode, "uta" },
2617 { E_QImode, "qi" },
2618 { E_HImode, "hi" },
2619 { E_SImode, "si" },
2620 { E_DImode, "di" },
2621 { E_TImode, "ti" },
2622 { E_SFmode, "sf" },
2623 { E_DFmode, "df" }
2624 };
2625 unsigned int i, j;
2626
2627 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2628 {
2629 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2630 "add", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2632 "ssadd", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2634 "usadd", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2636 "sub", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2638 "sssub", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2640 "ussub", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2642 "mul", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2644 "ssmul", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2646 "usmul", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2648 "div", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2650 "udiv", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2652 "ssdiv", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2654 "usdiv", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2656 "neg", fixed_arith_modes[i].name, 2);
2657 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2658 "ssneg", fixed_arith_modes[i].name, 2);
2659 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2660 "usneg", fixed_arith_modes[i].name, 2);
2661 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2662 "ashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2664 "ashr", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2666 "lshr", fixed_arith_modes[i].name, 3);
2667 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2668 "ssashl", fixed_arith_modes[i].name, 3);
2669 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2670 "usashl", fixed_arith_modes[i].name, 3);
2671 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2672 "cmp", fixed_arith_modes[i].name, 2);
2673 }
2674
2675 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2676 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2677 {
2678 if (i == j
2679 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2680 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2681 continue;
2682
2683 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "fract",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2687 arm_set_fixed_conv_libfunc (satfract_optab,
2688 fixed_conv_modes[i].mode,
2689 fixed_conv_modes[j].mode, "satfract",
2690 fixed_conv_modes[i].name,
2691 fixed_conv_modes[j].name);
2692 arm_set_fixed_conv_libfunc (fractuns_optab,
2693 fixed_conv_modes[i].mode,
2694 fixed_conv_modes[j].mode, "fractuns",
2695 fixed_conv_modes[i].name,
2696 fixed_conv_modes[j].name);
2697 arm_set_fixed_conv_libfunc (satfractuns_optab,
2698 fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "satfractuns",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2702 }
2703 }
2704
2705 if (TARGET_AAPCS_BASED)
2706 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2707 }
2708
2709 /* On AAPCS systems, this is the "struct __va_list". */
2710 static GTY(()) tree va_list_type;
2711
2712 /* Return the type to use as __builtin_va_list. */
2713 static tree
2714 arm_build_builtin_va_list (void)
2715 {
2716 tree va_list_name;
2717 tree ap_field;
2718
2719 if (!TARGET_AAPCS_BASED)
2720 return std_build_builtin_va_list ();
2721
2722 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2723 defined as:
2724
2725 struct __va_list
2726 {
2727 void *__ap;
2728 };
2729
2730 The C Library ABI further reinforces this definition in \S
2731 4.1.
2732
2733 We must follow this definition exactly. The structure tag
2734 name is visible in C++ mangled names, and thus forms a part
2735 of the ABI. The field name may be used by people who
2736 #include <stdarg.h>. */
2737 /* Create the type. */
2738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2739 /* Give it the required name. */
2740 va_list_name = build_decl (BUILTINS_LOCATION,
2741 TYPE_DECL,
2742 get_identifier ("__va_list"),
2743 va_list_type);
2744 DECL_ARTIFICIAL (va_list_name) = 1;
2745 TYPE_NAME (va_list_type) = va_list_name;
2746 TYPE_STUB_DECL (va_list_type) = va_list_name;
2747 /* Create the __ap field. */
2748 ap_field = build_decl (BUILTINS_LOCATION,
2749 FIELD_DECL,
2750 get_identifier ("__ap"),
2751 ptr_type_node);
2752 DECL_ARTIFICIAL (ap_field) = 1;
2753 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2754 TYPE_FIELDS (va_list_type) = ap_field;
2755 /* Compute its layout. */
2756 layout_type (va_list_type);
2757
2758 return va_list_type;
2759 }
2760
2761 /* Return an expression of type "void *" pointing to the next
2762 available argument in a variable-argument list. VALIST is the
2763 user-level va_list object, of type __builtin_va_list. */
2764 static tree
2765 arm_extract_valist_ptr (tree valist)
2766 {
2767 if (TREE_TYPE (valist) == error_mark_node)
2768 return error_mark_node;
2769
2770 /* On an AAPCS target, the pointer is stored within "struct
2771 va_list". */
2772 if (TARGET_AAPCS_BASED)
2773 {
2774 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2775 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2776 valist, ap_field, NULL_TREE);
2777 }
2778
2779 return valist;
2780 }
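
/* Illustrative note: on an AAPCS target, given "va_list ap;" the tree
   built above is effectively the field reference "ap.__ap", i.e. the
   single pointer member of the struct created by
   arm_build_builtin_va_list; on other targets "ap" is already a bare
   pointer and is returned unchanged.  */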
2781
2782 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2783 static void
2784 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2785 {
2786 valist = arm_extract_valist_ptr (valist);
2787 std_expand_builtin_va_start (valist, nextarg);
2788 }
2789
2790 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2791 static tree
2792 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2793 gimple_seq *post_p)
2794 {
2795 valist = arm_extract_valist_ptr (valist);
2796 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2797 }
2798
2799 /* Check any incompatible options that the user has specified. */
2800 static void
2801 arm_option_check_internal (struct gcc_options *opts)
2802 {
2803 int flags = opts->x_target_flags;
2804
2805 /* iWMMXt and NEON are incompatible. */
2806 if (TARGET_IWMMXT
2807 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2808 error ("iWMMXt and NEON are incompatible");
2809
2810 /* Make sure that the processor choice does not conflict with any of the
2811 other command line choices. */
2812 if (TARGET_ARM_P (flags)
2813 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2814 error ("target CPU does not support ARM mode");
2815
2816 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2817 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2818 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2819
2820 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2821 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2822
2823 /* If this target is normally configured to use APCS frames, warn if they
2824 are turned off and debugging is turned on. */
2825 if (TARGET_ARM_P (flags)
2826 && write_symbols != NO_DEBUG
2827 && !TARGET_APCS_FRAME
2828 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2829 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2830
2831 /* iWMMXt unsupported under Thumb mode. */
2832 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2833 error ("iWMMXt unsupported under Thumb mode");
2834
2835 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2836 error ("can not use -mtp=cp15 with 16-bit Thumb");
2837
2838 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2839 {
2840 error ("RTP PIC is incompatible with Thumb");
2841 flag_pic = 0;
2842 }
2843
2844 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2845 with MOVT. */
2846 if ((target_pure_code || target_slow_flash_data)
2847 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2848 {
2849 const char *flag = (target_pure_code ? "-mpure-code" :
2850 "-mslow-flash-data");
2851 error ("%s only supports non-pic code on M-profile targets with the "
2852 "MOVT instruction", flag);
2853 }
2854
2855 }
2856
2857 /* Recompute the global settings depending on target attribute options. */
2858
2859 static void
2860 arm_option_params_internal (void)
2861 {
2862 /* If we are not using the default (ARM mode) section anchor offset
2863 ranges, then set the correct ranges now. */
2864 if (TARGET_THUMB1)
2865 {
2866 /* Thumb-1 LDR instructions cannot have negative offsets.
2867 Permissible positive offset ranges are 5-bit (for byte loads),
2868 6-bit (for halfword loads), or 7-bit (for word loads).
2869 Empirical results suggest a 7-bit anchor range gives the best
2870 overall code size. */
2871 targetm.min_anchor_offset = 0;
2872 targetm.max_anchor_offset = 127;
2873 }
2874 else if (TARGET_THUMB2)
2875 {
2876 /* The minimum is set such that the total size of the block
2877 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2878 divisible by eight (4344 = 8 * 543), ensuring natural spacing of anchors. */
2879 targetm.min_anchor_offset = -248;
2880 targetm.max_anchor_offset = 4095;
2881 }
2882 else
2883 {
2884 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2885 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2886 }
2887
2888 /* Increase the number of conditional instructions with -Os. */
2889 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2890
2891 /* For THUMB2, we limit the conditional sequence to one IT block. */
2892 if (TARGET_THUMB2)
2893 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2894 }
2895
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2899
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2902
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2905 {
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
2909 }
2910
2911 /* Implement targetm.override_options_after_change. */
2912
2913 static void
2914 arm_override_options_after_change (void)
2915 {
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2919
2920 arm_override_options_after_change_1 (&global_options);
2921 }
2922
2923 /* Implement TARGET_OPTION_SAVE. */
2924 static void
2925 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2926 {
2927 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2928 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2929 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2930 }
2931
2932 /* Implement TARGET_OPTION_RESTORE. */
2933 static void
2934 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2935 {
2936 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2937 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2938 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2939 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2940 false);
2941 }
2942
2943 /* Reset options between modes that the user has specified. */
2944 static void
2945 arm_option_override_internal (struct gcc_options *opts,
2946 struct gcc_options *opts_set)
2947 {
2948 arm_override_options_after_change_1 (opts);
2949
2950 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2951 {
2952 /* The default is to enable interworking, so this warning message would
2953 be confusing to users who have just compiled with, e.g., -march=armv3. */
2954 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2955 opts->x_target_flags &= ~MASK_INTERWORK;
2956 }
2957
2958 if (TARGET_THUMB_P (opts->x_target_flags)
2959 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2960 {
2961 warning (0, "target CPU does not support THUMB instructions");
2962 opts->x_target_flags &= ~MASK_THUMB;
2963 }
2964
2965 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2966 {
2967 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2968 opts->x_target_flags &= ~MASK_APCS_FRAME;
2969 }
2970
2971 /* Callee super interworking implies thumb interworking. Adding
2972 this to the flags here simplifies the logic elsewhere. */
2973 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2974 opts->x_target_flags |= MASK_INTERWORK;
2975
2976 /* Need to remember initial values so combinations of options like
2977 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2978 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2979
2980 if (! opts_set->x_arm_restrict_it)
2981 opts->x_arm_restrict_it = arm_arch8;
2982
2983 /* ARM execution state and M profile don't have [restrict] IT. */
2984 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2985 opts->x_arm_restrict_it = 0;
2986
2987 /* Enable -munaligned-access by default for
2988 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2989 i.e. Thumb2 and ARM state only.
2990 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2991 - ARMv8 architecture-based processors.
2992
2993 Disable -munaligned-access by default for
2994 - all pre-ARMv6 architecture-based processors
2995 - ARMv6-M architecture-based processors
2996 - ARMv8-M Baseline processors. */
2997
2998 if (! opts_set->x_unaligned_access)
2999 {
3000 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3001 && arm_arch6 && (arm_arch_notm || arm_arch7));
3002 }
3003 else if (opts->x_unaligned_access == 1
3004 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3005 {
3006 warning (0, "target CPU does not support unaligned accesses");
3007 opts->x_unaligned_access = 0;
3008 }
3009
3010 /* Don't warn since it's on by default in -O2. */
3011 if (TARGET_THUMB1_P (opts->x_target_flags))
3012 opts->x_flag_schedule_insns = 0;
3013 else
3014 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3015
3016 /* Disable shrink-wrap when optimizing function for size, since it tends to
3017 generate additional returns. */
3018 if (optimize_function_for_size_p (cfun)
3019 && TARGET_THUMB2_P (opts->x_target_flags))
3020 opts->x_flag_shrink_wrap = false;
3021 else
3022 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3023
3024 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3025 - epilogue_insns - does not accurately model the corresponding insns
3026 emitted in the asm file. In particular, see the comment in thumb_exit
3027 'Find out how many of the (return) argument registers we can corrupt'.
3028 As a consequence, the epilogue may clobber registers without fipa-ra
3029 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3030 TODO: Accurately model clobbers for epilogue_insns and reenable
3031 fipa-ra. */
3032 if (TARGET_THUMB1_P (opts->x_target_flags))
3033 opts->x_flag_ipa_ra = 0;
3034 else
3035 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3036
3037 /* Thumb2 inline assembly code should always use unified syntax.
3038 This will apply to ARM and Thumb1 eventually. */
3039 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3040
3041 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3042 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3043 #endif
3044 }
3045
3046 static sbitmap isa_all_fpubits;
3047 static sbitmap isa_quirkbits;
3048
3049 /* Configure a build target TARGET from the user-specified options OPTS and
3050 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3051 architecture have been specified, but the two are not identical. */
3052 void
3053 arm_configure_build_target (struct arm_build_target *target,
3054 struct cl_target_option *opts,
3055 struct gcc_options *opts_set,
3056 bool warn_compatible)
3057 {
3058 const cpu_option *arm_selected_tune = NULL;
3059 const arch_option *arm_selected_arch = NULL;
3060 const cpu_option *arm_selected_cpu = NULL;
3061 const arm_fpu_desc *arm_selected_fpu = NULL;
3062 const char *tune_opts = NULL;
3063 const char *arch_opts = NULL;
3064 const char *cpu_opts = NULL;
3065
3066 bitmap_clear (target->isa);
3067 target->core_name = NULL;
3068 target->arch_name = NULL;
3069
3070 if (opts_set->x_arm_arch_string)
3071 {
3072 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3073 "-march",
3074 opts->x_arm_arch_string);
3075 arch_opts = strchr (opts->x_arm_arch_string, '+');
3076 }
3077
3078 if (opts_set->x_arm_cpu_string)
3079 {
3080 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3081 opts->x_arm_cpu_string);
3082 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3083 arm_selected_tune = arm_selected_cpu;
3084 /* If taking the tuning from -mcpu, we don't need to rescan the
3085 options for tuning. */
3086 }
3087
3088 if (opts_set->x_arm_tune_string)
3089 {
3090 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3091 opts->x_arm_tune_string);
3092 tune_opts = strchr (opts->x_arm_tune_string, '+');
3093 }
3094
3095 if (arm_selected_arch)
3096 {
3097 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3098 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3099 arch_opts);
3100
3101 if (arm_selected_cpu)
3102 {
3103 auto_sbitmap cpu_isa (isa_num_bits);
3104 auto_sbitmap isa_delta (isa_num_bits);
3105
3106 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3107 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3108 cpu_opts);
3109 bitmap_xor (isa_delta, cpu_isa, target->isa);
3110 /* Ignore any bits that are quirk bits. */
3111 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3112 /* Ignore (for now) any bits that might be set by -mfpu. */
3113 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3114
3115 if (!bitmap_empty_p (isa_delta))
3116 {
3117 if (warn_compatible)
3118 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3119 arm_selected_cpu->common.name,
3120 arm_selected_arch->common.name);
3121 /* -march wins for code generation.
3122 -mcpu wins for default tuning. */
3123 if (!arm_selected_tune)
3124 arm_selected_tune = arm_selected_cpu;
3125
3126 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3127 target->arch_name = arm_selected_arch->common.name;
3128 }
3129 else
3130 {
3131 /* Architecture and CPU are essentially the same.
3132 Prefer the CPU setting. */
3133 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3134 target->core_name = arm_selected_cpu->common.name;
3135 /* Copy the CPU's capabilities, so that we inherit the
3136 appropriate extensions and quirks. */
3137 bitmap_copy (target->isa, cpu_isa);
3138 }
3139 }
3140 else
3141 {
3142 /* Pick a CPU based on the architecture. */
3143 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3144 target->arch_name = arm_selected_arch->common.name;
3145 /* Note: target->core_name is left unset in this path. */
3146 }
3147 }
3148 else if (arm_selected_cpu)
3149 {
3150 target->core_name = arm_selected_cpu->common.name;
3151 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3152 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3153 cpu_opts);
3154 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3155 }
3156 /* If the user did not specify a processor or architecture, choose
3157 one for them. */
3158 else
3159 {
3160 const cpu_option *sel;
3161 auto_sbitmap sought_isa (isa_num_bits);
3162 bitmap_clear (sought_isa);
3163 auto_sbitmap default_isa (isa_num_bits);
3164
3165 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3166 TARGET_CPU_DEFAULT);
3167 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3168 gcc_assert (arm_selected_cpu->common.name);
3169
3170 /* RWE: All of the selection logic below (to the end of this
3171 'if' clause) looks somewhat suspect. It appears to be mostly
3172 there to support forcing thumb support when the default CPU
3173 does not have thumb (somewhat dubious in terms of what the
3174 user might be expecting). I think it should be removed once
3175 support for the pre-thumb era cores is removed. */
3176 sel = arm_selected_cpu;
3177 arm_initialize_isa (default_isa, sel->common.isa_bits);
3178 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3179 cpu_opts);
3180
3181 /* Now check to see if the user has specified any command line
3182 switches that require certain abilities from the cpu. */
3183
3184 if (TARGET_INTERWORK || TARGET_THUMB)
3185 {
3186 bitmap_set_bit (sought_isa, isa_bit_thumb);
3187 bitmap_set_bit (sought_isa, isa_bit_mode32);
3188
3189 /* There are no ARM processors that support both APCS-26 and
3190 	     interworking.  Therefore we forcibly remove MODE26 from
3191 	     the isa features here (if it was set), so that the
3192 search below will always be able to find a compatible
3193 processor. */
3194 bitmap_clear_bit (default_isa, isa_bit_mode26);
3195 }
3196
3197 /* If there are such requirements and the default CPU does not
3198 satisfy them, we need to run over the complete list of
3199 cores looking for one that is satisfactory. */
3200 if (!bitmap_empty_p (sought_isa)
3201 && !bitmap_subset_p (sought_isa, default_isa))
3202 {
3203 auto_sbitmap candidate_isa (isa_num_bits);
3204 /* We're only interested in a CPU with at least the
3205 capabilities of the default CPU and the required
3206 additional features. */
3207 bitmap_ior (default_isa, default_isa, sought_isa);
3208
3209 /* Try to locate a CPU type that supports all of the abilities
3210 of the default CPU, plus the extra abilities requested by
3211 the user. */
3212 for (sel = all_cores; sel->common.name != NULL; sel++)
3213 {
3214 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3215 /* An exact match? */
3216 if (bitmap_equal_p (default_isa, candidate_isa))
3217 break;
3218 }
3219
3220 if (sel->common.name == NULL)
3221 {
3222 unsigned current_bit_count = isa_num_bits;
3223 const cpu_option *best_fit = NULL;
3224
3225 /* Ideally we would like to issue an error message here
3226 saying that it was not possible to find a CPU compatible
3227 with the default CPU, but which also supports the command
3228 line options specified by the programmer, and so they
3229 ought to use the -mcpu=<name> command line option to
3230 override the default CPU type.
3231
3232 If we cannot find a CPU that has exactly the
3233 characteristics of the default CPU and the given
3234 command line options we scan the array again looking
3235 for a best match. The best match must have at least
3236 the capabilities of the perfect match. */
3237 for (sel = all_cores; sel->common.name != NULL; sel++)
3238 {
3239 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3240
3241 if (bitmap_subset_p (default_isa, candidate_isa))
3242 {
3243 unsigned count;
3244
3245 bitmap_and_compl (candidate_isa, candidate_isa,
3246 default_isa);
3247 count = bitmap_popcount (candidate_isa);
3248
3249 if (count < current_bit_count)
3250 {
3251 best_fit = sel;
3252 current_bit_count = count;
3253 }
3254 			}
3255 		    }
3256 
3257 		  gcc_assert (best_fit);
3258 		  sel = best_fit;
3259 		}
3260 arm_selected_cpu = sel;
3261 }
3262
3263 /* Now we know the CPU, we can finally initialize the target
3264 structure. */
3265 target->core_name = arm_selected_cpu->common.name;
3266 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3270 }
3271
3272 gcc_assert (arm_selected_cpu);
3273 gcc_assert (arm_selected_arch);
3274
3275 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3276 {
3277 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3278 auto_sbitmap fpu_bits (isa_num_bits);
3279
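      /* Clear any FPU feature bits inherited from the CPU or architecture
	 and replace them with the bits of the FPU selected by -mfpu.  */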
3280 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3281 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3282 bitmap_ior (target->isa, target->isa, fpu_bits);
3283 }
3284
3285 if (!arm_selected_tune)
3286 arm_selected_tune = arm_selected_cpu;
3287 else /* Validate the features passed to -mtune. */
3288 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3289
3290 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3291
3292 /* Finish initializing the target structure. */
3293 target->arch_pp_name = arm_selected_arch->arch;
3294 target->base_arch = arm_selected_arch->base_arch;
3295 target->profile = arm_selected_arch->profile;
3296
3297 target->tune_flags = tune_data->tune_flags;
3298 target->tune = tune_data->tune;
3299 target->tune_core = tune_data->scheduler;
3300 }
3301
3302 /* Fix up any incompatible options that the user has specified. */
3303 static void
3304 arm_option_override (void)
3305 {
3306 static const enum isa_feature fpu_bitlist[]
3307 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3308 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3309 cl_target_option opts;
3310
3311 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3313
3314 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3315 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3316
3317 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3318
3319 if (!global_options_set.x_arm_fpu_index)
3320 {
3321 bool ok;
3322 int fpu_index;
3323
3324 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3325 CL_TARGET);
3326 gcc_assert (ok);
3327 arm_fpu_index = (enum fpu_type) fpu_index;
3328 }
3329
3330 cl_target_option_save (&opts, &global_options);
3331 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3332 true);
3333
3334 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3335 SUBTARGET_OVERRIDE_OPTIONS;
3336 #endif
3337
3338 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3339 arm_base_arch = arm_active_target.base_arch;
3340
3341 arm_tune = arm_active_target.tune_core;
3342 tune_flags = arm_active_target.tune_flags;
3343 current_tune = arm_active_target.tune;
3344
3345 /* TBD: Dwarf info for apcs frame is not handled yet. */
3346 if (TARGET_APCS_FRAME)
3347 flag_shrink_wrap = false;
3348
3349 /* BPABI targets use linker tricks to allow interworking on cores
3350 without thumb support. */
3351 if (TARGET_INTERWORK
3352 && !TARGET_BPABI
3353 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3354 {
3355 warning (0, "target CPU does not support interworking" );
3356 target_flags &= ~MASK_INTERWORK;
3357 }
3358
3359 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3360 {
3361 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3362 target_flags |= MASK_APCS_FRAME;
3363 }
3364
3365 if (TARGET_POKE_FUNCTION_NAME)
3366 target_flags |= MASK_APCS_FRAME;
3367
3368 if (TARGET_APCS_REENT && flag_pic)
3369 error ("-fpic and -mapcs-reent are incompatible");
3370
3371 if (TARGET_APCS_REENT)
3372 warning (0, "APCS reentrant code not supported. Ignored");
3373
3374 /* Initialize boolean versions of the architectural flags, for use
3375 in the arm.md file. */
3376 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3377 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3378 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3379 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3380 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3381 arm_arch5te = arm_arch5e
3382 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3383 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3384 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3385 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3386 arm_arch6m = arm_arch6 && !arm_arch_notm;
3387 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3388 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3389 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3390 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3391 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3392 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3393 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3394 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3395 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3396 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3397 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3398 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3399 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3400 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3401 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3402 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3403 if (arm_fp16_inst)
3404 {
3405 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3406 error ("selected fp16 options are incompatible");
3407 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3408 }
3409
3410
3411 /* Set up some tuning parameters. */
3412 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3413 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3414 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3415 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3416 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3417 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3418
3419 /* And finally, set up some quirks. */
3420 arm_arch_no_volatile_ce
3421 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3422 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3423 isa_bit_quirk_armv6kz);
3424
3425 /* V5 code we generate is completely interworking capable, so we turn off
3426 TARGET_INTERWORK here to avoid many tests later on. */
3427
3428 /* XXX However, we must pass the right pre-processor defines to CPP
3429 or GLD can get confused. This is a hack. */
3430 if (TARGET_INTERWORK)
3431 arm_cpp_interwork = 1;
3432
3433 if (arm_arch5)
3434 target_flags &= ~MASK_INTERWORK;
3435
3436 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3437 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3438
3439 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3440 error ("iwmmxt abi requires an iwmmxt capable cpu");
3441
3442 /* If soft-float is specified then don't use FPU. */
3443 if (TARGET_SOFT_FLOAT)
3444 arm_fpu_attr = FPU_NONE;
3445 else
3446 arm_fpu_attr = FPU_VFP;
3447
3448 if (TARGET_AAPCS_BASED)
3449 {
3450 if (TARGET_CALLER_INTERWORKING)
3451 error ("AAPCS does not support -mcaller-super-interworking");
3452 else
3453 if (TARGET_CALLEE_INTERWORKING)
3454 error ("AAPCS does not support -mcallee-super-interworking");
3455 }
3456
3457 /* __fp16 support currently assumes the core has ldrh. */
3458 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3459 sorry ("__fp16 and no ldrh");
3460
3461 if (TARGET_AAPCS_BASED)
3462 {
3463 if (arm_abi == ARM_ABI_IWMMXT)
3464 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3465 else if (TARGET_HARD_FLOAT_ABI)
3466 {
3467 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3468 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3469 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3470 }
3471 else
3472 arm_pcs_default = ARM_PCS_AAPCS;
3473 }
3474 else
3475 {
3476 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3477 sorry ("-mfloat-abi=hard and VFP");
3478
3479 if (arm_abi == ARM_ABI_APCS)
3480 arm_pcs_default = ARM_PCS_APCS;
3481 else
3482 arm_pcs_default = ARM_PCS_ATPCS;
3483 }
3484
3485 /* For arm2/3 there is no need to do any scheduling if we are doing
3486 software floating-point. */
3487 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3488 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3489
3490 /* Use the cp15 method if it is available. */
3491 if (target_thread_pointer == TP_AUTO)
3492 {
3493 if (arm_arch6k && !TARGET_THUMB1)
3494 target_thread_pointer = TP_CP15;
3495 else
3496 target_thread_pointer = TP_SOFT;
3497 }
3498
3499 /* Override the default structure alignment for AAPCS ABI. */
3500 if (!global_options_set.x_arm_structure_size_boundary)
3501 {
3502 if (TARGET_AAPCS_BASED)
3503 arm_structure_size_boundary = 8;
3504 }
3505 else
3506 {
3507 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3508
3509 if (arm_structure_size_boundary != 8
3510 && arm_structure_size_boundary != 32
3511 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3512 {
3513 if (ARM_DOUBLEWORD_ALIGN)
3514 warning (0,
3515 "structure size boundary can only be set to 8, 32 or 64");
3516 else
3517 warning (0, "structure size boundary can only be set to 8 or 32");
3518 arm_structure_size_boundary
3519 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3520 }
3521 }
3522
3523 if (TARGET_VXWORKS_RTP)
3524 {
3525 if (!global_options_set.x_arm_pic_data_is_text_relative)
3526 arm_pic_data_is_text_relative = 0;
3527 }
3528 else if (flag_pic
3529 && !arm_pic_data_is_text_relative
3530 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3531 /* When text & data segments don't have a fixed displacement, the
3532 	   intended use is with a single, read-only PIC base register.
3533 Unless the user explicitly requested not to do that, set
3534 it. */
3535 target_flags |= MASK_SINGLE_PIC_BASE;
3536
3537 /* If stack checking is disabled, we can use r10 as the PIC register,
3538 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3539 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3540 {
3541 if (TARGET_VXWORKS_RTP)
3542 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3543 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3544 }
3545
3546 if (flag_pic && TARGET_VXWORKS_RTP)
3547 arm_pic_register = 9;
3548
3549 if (arm_pic_register_string != NULL)
3550 {
3551 int pic_register = decode_reg_name (arm_pic_register_string);
3552
3553 if (!flag_pic)
3554 warning (0, "-mpic-register= is useless without -fpic");
3555
3556 /* Prevent the user from choosing an obviously stupid PIC register. */
3557 else if (pic_register < 0 || call_used_regs[pic_register]
3558 || pic_register == HARD_FRAME_POINTER_REGNUM
3559 || pic_register == STACK_POINTER_REGNUM
3560 || pic_register >= PC_REGNUM
3561 || (TARGET_VXWORKS_RTP
3562 && (unsigned int) pic_register != arm_pic_register))
3563 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3564 else
3565 arm_pic_register = pic_register;
3566 }
3567
3568 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
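  /* A value of 2 is the option's initial value and means it was not set
     explicitly on the command line.  */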
3569 if (fix_cm3_ldrd == 2)
3570 {
3571 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3572 fix_cm3_ldrd = 1;
3573 else
3574 fix_cm3_ldrd = 0;
3575 }
3576
3577 /* Hot/Cold partitioning is not currently supported, since we can't
3578 handle literal pool placement in that case. */
3579 if (flag_reorder_blocks_and_partition)
3580 {
3581 inform (input_location,
3582 "-freorder-blocks-and-partition not supported on this architecture");
3583 flag_reorder_blocks_and_partition = 0;
3584 flag_reorder_blocks = 1;
3585 }
3586
3587 if (flag_pic)
3588 /* Hoisting PIC address calculations more aggressively provides a small,
3589 but measurable, size reduction for PIC code. Therefore, we decrease
3590 the bar for unrestricted expression hoisting to the cost of PIC address
3591 calculation, which is 2 instructions. */
3592 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3593 global_options.x_param_values,
3594 global_options_set.x_param_values);
3595
3596 /* ARM EABI defaults to strict volatile bitfields. */
3597 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3598 && abi_version_at_least(2))
3599 flag_strict_volatile_bitfields = 1;
3600
3601   /* Enable sw prefetching at -O3 for CPUs that have prefetch, and where we
3602 have deemed it beneficial (signified by setting
3603 prefetch.num_slots to 1 or more). */
3604 if (flag_prefetch_loop_arrays < 0
3605 && HAVE_prefetch
3606 && optimize >= 3
3607 && current_tune->prefetch.num_slots > 0)
3608 flag_prefetch_loop_arrays = 1;
3609
3610 /* Set up parameters to be used in prefetching algorithm. Do not
3611 override the defaults unless we are tuning for a core we have
3612 researched values for. */
3613 if (current_tune->prefetch.num_slots > 0)
3614 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3615 current_tune->prefetch.num_slots,
3616 global_options.x_param_values,
3617 global_options_set.x_param_values);
3618 if (current_tune->prefetch.l1_cache_line_size >= 0)
3619 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3620 current_tune->prefetch.l1_cache_line_size,
3621 global_options.x_param_values,
3622 global_options_set.x_param_values);
3623 if (current_tune->prefetch.l1_cache_size >= 0)
3624 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3625 current_tune->prefetch.l1_cache_size,
3626 global_options.x_param_values,
3627 global_options_set.x_param_values);
3628
3629   /* Use Neon to perform 64-bit operations rather than core
3630 registers. */
3631 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3632 if (use_neon_for_64bits == 1)
3633 prefer_neon_for_64bits = true;
3634
3635 /* Use the alternative scheduling-pressure algorithm by default. */
3636 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3637 global_options.x_param_values,
3638 global_options_set.x_param_values);
3639
3640 /* Look through ready list and all of queue for instructions
3641 relevant for L2 auto-prefetcher. */
3642 int param_sched_autopref_queue_depth;
3643
3644 switch (current_tune->sched_autopref)
3645 {
3646 case tune_params::SCHED_AUTOPREF_OFF:
3647 param_sched_autopref_queue_depth = -1;
3648 break;
3649
3650 case tune_params::SCHED_AUTOPREF_RANK:
3651 param_sched_autopref_queue_depth = 0;
3652 break;
3653
3654 case tune_params::SCHED_AUTOPREF_FULL:
3655 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3656 break;
3657
3658 default:
3659 gcc_unreachable ();
3660 }
3661
3662 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3663 param_sched_autopref_queue_depth,
3664 global_options.x_param_values,
3665 global_options_set.x_param_values);
3666
3667 /* Currently, for slow flash data, we just disable literal pools. We also
3668      disable them for pure-code.  */
3669 if (target_slow_flash_data || target_pure_code)
3670 arm_disable_literal_pool = true;
3671
3672 if (use_cmse && !arm_arch_cmse)
3673 error ("target CPU does not support ARMv8-M Security Extensions");
3674
3675   /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3676      and ARMv8-M Baseline and Mainline do not allow such a configuration.  */
3677 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3678 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3679
3680   /* Disable scheduling fusion by default if the processor is not ARMv7
3681      or does not prefer ldrd/strd.  */
3682 if (flag_schedule_fusion == 2
3683 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3684 flag_schedule_fusion = 0;
3685
3686   /* Need to remember the initial options before they are overridden.  */
3687 init_optimize = build_optimization_node (&global_options);
3688
3689 arm_option_override_internal (&global_options, &global_options_set);
3690 arm_option_check_internal (&global_options);
3691 arm_option_params_internal ();
3692
3693 /* Create the default target_options structure. */
3694 target_option_default_node = target_option_current_node
3695 = build_target_option_node (&global_options);
3696
3697 /* Register global variables with the garbage collector. */
3698 arm_add_gc_roots ();
3699
3700 /* Init initial mode for testing. */
3701 thumb_flipper = TARGET_THUMB;
3702 }
3703
3704 static void
3705 arm_add_gc_roots (void)
3706 {
3707 gcc_obstack_init(&minipool_obstack);
3708 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3709 }
3710 \f
3711 /* A table of known ARM exception types.
3712 For use with the interrupt function attribute. */
3713
3714 typedef struct
3715 {
3716 const char *const arg;
3717 const unsigned long return_value;
3718 }
3719 isr_attribute_arg;
3720
3721 static const isr_attribute_arg isr_attribute_args [] =
3722 {
3723 { "IRQ", ARM_FT_ISR },
3724 { "irq", ARM_FT_ISR },
3725 { "FIQ", ARM_FT_FIQ },
3726 { "fiq", ARM_FT_FIQ },
3727 { "ABORT", ARM_FT_ISR },
3728 { "abort", ARM_FT_ISR },
3729 { "ABORT", ARM_FT_ISR },
3730 { "abort", ARM_FT_ISR },
3731 { "UNDEF", ARM_FT_EXCEPTION },
3732 { "undef", ARM_FT_EXCEPTION },
3733 { "SWI", ARM_FT_EXCEPTION },
3734 { "swi", ARM_FT_EXCEPTION },
3735 { NULL, ARM_FT_NORMAL }
3736 };
3737
3738 /* Returns the (interrupt) function type of the current
3739 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3740
3741 static unsigned long
3742 arm_isr_value (tree argument)
3743 {
3744 const isr_attribute_arg * ptr;
3745 const char * arg;
3746
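  /* M-profile cores save the caller-saved registers in hardware on
     exception entry, so an interrupt handler is an ordinary function
     apart from possibly needing the stack re-aligned.  */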
3747 if (!arm_arch_notm)
3748 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3749
3750 /* No argument - default to IRQ. */
3751 if (argument == NULL_TREE)
3752 return ARM_FT_ISR;
3753
3754 /* Get the value of the argument. */
3755 if (TREE_VALUE (argument) == NULL_TREE
3756 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3757 return ARM_FT_UNKNOWN;
3758
3759 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3760
3761 /* Check it against the list of known arguments. */
3762 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3763 if (streq (arg, ptr->arg))
3764 return ptr->return_value;
3765
3766 /* An unrecognized interrupt type. */
3767 return ARM_FT_UNKNOWN;
3768 }
3769
3770 /* Computes the type of the current function. */
3771
3772 static unsigned long
3773 arm_compute_func_type (void)
3774 {
3775 unsigned long type = ARM_FT_UNKNOWN;
3776 tree a;
3777 tree attr;
3778
3779 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3780
3781 /* Decide if the current function is volatile. Such functions
3782 never return, and many memory cycles can be saved by not storing
3783 register values that will never be needed again. This optimization
3784 was added to speed up context switching in a kernel application. */
3785 if (optimize > 0
3786 && (TREE_NOTHROW (current_function_decl)
3787 || !(flag_unwind_tables
3788 || (flag_exceptions
3789 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3790 && TREE_THIS_VOLATILE (current_function_decl))
3791 type |= ARM_FT_VOLATILE;
3792
3793 if (cfun->static_chain_decl != NULL)
3794 type |= ARM_FT_NESTED;
3795
3796 attr = DECL_ATTRIBUTES (current_function_decl);
3797
3798 a = lookup_attribute ("naked", attr);
3799 if (a != NULL_TREE)
3800 type |= ARM_FT_NAKED;
3801
3802 a = lookup_attribute ("isr", attr);
3803 if (a == NULL_TREE)
3804 a = lookup_attribute ("interrupt", attr);
3805
3806 if (a == NULL_TREE)
3807 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3808 else
3809 type |= arm_isr_value (TREE_VALUE (a));
3810
3811 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3812 type |= ARM_FT_CMSE_ENTRY;
3813
3814 return type;
3815 }
3816
3817 /* Returns the type of the current function. */
3818
3819 unsigned long
3820 arm_current_func_type (void)
3821 {
3822 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3823 cfun->machine->func_type = arm_compute_func_type ();
3824
3825 return cfun->machine->func_type;
3826 }
3827
3828 bool
3829 arm_allocate_stack_slots_for_args (void)
3830 {
3831 /* Naked functions should not allocate stack slots for arguments. */
3832 return !IS_NAKED (arm_current_func_type ());
3833 }
3834
3835 static bool
3836 arm_warn_func_return (tree decl)
3837 {
3838 /* Naked functions are implemented entirely in assembly, including the
3839 return sequence, so suppress warnings about this. */
3840 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3841 }
3842
3843 \f
3844 /* Output assembler code for a block containing the constant parts
3845 of a trampoline, leaving space for the variable parts.
3846
3847 On the ARM, (if r8 is the static chain regnum, and remembering that
3848 referencing pc adds an offset of 8) the trampoline looks like:
3849 ldr r8, [pc, #0]
3850 ldr pc, [pc]
3851 .word static chain value
3852 .word function's address
3853 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3854
3855 static void
3856 arm_asm_trampoline_template (FILE *f)
3857 {
3858 fprintf (f, "\t.syntax unified\n");
3859
3860 if (TARGET_ARM)
3861 {
3862 fprintf (f, "\t.arm\n");
3863 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3864 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3865 }
3866 else if (TARGET_THUMB2)
3867 {
3868 fprintf (f, "\t.thumb\n");
3869 /* The Thumb-2 trampoline is similar to the arm implementation.
3870 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
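      /* In Thumb state the PC reads as the instruction address plus 4, so
	 an offset of 4 in each load reaches the literal words that follow
	 the two 4-byte instructions.  */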
3871 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3872 STATIC_CHAIN_REGNUM, PC_REGNUM);
3873 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3874 }
3875 else
3876 {
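      /* Thumb-1 cannot load the PC directly, so go via the stack: save r0
	 and r1 to make room, load the static chain and the target address
	 from the literal words that follow, overwrite the saved r1 slot
	 with the target, then pop it straight into the PC.  */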
3877 ASM_OUTPUT_ALIGN (f, 2);
3878 fprintf (f, "\t.code\t16\n");
3879 fprintf (f, ".Ltrampoline_start:\n");
3880 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3881 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3882 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3883 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3884 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3885 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3886 }
3887 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3888 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3889 }
3890
3891 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3892
3893 static void
3894 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3895 {
3896 rtx fnaddr, mem, a_tramp;
3897
3898 emit_block_move (m_tramp, assemble_trampoline_template (),
3899 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3900
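  /* The literal words sit straight after the code: at offset 8 for the
     two 4-byte ARM or Thumb-2 instructions, and at offset 12 for the six
     2-byte Thumb-1 instructions.  */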
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3902 emit_move_insn (mem, chain_value);
3903
3904 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3905 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3906 emit_move_insn (mem, fnaddr);
3907
3908 a_tramp = XEXP (m_tramp, 0);
3909 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3910 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3911 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3912 }
3913
3914 /* Thumb trampolines should be entered in thumb mode, so set
3915 the bottom bit of the address. */
3916
3917 static rtx
3918 arm_trampoline_adjust_address (rtx addr)
3919 {
3920 if (TARGET_THUMB)
3921 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3922 NULL, 0, OPTAB_LIB_WIDEN);
3923 return addr;
3924 }
3925 \f
3926 /* Return 1 if it is possible to return using a single instruction.
3927 If SIBLING is non-null, this is a test for a return before a sibling
3928 call. SIBLING is the call insn, so we can examine its register usage. */
3929
3930 int
3931 use_return_insn (int iscond, rtx sibling)
3932 {
3933 int regno;
3934 unsigned int func_type;
3935 unsigned long saved_int_regs;
3936 unsigned HOST_WIDE_INT stack_adjust;
3937 arm_stack_offsets *offsets;
3938
3939 /* Never use a return instruction before reload has run. */
3940 if (!reload_completed)
3941 return 0;
3942
3943 func_type = arm_current_func_type ();
3944
3945 /* Naked, volatile and stack alignment functions need special
3946 consideration. */
3947 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3948 return 0;
3949
3950 /* So do interrupt functions that use the frame pointer and Thumb
3951 interrupt functions. */
3952 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3953 return 0;
3954
3955 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3956 && !optimize_function_for_size_p (cfun))
3957 return 0;
3958
3959 offsets = arm_get_frame_offsets ();
3960 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3961
3962 /* As do variadic functions. */
3963 if (crtl->args.pretend_args_size
3964 || cfun->machine->uses_anonymous_args
3965 /* Or if the function calls __builtin_eh_return () */
3966 || crtl->calls_eh_return
3967 /* Or if the function calls alloca */
3968 || cfun->calls_alloca
3969 /* Or if there is a stack adjustment. However, if the stack pointer
3970 is saved on the stack, we can use a pre-incrementing stack load. */
3971 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3972 && stack_adjust == 4))
3973 /* Or if the static chain register was saved above the frame, under the
3974 assumption that the stack pointer isn't saved on the stack. */
3975 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3976 && arm_compute_static_chain_stack_bytes() != 0))
3977 return 0;
3978
3979 saved_int_regs = offsets->saved_regs_mask;
3980
3981 /* Unfortunately, the insn
3982
3983 ldmib sp, {..., sp, ...}
3984
3985 triggers a bug on most SA-110 based devices, such that the stack
3986 pointer won't be correctly restored if the instruction takes a
3987 page fault. We work around this problem by popping r3 along with
3988 the other registers, since that is never slower than executing
3989 another instruction.
3990
3991 We test for !arm_arch5 here, because code for any architecture
3992 less than this could potentially be run on one of the buggy
3993 chips. */
3994 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3995 {
3996 /* Validate that r3 is a call-clobbered register (always true in
3997 the default abi) ... */
3998 if (!call_used_regs[3])
3999 return 0;
4000
4001 /* ... that it isn't being used for a return value ... */
4002 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4003 return 0;
4004
4005 /* ... or for a tail-call argument ... */
4006 if (sibling)
4007 {
4008 gcc_assert (CALL_P (sibling));
4009
4010 if (find_regno_fusage (sibling, USE, 3))
4011 return 0;
4012 }
4013
4014 /* ... and that there are no call-saved registers in r0-r2
4015 (always true in the default ABI). */
4016 if (saved_int_regs & 0x7)
4017 return 0;
4018 }
4019
4020 /* Can't be done if interworking with Thumb, and any registers have been
4021 stacked. */
4022 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4023 return 0;
4024
4025 /* On StrongARM, conditional returns are expensive if they aren't
4026 taken and multiple registers have been stacked. */
4027 if (iscond && arm_tune_strongarm)
4028 {
4029 /* Conditional return when just the LR is stored is a simple
4030 conditional-load instruction, that's not expensive. */
4031 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4032 return 0;
4033
4034 if (flag_pic
4035 && arm_pic_register != INVALID_REGNUM
4036 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4037 return 0;
4038 }
4039
4040   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4041 several instructions if anything needs to be popped. */
4042 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4043 return 0;
4044
4045 /* If there are saved registers but the LR isn't saved, then we need
4046 two instructions for the return. */
4047 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4048 return 0;
4049
4050 /* Can't be done if any of the VFP regs are pushed,
4051 since this also requires an insn. */
4052 if (TARGET_HARD_FLOAT)
4053 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4054 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4055 return 0;
4056
4057 if (TARGET_REALLY_IWMMXT)
4058 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4059 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4060 return 0;
4061
4062 return 1;
4063 }
4064
4065 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4066 shrink-wrapping if possible. This is the case if we need to emit a
4067 prologue, which we can test by looking at the offsets. */
4068 bool
4069 use_simple_return_p (void)
4070 {
4071 arm_stack_offsets *offsets;
4072
4073 /* Note this function can be called before or after reload. */
4074 if (!reload_completed)
4075 arm_compute_frame_layout ();
4076
4077 offsets = arm_get_frame_offsets ();
4078 return offsets->outgoing_args != 0;
4079 }
4080
4081 /* Return TRUE if int I is a valid immediate ARM constant. */
4082
4083 int
4084 const_ok_for_arm (HOST_WIDE_INT i)
4085 {
4086 int lowbit;
4087
4088 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4089 be all zero, or all one. */
4090 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4091 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4092 != ((~(unsigned HOST_WIDE_INT) 0)
4093 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4094 return FALSE;
4095
4096 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4097
4098 /* Fast return for 0 and small values. We must do this for zero, since
4099 the code below can't handle that one case. */
4100 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4101 return TRUE;
4102
4103 /* Get the number of trailing zeros. */
4104 lowbit = ffs((int) i) - 1;
4105
4106 /* Only even shifts are allowed in ARM mode so round down to the
4107 nearest even number. */
4108 if (TARGET_ARM)
4109 lowbit &= ~1;
4110
4111 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4112 return TRUE;
4113
4114 if (TARGET_ARM)
4115 {
4116 /* Allow rotated constants in ARM mode. */
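      /* These masks catch the 8-bit values whose rotation wraps around the
	 word boundary: 0xc000003f, 0xf000000f and 0xfc000003 are 0xff
	 rotated right by 2, 4 and 6 bits, which the lowbit test above
	 cannot find.  */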
4117 if (lowbit <= 4
4118 && ((i & ~0xc000003f) == 0
4119 || (i & ~0xf000000f) == 0
4120 || (i & ~0xfc000003) == 0))
4121 return TRUE;
4122 }
4123 else if (TARGET_THUMB2)
4124 {
4125 HOST_WIDE_INT v;
4126
4127 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4128 v = i & 0xff;
4129 v |= v << 16;
4130 if (i == v || i == (v | (v << 8)))
4131 return TRUE;
4132
4133 /* Allow repeated pattern 0xXY00XY00. */
4134 v = i & 0xff00;
4135 v |= v << 16;
4136 if (i == v)
4137 return TRUE;
4138 }
4139 else if (TARGET_HAVE_MOVT)
4140 {
4141 /* Thumb-1 Targets with MOVT. */
4142 if (i > 0xffff)
4143 return FALSE;
4144 else
4145 return TRUE;
4146 }
4147
4148 return FALSE;
4149 }
4150
4151 /* Return true if I is a valid constant for the operation CODE. */
4152 int
4153 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4154 {
4155 if (const_ok_for_arm (i))
4156 return 1;
4157
4158 switch (code)
4159 {
4160 case SET:
4161 /* See if we can use movw. */
4162 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4163 return 1;
4164 else
4165 /* Otherwise, try mvn. */
4166 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4167
4168 case PLUS:
4169 /* See if we can use addw or subw. */
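      /* These take any 12-bit unsigned immediate.  */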
4170 if (TARGET_THUMB2
4171 && ((i & 0xfffff000) == 0
4172 || ((-i) & 0xfffff000) == 0))
4173 return 1;
4174 /* Fall through. */
4175 case COMPARE:
4176 case EQ:
4177 case NE:
4178 case GT:
4179 case LE:
4180 case LT:
4181 case GE:
4182 case GEU:
4183 case LTU:
4184 case GTU:
4185 case LEU:
4186 case UNORDERED:
4187 case ORDERED:
4188 case UNEQ:
4189 case UNGE:
4190 case UNLT:
4191 case UNGT:
4192 case UNLE:
4193 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4194
4195 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4196 case XOR:
4197 return 0;
4198
4199 case IOR:
4200 if (TARGET_THUMB2)
4201 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4202 return 0;
4203
4204 case AND:
4205 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4206
4207 default:
4208 gcc_unreachable ();
4209 }
4210 }
4211
4212 /* Return true if I is a valid di mode constant for the operation CODE. */
4213 int
4214 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4215 {
4216 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4217 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4218 rtx hi = GEN_INT (hi_val);
4219 rtx lo = GEN_INT (lo_val);
4220
4221 if (TARGET_THUMB1)
4222 return 0;
4223
4224 switch (code)
4225 {
4226 case AND:
4227 case IOR:
4228 case XOR:
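      /* Each half must be a valid immediate for the operation, or be all
	 ones: an all-ones half needs no instruction for AND and only a
	 single mov/mvn for IOR or XOR.  */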
4229 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4230 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4231 case PLUS:
4232 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4233
4234 default:
4235 return 0;
4236 }
4237 }
4238
4239 /* Emit a sequence of insns to handle a large constant.
4240 CODE is the code of the operation required, it can be any of SET, PLUS,
4241 IOR, AND, XOR, MINUS;
4242 MODE is the mode in which the operation is being performed;
4243 VAL is the integer to operate on;
4244 SOURCE is the other operand (a register, or a null-pointer for SET);
4245 SUBTARGETS means it is safe to create scratch registers if that will
4246 either produce a simpler sequence, or we will want to cse the values.
4247 Return value is the number of insns emitted. */
4248
4249 /* ??? Tweak this for thumb2. */
4250 int
4251 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4252 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4253 {
4254 rtx cond;
4255
4256 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4257 cond = COND_EXEC_TEST (PATTERN (insn));
4258 else
4259 cond = NULL_RTX;
4260
4261 if (subtargets || code == SET
4262 || (REG_P (target) && REG_P (source)
4263 && REGNO (target) != REGNO (source)))
4264 {
4265 /* After arm_reorg has been called, we can't fix up expensive
4266 constants by pushing them into memory so we must synthesize
4267 them in-line, regardless of the cost. This is only likely to
4268 be more costly on chips that have load delay slots and we are
4269 compiling without running the scheduler (so no splitting
4270 occurred before the final instruction emission).
4271
4272 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4273 */
4274 if (!cfun->machine->after_arm_reorg
4275 && !cond
4276 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4277 1, 0)
4278 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4279 + (code != SET))))
4280 {
4281 if (code == SET)
4282 {
4283 	      /* Currently SET is the only monadic value for CODE; all
4284 		 the rest are dyadic.  */
4285 if (TARGET_USE_MOVT)
4286 arm_emit_movpair (target, GEN_INT (val));
4287 else
4288 emit_set_insn (target, GEN_INT (val));
4289
4290 return 1;
4291 }
4292 else
4293 {
4294 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4295
4296 if (TARGET_USE_MOVT)
4297 arm_emit_movpair (temp, GEN_INT (val));
4298 else
4299 emit_set_insn (temp, GEN_INT (val));
4300
4301 /* For MINUS, the value is subtracted from, since we never
4302 have subtraction of a constant. */
4303 if (code == MINUS)
4304 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4305 else
4306 emit_set_insn (target,
4307 gen_rtx_fmt_ee (code, mode, source, temp));
4308 return 2;
4309 }
4310 }
4311 }
4312
4313 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4314 1);
4315 }
4316
4317 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4318 ARM/THUMB2 immediates, and add up to VAL.
4319 Thr function return value gives the number of insns required. */
4320 static int
4321 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4322 struct four_ints *return_sequence)
4323 {
4324 int best_consecutive_zeros = 0;
4325 int i;
4326 int best_start = 0;
4327 int insns1, insns2;
4328 struct four_ints tmp_sequence;
4329
4330 /* If we aren't targeting ARM, the best place to start is always at
4331 the bottom, otherwise look more closely. */
4332 if (TARGET_ARM)
4333 {
4334 for (i = 0; i < 32; i += 2)
4335 {
4336 int consecutive_zeros = 0;
4337
4338 if (!(val & (3 << i)))
4339 {
4340 while ((i < 32) && !(val & (3 << i)))
4341 {
4342 consecutive_zeros += 2;
4343 i += 2;
4344 }
4345 if (consecutive_zeros > best_consecutive_zeros)
4346 {
4347 best_consecutive_zeros = consecutive_zeros;
4348 best_start = i - consecutive_zeros;
4349 }
4350 i -= 2;
4351 }
4352 }
4353 }
4354
4355 /* So long as it won't require any more insns to do so, it's
4356 desirable to emit a small constant (in bits 0...9) in the last
4357 insn. This way there is more chance that it can be combined with
4358 a later addressing insn to form a pre-indexed load or store
4359 operation. Consider:
4360
4361 *((volatile int *)0xe0000100) = 1;
4362 *((volatile int *)0xe0000110) = 2;
4363
4364 We want this to wind up as:
4365
4366 mov rA, #0xe0000000
4367 mov rB, #1
4368 str rB, [rA, #0x100]
4369 mov rB, #2
4370 str rB, [rA, #0x110]
4371
4372 rather than having to synthesize both large constants from scratch.
4373
4374 Therefore, we calculate how many insns would be required to emit
4375 the constant starting from `best_start', and also starting from
4376 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4377 yield a shorter sequence, we may as well use zero. */
4378 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4379 if (best_start != 0
4380 && ((HOST_WIDE_INT_1U << best_start) < val))
4381 {
4382 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4383 if (insns2 <= insns1)
4384 {
4385 *return_sequence = tmp_sequence;
4386 insns1 = insns2;
4387 }
4388 }
4389
4390 return insns1;
4391 }
4392
4393 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4394 static int
4395 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4396 struct four_ints *return_sequence, int i)
4397 {
4398 int remainder = val & 0xffffffff;
4399 int insns = 0;
4400
4401 /* Try and find a way of doing the job in either two or three
4402 instructions.
4403
4404 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4405 location. We start at position I. This may be the MSB, or
4406      optimal_immediate_sequence may have positioned it at the largest block
4407 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4408 wrapping around to the top of the word when we drop off the bottom.
4409 In the worst case this code should produce no more than four insns.
4410
4411 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4412 constants, shifted to any arbitrary location. We should always start
4413 at the MSB. */
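  /* For example, a SET of 0x00ffff00 in ARM mode cannot be done with one
     rotated 8-bit immediate, but two of them (e.g. 0x00ff0000 and
     0x0000ff00) cover it, giving a two-instruction mov/orr sequence.  */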
4414 do
4415 {
4416 int end;
4417 unsigned int b1, b2, b3, b4;
4418 unsigned HOST_WIDE_INT result;
4419 int loc;
4420
4421 gcc_assert (insns < 4);
4422
4423 if (i <= 0)
4424 i += 32;
4425
4426 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4427 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4428 {
4429 loc = i;
4430 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4431 /* We can use addw/subw for the last 12 bits. */
4432 result = remainder;
4433 else
4434 {
4435 /* Use an 8-bit shifted/rotated immediate. */
4436 end = i - 8;
4437 if (end < 0)
4438 end += 32;
4439 result = remainder & ((0x0ff << end)
4440 | ((i < end) ? (0xff >> (32 - end))
4441 : 0));
4442 i -= 8;
4443 }
4444 }
4445 else
4446 {
4447 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4448 arbitrary shifts. */
4449 i -= TARGET_ARM ? 2 : 1;
4450 continue;
4451 }
4452
4453 /* Next, see if we can do a better job with a thumb2 replicated
4454 constant.
4455
4456 We do it this way around to catch the cases like 0x01F001E0 where
4457 two 8-bit immediates would work, but a replicated constant would
4458 make it worse.
4459
4460 TODO: 16-bit constants that don't clear all the bits, but still win.
4461 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4462 if (TARGET_THUMB2)
4463 {
4464 b1 = (remainder & 0xff000000) >> 24;
4465 b2 = (remainder & 0x00ff0000) >> 16;
4466 b3 = (remainder & 0x0000ff00) >> 8;
4467 b4 = remainder & 0xff;
4468
4469 if (loc > 24)
4470 {
4471 /* The 8-bit immediate already found clears b1 (and maybe b2),
4472 but must leave b3 and b4 alone. */
4473
4474 /* First try to find a 32-bit replicated constant that clears
4475 almost everything. We can assume that we can't do it in one,
4476 or else we wouldn't be here. */
4477 unsigned int tmp = b1 & b2 & b3 & b4;
4478 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4479 + (tmp << 24);
4480 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4481 + (tmp == b3) + (tmp == b4);
4482 if (tmp
4483 && (matching_bytes >= 3
4484 || (matching_bytes == 2
4485 && const_ok_for_op (remainder & ~tmp2, code))))
4486 {
4487 /* At least 3 of the bytes match, and the fourth has at
4488 least as many bits set, or two of the bytes match
4489 and it will only require one more insn to finish. */
4490 result = tmp2;
4491 i = tmp != b1 ? 32
4492 : tmp != b2 ? 24
4493 : tmp != b3 ? 16
4494 : 8;
4495 }
4496
4497 /* Second, try to find a 16-bit replicated constant that can
4498 leave three of the bytes clear. If b2 or b4 is already
4499 		 zero, then we can.  If the 8-bit immediate from above would not
4500 clear b2 anyway, then we still win. */
4501 else if (b1 == b3 && (!b2 || !b4
4502 || (remainder & 0x00ff0000 & ~result)))
4503 {
4504 result = remainder & 0xff00ff00;
4505 i = 24;
4506 }
4507 }
4508 else if (loc > 16)
4509 {
4510 /* The 8-bit immediate already found clears b2 (and maybe b3)
4511 		 and we don't get here unless b1 is already clear, but it will
4512 leave b4 unchanged. */
4513
4514 /* If we can clear b2 and b4 at once, then we win, since the
4515 8-bits couldn't possibly reach that far. */
4516 if (b2 == b4)
4517 {
4518 result = remainder & 0x00ff00ff;
4519 i = 16;
4520 }
4521 }
4522 }
4523
4524 return_sequence->i[insns++] = result;
4525 remainder &= ~result;
4526
4527 if (code == SET || code == MINUS)
4528 code = PLUS;
4529 }
4530 while (remainder);
4531
4532 return insns;
4533 }
4534
4535 /* Emit an instruction with the indicated PATTERN. If COND is
4536 non-NULL, conditionalize the execution of the instruction on COND
4537 being true. */
4538
4539 static void
4540 emit_constant_insn (rtx cond, rtx pattern)
4541 {
4542 if (cond)
4543 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4544 emit_insn (pattern);
4545 }
4546
4547 /* As above, but extra parameter GENERATE which, if clear, suppresses
4548 RTL generation. */
4549
4550 static int
4551 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4552 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4553 int subtargets, int generate)
4554 {
4555 int can_invert = 0;
4556 int can_negate = 0;
4557 int final_invert = 0;
4558 int i;
4559 int set_sign_bit_copies = 0;
4560 int clear_sign_bit_copies = 0;
4561 int clear_zero_bit_copies = 0;
4562 int set_zero_bit_copies = 0;
4563 int insns = 0, neg_insns, inv_insns;
4564 unsigned HOST_WIDE_INT temp1, temp2;
4565 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4566 struct four_ints *immediates;
4567 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4568
4569 /* Find out which operations are safe for a given CODE. Also do a quick
4570 check for degenerate cases; these can occur when DImode operations
4571 are split. */
4572 switch (code)
4573 {
4574 case SET:
4575 can_invert = 1;
4576 break;
4577
4578 case PLUS:
4579 can_negate = 1;
4580 break;
4581
4582 case IOR:
4583 if (remainder == 0xffffffff)
4584 {
4585 if (generate)
4586 emit_constant_insn (cond,
4587 gen_rtx_SET (target,
4588 GEN_INT (ARM_SIGN_EXTEND (val))));
4589 return 1;
4590 }
4591
4592 if (remainder == 0)
4593 {
4594 if (reload_completed && rtx_equal_p (target, source))
4595 return 0;
4596
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, source));
4599 return 1;
4600 }
4601 break;
4602
4603 case AND:
4604 if (remainder == 0)
4605 {
4606 if (generate)
4607 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4608 return 1;
4609 }
4610 if (remainder == 0xffffffff)
4611 {
4612 if (reload_completed && rtx_equal_p (target, source))
4613 return 0;
4614 if (generate)
4615 emit_constant_insn (cond, gen_rtx_SET (target, source));
4616 return 1;
4617 }
4618 can_invert = 1;
4619 break;
4620
4621 case XOR:
4622 if (remainder == 0)
4623 {
4624 if (reload_completed && rtx_equal_p (target, source))
4625 return 0;
4626 if (generate)
4627 emit_constant_insn (cond, gen_rtx_SET (target, source));
4628 return 1;
4629 }
4630
4631 if (remainder == 0xffffffff)
4632 {
4633 if (generate)
4634 emit_constant_insn (cond,
4635 gen_rtx_SET (target,
4636 gen_rtx_NOT (mode, source)));
4637 return 1;
4638 }
4639 final_invert = 1;
4640 break;
4641
4642 case MINUS:
4643 /* We treat MINUS as (val - source), since (source - val) is always
4644 passed as (source + (-val)). */
4645 if (remainder == 0)
4646 {
4647 if (generate)
4648 emit_constant_insn (cond,
4649 gen_rtx_SET (target,
4650 gen_rtx_NEG (mode, source)));
4651 return 1;
4652 }
4653 if (const_ok_for_arm (val))
4654 {
4655 if (generate)
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (target,
4658 gen_rtx_MINUS (mode, GEN_INT (val),
4659 source)));
4660 return 1;
4661 }
4662
4663 break;
4664
4665 default:
4666 gcc_unreachable ();
4667 }
4668
4669 /* If we can do it in one insn get out quickly. */
4670 if (const_ok_for_op (val, code))
4671 {
4672 if (generate)
4673 emit_constant_insn (cond,
4674 gen_rtx_SET (target,
4675 (source
4676 ? gen_rtx_fmt_ee (code, mode, source,
4677 GEN_INT (val))
4678 : GEN_INT (val))));
4679 return 1;
4680 }
4681
4682 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4683 insn. */
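  /* E.g. AND with 0xffff becomes a uxth, and AND with 0x3ffff becomes a
     single ubfx of the low 18 bits.  */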
4684 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4685 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4686 {
4687 if (generate)
4688 {
4689 if (mode == SImode && i == 16)
4690 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4691 smaller insn. */
4692 emit_constant_insn (cond,
4693 gen_zero_extendhisi2
4694 (target, gen_lowpart (HImode, source)));
4695 else
4696 /* Extz only supports SImode, but we can coerce the operands
4697 into that mode. */
4698 emit_constant_insn (cond,
4699 gen_extzv_t2 (gen_lowpart (SImode, target),
4700 gen_lowpart (SImode, source),
4701 GEN_INT (i), const0_rtx));
4702 }
4703
4704 return 1;
4705 }
4706
4707 /* Calculate a few attributes that may be useful for specific
4708 optimizations. */
4709 /* Count number of leading zeros. */
4710 for (i = 31; i >= 0; i--)
4711 {
4712 if ((remainder & (1 << i)) == 0)
4713 clear_sign_bit_copies++;
4714 else
4715 break;
4716 }
4717
4718 /* Count number of leading 1's. */
4719 for (i = 31; i >= 0; i--)
4720 {
4721 if ((remainder & (1 << i)) != 0)
4722 set_sign_bit_copies++;
4723 else
4724 break;
4725 }
4726
4727   /* Count number of trailing zeros.  */
4728 for (i = 0; i <= 31; i++)
4729 {
4730 if ((remainder & (1 << i)) == 0)
4731 clear_zero_bit_copies++;
4732 else
4733 break;
4734 }
4735
4736 /* Count number of trailing 1's. */
4737 for (i = 0; i <= 31; i++)
4738 {
4739 if ((remainder & (1 << i)) != 0)
4740 set_zero_bit_copies++;
4741 else
4742 break;
4743 }
4744
4745 switch (code)
4746 {
4747 case SET:
4748 /* See if we can do this by sign_extending a constant that is known
4749 	 to be negative.  This is a good way of doing it, since the shift
4750 may well merge into a subsequent insn. */
4751 if (set_sign_bit_copies > 1)
4752 {
4753 if (const_ok_for_arm
4754 (temp1 = ARM_SIGN_EXTEND (remainder
4755 << (set_sign_bit_copies - 1))))
4756 {
4757 if (generate)
4758 {
4759 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4760 emit_constant_insn (cond,
4761 gen_rtx_SET (new_src, GEN_INT (temp1)));
4762 emit_constant_insn (cond,
4763 gen_ashrsi3 (target, new_src,
4764 GEN_INT (set_sign_bit_copies - 1)));
4765 }
4766 return 2;
4767 }
4768 /* For an inverted constant, we will need to set the low bits,
4769 these will be shifted out of harm's way. */
4770 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4771 if (const_ok_for_arm (~temp1))
4772 {
4773 if (generate)
4774 {
4775 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4776 emit_constant_insn (cond,
4777 gen_rtx_SET (new_src, GEN_INT (temp1)));
4778 emit_constant_insn (cond,
4779 gen_ashrsi3 (target, new_src,
4780 GEN_INT (set_sign_bit_copies - 1)));
4781 }
4782 return 2;
4783 }
4784 }
4785
4786 /* See if we can calculate the value as the difference between two
4787 valid immediates. */
4788 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4789 {
4790 int topshift = clear_sign_bit_copies & ~1;
4791
4792 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4793 & (0xff000000 >> topshift));
4794
4795 /* If temp1 is zero, then that means the 9 most significant
4796 bits of remainder were 1 and we've caused it to overflow.
4797 When topshift is 0 we don't need to do anything since we
4798 can borrow from 'bit 32'. */
4799 if (temp1 == 0 && topshift != 0)
4800 temp1 = 0x80000000 >> (topshift - 1);
4801
4802 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4803
4804 if (const_ok_for_arm (temp2))
4805 {
4806 if (generate)
4807 {
4808 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4809 emit_constant_insn (cond,
4810 gen_rtx_SET (new_src, GEN_INT (temp1)));
4811 emit_constant_insn (cond,
4812 gen_addsi3 (target, new_src,
4813 GEN_INT (-temp2)));
4814 }
4815
4816 return 2;
4817 }
4818 }
4819
4820 /* See if we can generate this by setting the bottom (or the top)
4821 16 bits, and then shifting these into the other half of the
4822 word. We only look for the simplest cases, to do more would cost
4823 too much. Be careful, however, not to generate this when the
4824 alternative would take fewer insns. */
4825 if (val & 0xffff0000)
4826 {
4827 temp1 = remainder & 0xffff0000;
4828 temp2 = remainder & 0x0000ffff;
4829
4830 /* Overlaps outside this range are best done using other methods. */
4831 for (i = 9; i < 24; i++)
4832 {
4833 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4834 && !const_ok_for_arm (temp2))
4835 {
4836 rtx new_src = (subtargets
4837 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4838 : target);
4839 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4840 source, subtargets, generate);
4841 source = new_src;
4842 if (generate)
4843 emit_constant_insn
4844 (cond,
4845 gen_rtx_SET
4846 (target,
4847 gen_rtx_IOR (mode,
4848 gen_rtx_ASHIFT (mode, source,
4849 GEN_INT (i)),
4850 source)));
4851 return insns + 1;
4852 }
4853 }
4854
4855 /* Don't duplicate cases already considered. */
4856 for (i = 17; i < 24; i++)
4857 {
4858 if (((temp1 | (temp1 >> i)) == remainder)
4859 && !const_ok_for_arm (temp1))
4860 {
4861 rtx new_src = (subtargets
4862 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4863 : target);
4864 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4865 source, subtargets, generate);
4866 source = new_src;
4867 if (generate)
4868 emit_constant_insn
4869 (cond,
4870 gen_rtx_SET (target,
4871 gen_rtx_IOR
4872 (mode,
4873 gen_rtx_LSHIFTRT (mode, source,
4874 GEN_INT (i)),
4875 source)));
4876 return insns + 1;
4877 }
4878 }
4879 }
4880 break;
4881
4882 case IOR:
4883 case XOR:
4884 /* If we have IOR or XOR, and the constant can be loaded in a
4885 single instruction, and we can find a temporary to put it in,
4886 then this can be done in two instructions instead of 3-4. */
4887 if (subtargets
4888 /* TARGET can't be NULL if SUBTARGETS is 0 */
4889 || (reload_completed && !reg_mentioned_p (target, source)))
4890 {
4891 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4892 {
4893 if (generate)
4894 {
4895 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4896
4897 emit_constant_insn (cond,
4898 gen_rtx_SET (sub, GEN_INT (val)));
4899 emit_constant_insn (cond,
4900 gen_rtx_SET (target,
4901 gen_rtx_fmt_ee (code, mode,
4902 source, sub)));
4903 }
4904 return 2;
4905 }
4906 }
4907
4908 if (code == XOR)
4909 break;
4910
4911 /* Convert.
4912 	 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4913 	 followed by zeros, e.g. 0xfff00000)
4914 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4915
4916 This can be done in 2 instructions by using shifts with mov or mvn.
4917 e.g. for
4918 x = x | 0xfff00000;
4919 	 we generate:
4920 mvn r0, r0, asl #12
4921 mvn r0, r0, lsr #12 */
4922 if (set_sign_bit_copies > 8
4923 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4924 {
4925 if (generate)
4926 {
4927 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4928 rtx shift = GEN_INT (set_sign_bit_copies);
4929
4930 emit_constant_insn
4931 (cond,
4932 gen_rtx_SET (sub,
4933 gen_rtx_NOT (mode,
4934 gen_rtx_ASHIFT (mode,
4935 source,
4936 shift))));
4937 emit_constant_insn
4938 (cond,
4939 gen_rtx_SET (target,
4940 gen_rtx_NOT (mode,
4941 gen_rtx_LSHIFTRT (mode, sub,
4942 shift))));
4943 }
4944 return 2;
4945 }
4946
4947 /* Convert
4948 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4949 to
4950 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4951
4952 	 E.g. for r0 = r0 | 0xfff
4953 mvn r0, r0, lsr #12
4954 mvn r0, r0, asl #12
4955
4956 */
4957 if (set_zero_bit_copies > 8
4958 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4959 {
4960 if (generate)
4961 {
4962 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4963 rtx shift = GEN_INT (set_zero_bit_copies);
4964
4965 emit_constant_insn
4966 (cond,
4967 gen_rtx_SET (sub,
4968 gen_rtx_NOT (mode,
4969 gen_rtx_LSHIFTRT (mode,
4970 source,
4971 shift))));
4972 emit_constant_insn
4973 (cond,
4974 gen_rtx_SET (target,
4975 gen_rtx_NOT (mode,
4976 gen_rtx_ASHIFT (mode, sub,
4977 shift))));
4978 }
4979 return 2;
4980 }
4981
4982 /* This will never be reached for Thumb2 because orn is a valid
4983 	 instruction.  This is for the Thumb1 and ARM 32-bit cases.
4984
4985 x = y | constant (such that ~constant is a valid constant)
4986 Transform this to
4987 x = ~(~y & ~constant).
4988 */
4989 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4990 {
4991 if (generate)
4992 {
4993 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4994 emit_constant_insn (cond,
4995 gen_rtx_SET (sub,
4996 gen_rtx_NOT (mode, source)));
4997 source = sub;
4998 if (subtargets)
4999 sub = gen_reg_rtx (mode);
5000 emit_constant_insn (cond,
5001 gen_rtx_SET (sub,
5002 gen_rtx_AND (mode, source,
5003 GEN_INT (temp1))));
5004 emit_constant_insn (cond,
5005 gen_rtx_SET (target,
5006 gen_rtx_NOT (mode, sub)));
5007 }
5008 return 3;
5009 }
5010 break;
5011
5012 case AND:
5013 /* See if two shifts will do 2 or more insns' worth of work. */
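/* Illustrative example (added for exposition): for x = y & 0x0000ffff
   neither 0xffff nor its complement 0xffff0000 is a valid ARM
   immediate, but the mask has 16 leading zeros, so the code below can
   emit
   lsl tmp, y, #16
   lsr x, tmp, #16
   clearing the upper 16 bits with two shifts. */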
5014 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5015 {
5016 HOST_WIDE_INT shift_mask = ((0xffffffff
5017 << (32 - clear_sign_bit_copies))
5018 & 0xffffffff);
5019
5020 if ((remainder | shift_mask) != 0xffffffff)
5021 {
5022 HOST_WIDE_INT new_val
5023 = ARM_SIGN_EXTEND (remainder | shift_mask);
5024
5025 if (generate)
5026 {
5027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5028 insns = arm_gen_constant (AND, SImode, cond, new_val,
5029 new_src, source, subtargets, 1);
5030 source = new_src;
5031 }
5032 else
5033 {
5034 rtx targ = subtargets ? NULL_RTX : target;
5035 insns = arm_gen_constant (AND, mode, cond, new_val,
5036 targ, source, subtargets, 0);
5037 }
5038 }
5039
5040 if (generate)
5041 {
5042 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5043 rtx shift = GEN_INT (clear_sign_bit_copies);
5044
5045 emit_insn (gen_ashlsi3 (new_src, source, shift));
5046 emit_insn (gen_lshrsi3 (target, new_src, shift));
5047 }
5048
5049 return insns + 2;
5050 }
5051
5052 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5053 {
5054 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5055
5056 if ((remainder | shift_mask) != 0xffffffff)
5057 {
5058 HOST_WIDE_INT new_val
5059 = ARM_SIGN_EXTEND (remainder | shift_mask);
5060 if (generate)
5061 {
5062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5063
5064 insns = arm_gen_constant (AND, mode, cond, new_val,
5065 new_src, source, subtargets, 1);
5066 source = new_src;
5067 }
5068 else
5069 {
5070 rtx targ = subtargets ? NULL_RTX : target;
5071
5072 insns = arm_gen_constant (AND, mode, cond, new_val,
5073 targ, source, subtargets, 0);
5074 }
5075 }
5076
5077 if (generate)
5078 {
5079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5080 rtx shift = GEN_INT (clear_zero_bit_copies);
5081
5082 emit_insn (gen_lshrsi3 (new_src, source, shift));
5083 emit_insn (gen_ashlsi3 (target, new_src, shift));
5084 }
5085
5086 return insns + 2;
5087 }
5088
5089 break;
5090
5091 default:
5092 break;
5093 }
5094
5095 /* Calculate what the instruction sequences would be if we generated it
5096 normally, negated, or inverted. */
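/* Illustrative example (added for exposition): for x = y & 0xfff0fff0
   neither the constant nor its complement is a single valid immediate,
   so the positive AND form is ruled out (insns == 99 below) and the
   inverted value 0x000f000f is split into the immediates 0xf and
   0xf0000; the emission loop further down then re-inverts each piece,
   producing roughly
   bic tmp, y, #0x0000000f
   bic x, tmp, #0x000f0000  */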
5097 if (code == AND)
5098 /* AND cannot be split into multiple insns, so invert and use BIC. */
5099 insns = 99;
5100 else
5101 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5102
5103 if (can_negate)
5104 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5105 &neg_immediates);
5106 else
5107 neg_insns = 99;
5108
5109 if (can_invert || final_invert)
5110 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5111 &inv_immediates);
5112 else
5113 inv_insns = 99;
5114
5115 immediates = &pos_immediates;
5116
5117 /* Is the negated immediate sequence more efficient? */
5118 if (neg_insns < insns && neg_insns <= inv_insns)
5119 {
5120 insns = neg_insns;
5121 immediates = &neg_immediates;
5122 }
5123 else
5124 can_negate = 0;
5125
5126 /* Is the inverted immediate sequence more efficient?
5127 We must allow for an extra NOT instruction for XOR operations, although
5128 there is some chance that the final 'mvn' will get optimized later. */
5129 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5130 {
5131 insns = inv_insns;
5132 immediates = &inv_immediates;
5133 }
5134 else
5135 {
5136 can_invert = 0;
5137 final_invert = 0;
5138 }
5139
5140 /* Now output the chosen sequence as instructions. */
5141 if (generate)
5142 {
5143 for (i = 0; i < insns; i++)
5144 {
5145 rtx new_src, temp1_rtx;
5146
5147 temp1 = immediates->i[i];
5148
5149 if (code == SET || code == MINUS)
5150 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5151 else if ((final_invert || i < (insns - 1)) && subtargets)
5152 new_src = gen_reg_rtx (mode);
5153 else
5154 new_src = target;
5155
5156 if (can_invert)
5157 temp1 = ~temp1;
5158 else if (can_negate)
5159 temp1 = -temp1;
5160
5161 temp1 = trunc_int_for_mode (temp1, mode);
5162 temp1_rtx = GEN_INT (temp1);
5163
5164 if (code == SET)
5165 ;
5166 else if (code == MINUS)
5167 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5168 else
5169 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5170
5171 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5172 source = new_src;
5173
5174 if (code == SET)
5175 {
5176 can_negate = can_invert;
5177 can_invert = 0;
5178 code = PLUS;
5179 }
5180 else if (code == MINUS)
5181 code = PLUS;
5182 }
5183 }
5184
5185 if (final_invert)
5186 {
5187 if (generate)
5188 emit_constant_insn (cond, gen_rtx_SET (target,
5189 gen_rtx_NOT (mode, source)));
5190 insns++;
5191 }
5192
5193 return insns;
5194 }
5195
5196 /* Canonicalize a comparison so that we are more likely to recognize it.
5197 This can be done for a few constant compares, where we can make the
5198 immediate value easier to load. */
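/* Illustrative example (added for exposition): an unsigned comparison
   x <= 0xfffff cannot use 0xfffff directly, since neither it nor its
   negation is a valid ARM immediate, but the LEU case below rewrites
   it as x < 0x100000, which needs only a single cmp with an encodable
   constant. */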
5199
5200 static void
5201 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5202 bool op0_preserve_value)
5203 {
5204 machine_mode mode;
5205 unsigned HOST_WIDE_INT i, maxval;
5206
5207 mode = GET_MODE (*op0);
5208 if (mode == VOIDmode)
5209 mode = GET_MODE (*op1);
5210
5211 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5212
5213 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5214 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5215 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5216 for GTU/LEU in Thumb mode. */
5217 if (mode == DImode)
5218 {
5219
5220 if (*code == GT || *code == LE
5221 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5222 {
5223 /* Missing comparison. First try to use an available
5224 comparison. */
5225 if (CONST_INT_P (*op1))
5226 {
5227 i = INTVAL (*op1);
5228 switch (*code)
5229 {
5230 case GT:
5231 case LE:
5232 if (i != maxval
5233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5234 {
5235 *op1 = GEN_INT (i + 1);
5236 *code = *code == GT ? GE : LT;
5237 return;
5238 }
5239 break;
5240 case GTU:
5241 case LEU:
5242 if (i != ~((unsigned HOST_WIDE_INT) 0)
5243 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5244 {
5245 *op1 = GEN_INT (i + 1);
5246 *code = *code == GTU ? GEU : LTU;
5247 return;
5248 }
5249 break;
5250 default:
5251 gcc_unreachable ();
5252 }
5253 }
5254
5255 /* If that did not work, reverse the condition. */
5256 if (!op0_preserve_value)
5257 {
5258 std::swap (*op0, *op1);
5259 *code = (int)swap_condition ((enum rtx_code)*code);
5260 }
5261 }
5262 return;
5263 }
5264
5265 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5266 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5267 to facilitate possible combining with a cmp into 'ands'. */
5268 if (mode == SImode
5269 && GET_CODE (*op0) == ZERO_EXTEND
5270 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5271 && GET_MODE (XEXP (*op0, 0)) == QImode
5272 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5273 && subreg_lowpart_p (XEXP (*op0, 0))
5274 && *op1 == const0_rtx)
5275 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5276 GEN_INT (255));
5277
5278 /* Comparisons smaller than DImode. Only adjust comparisons against
5279 an out-of-range constant. */
5280 if (!CONST_INT_P (*op1)
5281 || const_ok_for_arm (INTVAL (*op1))
5282 || const_ok_for_arm (- INTVAL (*op1)))
5283 return;
5284
5285 i = INTVAL (*op1);
5286
5287 switch (*code)
5288 {
5289 case EQ:
5290 case NE:
5291 return;
5292
5293 case GT:
5294 case LE:
5295 if (i != maxval
5296 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5297 {
5298 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5299 *code = *code == GT ? GE : LT;
5300 return;
5301 }
5302 break;
5303
5304 case GE:
5305 case LT:
5306 if (i != ~maxval
5307 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5308 {
5309 *op1 = GEN_INT (i - 1);
5310 *code = *code == GE ? GT : LE;
5311 return;
5312 }
5313 break;
5314
5315 case GTU:
5316 case LEU:
5317 if (i != ~((unsigned HOST_WIDE_INT) 0)
5318 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5319 {
5320 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5321 *code = *code == GTU ? GEU : LTU;
5322 return;
5323 }
5324 break;
5325
5326 case GEU:
5327 case LTU:
5328 if (i != 0
5329 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5330 {
5331 *op1 = GEN_INT (i - 1);
5332 *code = *code == GEU ? GTU : LEU;
5333 return;
5334 }
5335 break;
5336
5337 default:
5338 gcc_unreachable ();
5339 }
5340 }
5341
5342
5343 /* Define how to find the value returned by a function. */
5344
5345 static rtx
5346 arm_function_value(const_tree type, const_tree func,
5347 bool outgoing ATTRIBUTE_UNUSED)
5348 {
5349 machine_mode mode;
5350 int unsignedp ATTRIBUTE_UNUSED;
5351 rtx r ATTRIBUTE_UNUSED;
5352
5353 mode = TYPE_MODE (type);
5354
5355 if (TARGET_AAPCS_BASED)
5356 return aapcs_allocate_return_reg (mode, type, func);
5357
5358 /* Promote integer types. */
5359 if (INTEGRAL_TYPE_P (type))
5360 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5361
5362 /* Promote small structs returned in a register to full-word size
5363 for big-endian AAPCS. */
5364 if (arm_return_in_msb (type))
5365 {
5366 HOST_WIDE_INT size = int_size_in_bytes (type);
5367 if (size % UNITS_PER_WORD != 0)
5368 {
5369 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5370 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5371 }
5372 }
5373
5374 return arm_libcall_value_1 (mode);
5375 }
5376
5377 /* libcall hashtable helpers. */
5378
5379 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5380 {
5381 static inline hashval_t hash (const rtx_def *);
5382 static inline bool equal (const rtx_def *, const rtx_def *);
5383 static inline void remove (rtx_def *);
5384 };
5385
5386 inline bool
5387 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5388 {
5389 return rtx_equal_p (p1, p2);
5390 }
5391
5392 inline hashval_t
5393 libcall_hasher::hash (const rtx_def *p1)
5394 {
5395 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5396 }
5397
5398 typedef hash_table<libcall_hasher> libcall_table_type;
5399
5400 static void
5401 add_libcall (libcall_table_type *htab, rtx libcall)
5402 {
5403 *htab->find_slot (libcall, INSERT) = libcall;
5404 }
5405
5406 static bool
5407 arm_libcall_uses_aapcs_base (const_rtx libcall)
5408 {
5409 static bool init_done = false;
5410 static libcall_table_type *libcall_htab = NULL;
5411
5412 if (!init_done)
5413 {
5414 init_done = true;
5415
5416 libcall_htab = new libcall_table_type (31);
5417 add_libcall (libcall_htab,
5418 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5425
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5434
5435 add_libcall (libcall_htab,
5436 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5437 add_libcall (libcall_htab,
5438 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5439 add_libcall (libcall_htab,
5440 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5441 add_libcall (libcall_htab,
5442 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5443 add_libcall (libcall_htab,
5444 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5445 add_libcall (libcall_htab,
5446 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5447 add_libcall (libcall_htab,
5448 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5449 add_libcall (libcall_htab,
5450 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5451
5452 /* Values from double-precision helper functions are returned in core
5453 registers if the selected core only supports single-precision
5454 arithmetic, even if we are using the hard-float ABI. The same is
5455 true for single-precision helpers, but we will never be using the
5456 hard-float ABI on a CPU which doesn't support single-precision
5457 operations in hardware. */
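/* Added note for exposition: on a single-precision-only FPU (e.g.
   fpv5-sp-d16) built with -mfloat-abi=hard, a call to a helper such
   as __aeabi_dadd returns its result in r0/r1 rather than in d0,
   which is why the DFmode helpers registered just below are treated
   as using the base PCS. */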
5458 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5466 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5467 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5468 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5469 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5470 SFmode));
5471 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5472 DFmode));
5473 add_libcall (libcall_htab,
5474 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5475 }
5476
5477 return libcall && libcall_htab->find (libcall) != NULL;
5478 }
5479
5480 static rtx
5481 arm_libcall_value_1 (machine_mode mode)
5482 {
5483 if (TARGET_AAPCS_BASED)
5484 return aapcs_libcall_value (mode);
5485 else if (TARGET_IWMMXT_ABI
5486 && arm_vector_mode_supported_p (mode))
5487 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5488 else
5489 return gen_rtx_REG (mode, ARG_REGISTER (1));
5490 }
5491
5492 /* Define how to find the value returned by a library function
5493 assuming the value has mode MODE. */
5494
5495 static rtx
5496 arm_libcall_value (machine_mode mode, const_rtx libcall)
5497 {
5498 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5499 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5500 {
5501 /* The following libcalls return their result in integer registers,
5502 even though they return a floating point value. */
5503 if (arm_libcall_uses_aapcs_base (libcall))
5504 return gen_rtx_REG (mode, ARG_REGISTER(1));
5505
5506 }
5507
5508 return arm_libcall_value_1 (mode);
5509 }
5510
5511 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5512
5513 static bool
5514 arm_function_value_regno_p (const unsigned int regno)
5515 {
5516 if (regno == ARG_REGISTER (1)
5517 || (TARGET_32BIT
5518 && TARGET_AAPCS_BASED
5519 && TARGET_HARD_FLOAT
5520 && regno == FIRST_VFP_REGNUM)
5521 || (TARGET_IWMMXT_ABI
5522 && regno == FIRST_IWMMXT_REGNUM))
5523 return true;
5524
5525 return false;
5526 }
5527
5528 /* Determine the amount of memory needed to store the possible return
5529 registers of an untyped call. */
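/* Added note for exposition: the sizes below are, roughly, 16 bytes
   for the core registers r0-r3, plus 32 bytes when hard-float VFP
   return registers may be used, plus 8 bytes for an iWMMXt
   register. */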
5530 int
5531 arm_apply_result_size (void)
5532 {
5533 int size = 16;
5534
5535 if (TARGET_32BIT)
5536 {
5537 if (TARGET_HARD_FLOAT_ABI)
5538 size += 32;
5539 if (TARGET_IWMMXT_ABI)
5540 size += 8;
5541 }
5542
5543 return size;
5544 }
5545
5546 /* Decide whether TYPE should be returned in memory (true)
5547 or in a register (false). FNTYPE is the type of the function making
5548 the call. */
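/* Illustrative examples (added for exposition): under AAPCS a 4-byte
   struct is returned in r0, an 8-byte struct of two ints goes to
   memory, and a homogeneous aggregate of up to four floats or doubles
   may still be returned in VFP registers when the VFP variant applies
   (the co-processor check below). */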
5549 static bool
5550 arm_return_in_memory (const_tree type, const_tree fntype)
5551 {
5552 HOST_WIDE_INT size;
5553
5554 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5555
5556 if (TARGET_AAPCS_BASED)
5557 {
5558 /* Simple, non-aggregate types (ie not including vectors and
5559 complex) are always returned in a register (or registers).
5560 We don't care about which register here, so we can short-cut
5561 some of the detail. */
5562 if (!AGGREGATE_TYPE_P (type)
5563 && TREE_CODE (type) != VECTOR_TYPE
5564 && TREE_CODE (type) != COMPLEX_TYPE)
5565 return false;
5566
5567 /* Any return value that is no larger than one word can be
5568 returned in r0. */
5569 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5570 return false;
5571
5572 /* Check any available co-processors to see if they accept the
5573 type as a register candidate (VFP, for example, can return
5574 some aggregates in consecutive registers). These aren't
5575 available if the call is variadic. */
5576 if (aapcs_select_return_coproc (type, fntype) >= 0)
5577 return false;
5578
5579 /* Vector values should be returned using ARM registers, not
5580 memory (unless they're over 16 bytes, which will break since
5581 we only have four call-clobbered registers to play with). */
5582 if (TREE_CODE (type) == VECTOR_TYPE)
5583 return (size < 0 || size > (4 * UNITS_PER_WORD));
5584
5585 /* The rest go in memory. */
5586 return true;
5587 }
5588
5589 if (TREE_CODE (type) == VECTOR_TYPE)
5590 return (size < 0 || size > (4 * UNITS_PER_WORD));
5591
5592 if (!AGGREGATE_TYPE_P (type)
5593 && (TREE_CODE (type) != VECTOR_TYPE))
5594 /* All simple types are returned in registers. */
5595 return false;
5596
5597 if (arm_abi != ARM_ABI_APCS)
5598 {
5599 /* ATPCS and later return aggregate types in memory only if they are
5600 larger than a word (or are variable size). */
5601 return (size < 0 || size > UNITS_PER_WORD);
5602 }
5603
5604 /* For the arm-wince targets we choose to be compatible with Microsoft's
5605 ARM and Thumb compilers, which always return aggregates in memory. */
5606 #ifndef ARM_WINCE
5607 /* All structures/unions bigger than one word are returned in memory.
5608 Also catch the case where int_size_in_bytes returns -1. In this case
5609 the aggregate is either huge or of variable size, and in either case
5610 we will want to return it via memory and not in a register. */
5611 if (size < 0 || size > UNITS_PER_WORD)
5612 return true;
5613
5614 if (TREE_CODE (type) == RECORD_TYPE)
5615 {
5616 tree field;
5617
5618 /* For a struct the APCS says that we only return in a register
5619 if the type is 'integer like' and every addressable element
5620 has an offset of zero. For practical purposes this means
5621 that the structure can have at most one non bit-field element
5622 and that this element must be the first one in the structure. */
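/* Illustrative examples (added for exposition): under APCS
   struct { int x; } is returned in r0, while struct { float f; } and
   struct { char a; char b; } both go to memory -- the former because
   of the float member, the latter because its second addressable
   (non bit-field) member does not sit at offset zero. */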
5623
5624 /* Find the first field, ignoring non FIELD_DECL things which will
5625 have been created by C++. */
5626 for (field = TYPE_FIELDS (type);
5627 field && TREE_CODE (field) != FIELD_DECL;
5628 field = DECL_CHAIN (field))
5629 continue;
5630
5631 if (field == NULL)
5632 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5633
5634 /* Check that the first field is valid for returning in a register. */
5635
5636 /* ... Floats are not allowed */
5637 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5638 return true;
5639
5640 /* ... Aggregates that are not themselves valid for returning in
5641 a register are not allowed. */
5642 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5643 return true;
5644
5645 /* Now check the remaining fields, if any. Only bitfields are allowed,
5646 since they are not addressable. */
5647 for (field = DECL_CHAIN (field);
5648 field;
5649 field = DECL_CHAIN (field))
5650 {
5651 if (TREE_CODE (field) != FIELD_DECL)
5652 continue;
5653
5654 if (!DECL_BIT_FIELD_TYPE (field))
5655 return true;
5656 }
5657
5658 return false;
5659 }
5660
5661 if (TREE_CODE (type) == UNION_TYPE)
5662 {
5663 tree field;
5664
5665 /* Unions can be returned in registers if every element is
5666 integral, or can be returned in an integer register. */
5667 for (field = TYPE_FIELDS (type);
5668 field;
5669 field = DECL_CHAIN (field))
5670 {
5671 if (TREE_CODE (field) != FIELD_DECL)
5672 continue;
5673
5674 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5675 return true;
5676
5677 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5678 return true;
5679 }
5680
5681 return false;
5682 }
5683 #endif /* not ARM_WINCE */
5684
5685 /* Return all other types in memory. */
5686 return true;
5687 }
5688
5689 const struct pcs_attribute_arg
5690 {
5691 const char *arg;
5692 enum arm_pcs value;
5693 } pcs_attribute_args[] =
5694 {
5695 {"aapcs", ARM_PCS_AAPCS},
5696 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5697 #if 0
5698 /* We could recognize these, but changes would be needed elsewhere
5699 * to implement them. */
5700 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5701 {"atpcs", ARM_PCS_ATPCS},
5702 {"apcs", ARM_PCS_APCS},
5703 #endif
5704 {NULL, ARM_PCS_UNKNOWN}
5705 };
5706
5707 static enum arm_pcs
5708 arm_pcs_from_attribute (tree attr)
5709 {
5710 const struct pcs_attribute_arg *ptr;
5711 const char *arg;
5712
5713 /* Get the value of the argument. */
5714 if (TREE_VALUE (attr) == NULL_TREE
5715 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5716 return ARM_PCS_UNKNOWN;
5717
5718 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5719
5720 /* Check it against the list of known arguments. */
5721 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5722 if (streq (arg, ptr->arg))
5723 return ptr->value;
5724
5725 /* An unrecognized PCS name. */
5726 return ARM_PCS_UNKNOWN;
5727 }
5728
5729 /* Get the PCS variant to use for this call. TYPE is the function's type
5730 specification, DECL is the specific declaration. DECL may be null if
5731 the call could be indirect or if this is a library call. */
5732 static enum arm_pcs
5733 arm_get_pcs_model (const_tree type, const_tree decl)
5734 {
5735 bool user_convention = false;
5736 enum arm_pcs user_pcs = arm_pcs_default;
5737 tree attr;
5738
5739 gcc_assert (type);
5740
5741 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5742 if (attr)
5743 {
5744 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5745 user_convention = true;
5746 }
5747
5748 if (TARGET_AAPCS_BASED)
5749 {
5750 /* Detect varargs functions. These always use the base rules
5751 (no argument is ever a candidate for a co-processor
5752 register). */
5753 bool base_rules = stdarg_p (type);
5754
5755 if (user_convention)
5756 {
5757 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5758 sorry ("non-AAPCS derived PCS variant");
5759 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5760 error ("variadic functions must use the base AAPCS variant");
5761 }
5762
5763 if (base_rules)
5764 return ARM_PCS_AAPCS;
5765 else if (user_convention)
5766 return user_pcs;
5767 else if (decl && flag_unit_at_a_time)
5768 {
5769 /* Local functions never leak outside this compilation unit,
5770 so we are free to use whatever conventions are
5771 appropriate. */
5772 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5773 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5774 if (i && i->local)
5775 return ARM_PCS_AAPCS_LOCAL;
5776 }
5777 }
5778 else if (user_convention && user_pcs != arm_pcs_default)
5779 sorry ("PCS variant");
5780
5781 /* For everything else we use the target's default. */
5782 return arm_pcs_default;
5783 }
5784
5785
5786 static void
5787 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5788 const_tree fntype ATTRIBUTE_UNUSED,
5789 rtx libcall ATTRIBUTE_UNUSED,
5790 const_tree fndecl ATTRIBUTE_UNUSED)
5791 {
5792 /* Record the unallocated VFP registers. */
5793 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5794 pcum->aapcs_vfp_reg_alloc = 0;
5795 }
5796
5797 /* Walk down the type tree of TYPE counting consecutive base elements.
5798 If *MODEP is VOIDmode, then set it to the first valid floating point
5799 type. If a non-floating point type is found, or if a floating point
5800 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5801 otherwise return the count in the sub-tree. */
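/* Illustrative examples (added for exposition): for
   struct { double x, y; } this returns 2 with *MODEP set to DFmode
   (a homogeneous aggregate of two doubles), whereas for
   struct { float f; double d; } it returns -1 because the elements
   do not share a single base mode. */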
5802 static int
5803 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5804 {
5805 machine_mode mode;
5806 HOST_WIDE_INT size;
5807
5808 switch (TREE_CODE (type))
5809 {
5810 case REAL_TYPE:
5811 mode = TYPE_MODE (type);
5812 if (mode != DFmode && mode != SFmode && mode != HFmode)
5813 return -1;
5814
5815 if (*modep == VOIDmode)
5816 *modep = mode;
5817
5818 if (*modep == mode)
5819 return 1;
5820
5821 break;
5822
5823 case COMPLEX_TYPE:
5824 mode = TYPE_MODE (TREE_TYPE (type));
5825 if (mode != DFmode && mode != SFmode)
5826 return -1;
5827
5828 if (*modep == VOIDmode)
5829 *modep = mode;
5830
5831 if (*modep == mode)
5832 return 2;
5833
5834 break;
5835
5836 case VECTOR_TYPE:
5837 /* Use V2SImode and V4SImode as representatives of all 64-bit
5838 and 128-bit vector types, whether or not those modes are
5839 supported with the present options. */
5840 size = int_size_in_bytes (type);
5841 switch (size)
5842 {
5843 case 8:
5844 mode = V2SImode;
5845 break;
5846 case 16:
5847 mode = V4SImode;
5848 break;
5849 default:
5850 return -1;
5851 }
5852
5853 if (*modep == VOIDmode)
5854 *modep = mode;
5855
5856 /* Vector modes are considered to be opaque: two vectors are
5857 equivalent for the purposes of being homogeneous aggregates
5858 if they are the same size. */
5859 if (*modep == mode)
5860 return 1;
5861
5862 break;
5863
5864 case ARRAY_TYPE:
5865 {
5866 int count;
5867 tree index = TYPE_DOMAIN (type);
5868
5869 /* Can't handle incomplete types nor sizes that are not
5870 fixed. */
5871 if (!COMPLETE_TYPE_P (type)
5872 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5873 return -1;
5874
5875 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5876 if (count == -1
5877 || !index
5878 || !TYPE_MAX_VALUE (index)
5879 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5880 || !TYPE_MIN_VALUE (index)
5881 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5882 || count < 0)
5883 return -1;
5884
5885 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5886 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5887
5888 /* There must be no padding. */
5889 if (wi::to_wide (TYPE_SIZE (type))
5890 != count * GET_MODE_BITSIZE (*modep))
5891 return -1;
5892
5893 return count;
5894 }
5895
5896 case RECORD_TYPE:
5897 {
5898 int count = 0;
5899 int sub_count;
5900 tree field;
5901
5902 /* Can't handle incomplete types nor sizes that are not
5903 fixed. */
5904 if (!COMPLETE_TYPE_P (type)
5905 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5906 return -1;
5907
5908 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5909 {
5910 if (TREE_CODE (field) != FIELD_DECL)
5911 continue;
5912
5913 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5914 if (sub_count < 0)
5915 return -1;
5916 count += sub_count;
5917 }
5918
5919 /* There must be no padding. */
5920 if (wi::to_wide (TYPE_SIZE (type))
5921 != count * GET_MODE_BITSIZE (*modep))
5922 return -1;
5923
5924 return count;
5925 }
5926
5927 case UNION_TYPE:
5928 case QUAL_UNION_TYPE:
5929 {
5930 /* These aren't very interesting except in a degenerate case. */
5931 int count = 0;
5932 int sub_count;
5933 tree field;
5934
5935 /* Can't handle incomplete types nor sizes that are not
5936 fixed. */
5937 if (!COMPLETE_TYPE_P (type)
5938 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5939 return -1;
5940
5941 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5942 {
5943 if (TREE_CODE (field) != FIELD_DECL)
5944 continue;
5945
5946 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5947 if (sub_count < 0)
5948 return -1;
5949 count = count > sub_count ? count : sub_count;
5950 }
5951
5952 /* There must be no padding. */
5953 if (wi::to_wide (TYPE_SIZE (type))
5954 != count * GET_MODE_BITSIZE (*modep))
5955 return -1;
5956
5957 return count;
5958 }
5959
5960 default:
5961 break;
5962 }
5963
5964 return -1;
5965 }
5966
5967 /* Return true if PCS_VARIANT should use VFP registers. */
5968 static bool
5969 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5970 {
5971 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5972 {
5973 static bool seen_thumb1_vfp = false;
5974
5975 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5976 {
5977 sorry ("Thumb-1 hard-float VFP ABI");
5978 /* sorry() is not immediately fatal, so only display this once. */
5979 seen_thumb1_vfp = true;
5980 }
5981
5982 return true;
5983 }
5984
5985 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5986 return false;
5987
5988 return (TARGET_32BIT && TARGET_HARD_FLOAT
5989 && (TARGET_VFP_DOUBLE || !is_double));
5990 }
5991
5992 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5993 suitable for passing or returning in VFP registers for the PCS
5994 variant selected. If it is, then *BASE_MODE is updated to contain
5995 a machine mode describing each element of the argument's type and
5996 *COUNT to hold the number of such elements. */
5997 static bool
5998 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5999 machine_mode mode, const_tree type,
6000 machine_mode *base_mode, int *count)
6001 {
6002 machine_mode new_mode = VOIDmode;
6003
6004 /* If we have the type information, prefer that to working things
6005 out from the mode. */
6006 if (type)
6007 {
6008 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6009
6010 if (ag_count > 0 && ag_count <= 4)
6011 *count = ag_count;
6012 else
6013 return false;
6014 }
6015 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6016 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6017 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6018 {
6019 *count = 1;
6020 new_mode = mode;
6021 }
6022 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6023 {
6024 *count = 2;
6025 new_mode = (mode == DCmode ? DFmode : SFmode);
6026 }
6027 else
6028 return false;
6029
6030
6031 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6032 return false;
6033
6034 *base_mode = new_mode;
6035 return true;
6036 }
6037
6038 static bool
6039 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6040 machine_mode mode, const_tree type)
6041 {
6042 int count ATTRIBUTE_UNUSED;
6043 machine_mode ag_mode ATTRIBUTE_UNUSED;
6044
6045 if (!use_vfp_abi (pcs_variant, false))
6046 return false;
6047 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6048 &ag_mode, &count);
6049 }
6050
6051 static bool
6052 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6053 const_tree type)
6054 {
6055 if (!use_vfp_abi (pcum->pcs_variant, false))
6056 return false;
6057
6058 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6059 &pcum->aapcs_vfp_rmode,
6060 &pcum->aapcs_vfp_rcount);
6061 }
6062
6063 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6064 for the behaviour of this function. */
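/* Illustrative example (added for exposition): for a single DFmode
   argument aapcs_vfp_rmode is DFmode and aapcs_vfp_rcount is 1, so
   rmode_size is 8, shift is 2 and mask is 0x3; the loop below scans
   even-numbered S registers and allocates d0 (s0/s1) if that pair is
   still free. */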
6065
6066 static bool
6067 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6068 const_tree type ATTRIBUTE_UNUSED)
6069 {
6070 int rmode_size
6071 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6072 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6073 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6074 int regno;
6075
6076 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6077 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6078 {
6079 pcum->aapcs_vfp_reg_alloc = mask << regno;
6080 if (mode == BLKmode
6081 || (mode == TImode && ! TARGET_NEON)
6082 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6083 {
6084 int i;
6085 int rcount = pcum->aapcs_vfp_rcount;
6086 int rshift = shift;
6087 machine_mode rmode = pcum->aapcs_vfp_rmode;
6088 rtx par;
6089 if (!TARGET_NEON)
6090 {
6091 /* Avoid using unsupported vector modes. */
6092 if (rmode == V2SImode)
6093 rmode = DImode;
6094 else if (rmode == V4SImode)
6095 {
6096 rmode = DImode;
6097 rcount *= 2;
6098 rshift /= 2;
6099 }
6100 }
6101 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6102 for (i = 0; i < rcount; i++)
6103 {
6104 rtx tmp = gen_rtx_REG (rmode,
6105 FIRST_VFP_REGNUM + regno + i * rshift);
6106 tmp = gen_rtx_EXPR_LIST
6107 (VOIDmode, tmp,
6108 GEN_INT (i * GET_MODE_SIZE (rmode)));
6109 XVECEXP (par, 0, i) = tmp;
6110 }
6111
6112 pcum->aapcs_reg = par;
6113 }
6114 else
6115 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6116 return true;
6117 }
6118 return false;
6119 }
6120
6121 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6122 comment there for the behaviour of this function. */
6123
6124 static rtx
6125 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6126 machine_mode mode,
6127 const_tree type ATTRIBUTE_UNUSED)
6128 {
6129 if (!use_vfp_abi (pcs_variant, false))
6130 return NULL;
6131
6132 if (mode == BLKmode
6133 || (GET_MODE_CLASS (mode) == MODE_INT
6134 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6135 && !TARGET_NEON))
6136 {
6137 int count;
6138 machine_mode ag_mode;
6139 int i;
6140 rtx par;
6141 int shift;
6142
6143 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6144 &ag_mode, &count);
6145
6146 if (!TARGET_NEON)
6147 {
6148 if (ag_mode == V2SImode)
6149 ag_mode = DImode;
6150 else if (ag_mode == V4SImode)
6151 {
6152 ag_mode = DImode;
6153 count *= 2;
6154 }
6155 }
6156 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6157 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6158 for (i = 0; i < count; i++)
6159 {
6160 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6161 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6162 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6163 XVECEXP (par, 0, i) = tmp;
6164 }
6165
6166 return par;
6167 }
6168
6169 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6170 }
6171
6172 static void
6173 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6174 machine_mode mode ATTRIBUTE_UNUSED,
6175 const_tree type ATTRIBUTE_UNUSED)
6176 {
6177 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6178 pcum->aapcs_vfp_reg_alloc = 0;
6179 return;
6180 }
6181
6182 #define AAPCS_CP(X) \
6183 { \
6184 aapcs_ ## X ## _cum_init, \
6185 aapcs_ ## X ## _is_call_candidate, \
6186 aapcs_ ## X ## _allocate, \
6187 aapcs_ ## X ## _is_return_candidate, \
6188 aapcs_ ## X ## _allocate_return_reg, \
6189 aapcs_ ## X ## _advance \
6190 }
6191
6192 /* Table of co-processors that can be used to pass arguments in
6193 registers. Ideally no argument should be a candidate for more than
6194 one co-processor table entry, but the table is processed in order
6195 and stops after the first match. If that entry then fails to put
6196 the argument into a co-processor register, the argument will go on
6197 the stack. */
6198 static struct
6199 {
6200 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6201 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6202
6203 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6204 BLKmode) is a candidate for this co-processor's registers; this
6205 function should ignore any position-dependent state in
6206 CUMULATIVE_ARGS and only use call-type dependent information. */
6207 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6208
6209 /* Return true if the argument does get a co-processor register; it
6210 should set aapcs_reg to an RTX of the register allocated as is
6211 required for a return from FUNCTION_ARG. */
6212 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6213
6214 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6215 be returned in this co-processor's registers. */
6216 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6217
6218 /* Allocate and return an RTX element to hold the return type of a call. This
6219 routine must not fail and will only be called if is_return_candidate
6220 returned true with the same parameters. */
6221 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6222
6223 /* Finish processing this argument and prepare to start processing
6224 the next one. */
6225 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6226 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6227 {
6228 AAPCS_CP(vfp)
6229 };
6230
6231 #undef AAPCS_CP
6232
6233 static int
6234 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6235 const_tree type)
6236 {
6237 int i;
6238
6239 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6240 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6241 return i;
6242
6243 return -1;
6244 }
6245
6246 static int
6247 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6248 {
6249 /* We aren't passed a decl, so we can't check that a call is local.
6250 However, it isn't clear that that would be a win anyway, since it
6251 might limit some tail-calling opportunities. */
6252 enum arm_pcs pcs_variant;
6253
6254 if (fntype)
6255 {
6256 const_tree fndecl = NULL_TREE;
6257
6258 if (TREE_CODE (fntype) == FUNCTION_DECL)
6259 {
6260 fndecl = fntype;
6261 fntype = TREE_TYPE (fntype);
6262 }
6263
6264 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6265 }
6266 else
6267 pcs_variant = arm_pcs_default;
6268
6269 if (pcs_variant != ARM_PCS_AAPCS)
6270 {
6271 int i;
6272
6273 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6274 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6275 TYPE_MODE (type),
6276 type))
6277 return i;
6278 }
6279 return -1;
6280 }
6281
6282 static rtx
6283 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6284 const_tree fntype)
6285 {
6286 /* We aren't passed a decl, so we can't check that a call is local.
6287 However, it isn't clear that that would be a win anyway, since it
6288 might limit some tail-calling opportunities. */
6289 enum arm_pcs pcs_variant;
6290 int unsignedp ATTRIBUTE_UNUSED;
6291
6292 if (fntype)
6293 {
6294 const_tree fndecl = NULL_TREE;
6295
6296 if (TREE_CODE (fntype) == FUNCTION_DECL)
6297 {
6298 fndecl = fntype;
6299 fntype = TREE_TYPE (fntype);
6300 }
6301
6302 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6303 }
6304 else
6305 pcs_variant = arm_pcs_default;
6306
6307 /* Promote integer types. */
6308 if (type && INTEGRAL_TYPE_P (type))
6309 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6310
6311 if (pcs_variant != ARM_PCS_AAPCS)
6312 {
6313 int i;
6314
6315 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6316 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6317 type))
6318 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6319 mode, type);
6320 }
6321
6322 /* Promote small structs returned in a register to full-word size
6323 for big-endian AAPCS. */
6324 if (type && arm_return_in_msb (type))
6325 {
6326 HOST_WIDE_INT size = int_size_in_bytes (type);
6327 if (size % UNITS_PER_WORD != 0)
6328 {
6329 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6330 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6331 }
6332 }
6333
6334 return gen_rtx_REG (mode, R0_REGNUM);
6335 }
6336
6337 static rtx
6338 aapcs_libcall_value (machine_mode mode)
6339 {
6340 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6341 && GET_MODE_SIZE (mode) <= 4)
6342 mode = SImode;
6343
6344 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6345 }
6346
6347 /* Lay out a function argument using the AAPCS rules. The rule
6348 numbers referred to here are those in the AAPCS. */
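/* Illustrative example (added for exposition): for a call such as
   f (int a, double b) under the base AAPCS, A is allocated to r0
   (rule C4); B requires doubleword alignment, so rule C3 rounds the
   NCRN up from 1 to 2 and B is passed in r2/r3.  Any further
   arguments would then go on the stack (rules C5-C8 below). */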
6349 static void
6350 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6351 const_tree type, bool named)
6352 {
6353 int nregs, nregs2;
6354 int ncrn;
6355
6356 /* We only need to do this once per argument. */
6357 if (pcum->aapcs_arg_processed)
6358 return;
6359
6360 pcum->aapcs_arg_processed = true;
6361
6362 /* Special case: if named is false then we are handling an incoming
6363 anonymous argument which is on the stack. */
6364 if (!named)
6365 return;
6366
6367 /* Is this a potential co-processor register candidate? */
6368 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6369 {
6370 int slot = aapcs_select_call_coproc (pcum, mode, type);
6371 pcum->aapcs_cprc_slot = slot;
6372
6373 /* We don't have to apply any of the rules from part B of the
6374 preparation phase, these are handled elsewhere in the
6375 compiler. */
6376
6377 if (slot >= 0)
6378 {
6379 /* A Co-processor register candidate goes either in its own
6380 class of registers or on the stack. */
6381 if (!pcum->aapcs_cprc_failed[slot])
6382 {
6383 /* C1.cp - Try to allocate the argument to co-processor
6384 registers. */
6385 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6386 return;
6387
6388 /* C2.cp - Put the argument on the stack and note that we
6389 can't assign any more candidates in this slot. We also
6390 need to note that we have allocated stack space, so that
6391 we won't later try to split a non-cprc candidate between
6392 core registers and the stack. */
6393 pcum->aapcs_cprc_failed[slot] = true;
6394 pcum->can_split = false;
6395 }
6396
6397 /* We didn't get a register, so this argument goes on the
6398 stack. */
6399 gcc_assert (pcum->can_split == false);
6400 return;
6401 }
6402 }
6403
6404 /* C3 - For double-word aligned arguments, round the NCRN up to the
6405 next even number. */
6406 ncrn = pcum->aapcs_ncrn;
6407 if (ncrn & 1)
6408 {
6409 int res = arm_needs_doubleword_align (mode, type);
6410 /* Only warn during RTL expansion of call stmts, otherwise we would
6411 warn e.g. during gimplification even on functions that will be
6412 always inlined, and we'd warn multiple times. Don't warn when
6413 called in expand_function_start either, as we warn instead in
6414 arm_function_arg_boundary in that case. */
6415 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6416 inform (input_location, "parameter passing for argument of type "
6417 "%qT changed in GCC 7.1", type);
6418 else if (res > 0)
6419 ncrn++;
6420 }
6421
6422 nregs = ARM_NUM_REGS2(mode, type);
6423
6424 /* Sigh, this test should really assert that nregs > 0, but a GCC
6425 extension allows empty structs and then gives them empty size; it
6426 then allows such a structure to be passed by value. For some of
6427 the code below we have to pretend that such an argument has
6428 non-zero size so that we 'locate' it correctly either in
6429 registers or on the stack. */
6430 gcc_assert (nregs >= 0);
6431
6432 nregs2 = nregs ? nregs : 1;
6433
6434 /* C4 - Argument fits entirely in core registers. */
6435 if (ncrn + nregs2 <= NUM_ARG_REGS)
6436 {
6437 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6438 pcum->aapcs_next_ncrn = ncrn + nregs;
6439 return;
6440 }
6441
6442 /* C5 - Some core registers left and there are no arguments already
6443 on the stack: split this argument between the remaining core
6444 registers and the stack. */
6445 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6446 {
6447 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6448 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6449 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6450 return;
6451 }
6452
6453 /* C6 - NCRN is set to 4. */
6454 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6455
6456 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
6457 return;
6458 }
6459
6460 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6461 for a call to a function whose data type is FNTYPE.
6462 For a library call, FNTYPE is NULL. */
6463 void
6464 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6465 rtx libname,
6466 tree fndecl ATTRIBUTE_UNUSED)
6467 {
6468 /* Long call handling. */
6469 if (fntype)
6470 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6471 else
6472 pcum->pcs_variant = arm_pcs_default;
6473
6474 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6475 {
6476 if (arm_libcall_uses_aapcs_base (libname))
6477 pcum->pcs_variant = ARM_PCS_AAPCS;
6478
6479 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6480 pcum->aapcs_reg = NULL_RTX;
6481 pcum->aapcs_partial = 0;
6482 pcum->aapcs_arg_processed = false;
6483 pcum->aapcs_cprc_slot = -1;
6484 pcum->can_split = true;
6485
6486 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6487 {
6488 int i;
6489
6490 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6491 {
6492 pcum->aapcs_cprc_failed[i] = false;
6493 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6494 }
6495 }
6496 return;
6497 }
6498
6499 /* Legacy ABIs */
6500
6501 /* On the ARM, the offset starts at 0. */
6502 pcum->nregs = 0;
6503 pcum->iwmmxt_nregs = 0;
6504 pcum->can_split = true;
6505
6506 /* Varargs vectors are treated the same as long long.
6507 named_count avoids having to change the way arm handles 'named' */
6508 pcum->named_count = 0;
6509 pcum->nargs = 0;
6510
6511 if (TARGET_REALLY_IWMMXT && fntype)
6512 {
6513 tree fn_arg;
6514
6515 for (fn_arg = TYPE_ARG_TYPES (fntype);
6516 fn_arg;
6517 fn_arg = TREE_CHAIN (fn_arg))
6518 pcum->named_count += 1;
6519
6520 if (! pcum->named_count)
6521 pcum->named_count = INT_MAX;
6522 }
6523 }
6524
6525 /* Return 1 if double word alignment is required for argument passing.
6526 Return -1 if double word alignment used to be required for argument
6527 passing before PR77728 ABI fix, but is not required anymore.
6528 Return 0 if double word alignment is not required and wasn't required
6529 before either. */
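/* Illustrative examples (added for exposition): a double or DImode
   argument has 64-bit alignment, which exceeds PARM_BOUNDARY (32 on
   ARM), so this returns 1; a struct whose only over-aligned entry in
   TYPE_FIELDS is not a FIELD_DECL (e.g. a C++ static data member)
   returns -1 so that -Wpsabi can flag the pre-PR77728 behaviour. */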
6530 static int
6531 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6532 {
6533 if (!type)
6534 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6535
6536 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6537 if (!AGGREGATE_TYPE_P (type))
6538 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6539
6540 /* Array types: Use member alignment of element type. */
6541 if (TREE_CODE (type) == ARRAY_TYPE)
6542 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6543
6544 int ret = 0;
6545 /* Record/aggregate types: Use greatest member alignment of any member. */
6546 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6547 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6548 {
6549 if (TREE_CODE (field) == FIELD_DECL)
6550 return 1;
6551 else
6552 /* Before PR77728 fix, we were incorrectly considering also
6553 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6554 Make sure we can warn about that with -Wpsabi. */
6555 ret = -1;
6556 }
6557
6558 return ret;
6559 }
6560
6561
6562 /* Determine where to put an argument to a function.
6563 Value is zero to push the argument on the stack,
6564 or a hard register in which to store the argument.
6565
6566 MODE is the argument's machine mode.
6567 TYPE is the data type of the argument (as a tree).
6568 This is null for libcalls where that information may
6569 not be available.
6570 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6571 the preceding args and about the function being called.
6572 NAMED is nonzero if this argument is a named parameter
6573 (otherwise it is an extra parameter matching an ellipsis).
6574
6575 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6576 other arguments are passed on the stack. If (NAMED == 0) (which happens
6577 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6578 defined), say it is passed on the stack (function_prologue will
6579 indeed make it be passed on the stack if necessary). */
6580
6581 static rtx
6582 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6583 const_tree type, bool named)
6584 {
6585 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6586 int nregs;
6587
6588 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6589 a call insn (op3 of a call_value insn). */
6590 if (mode == VOIDmode)
6591 return const0_rtx;
6592
6593 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6594 {
6595 aapcs_layout_arg (pcum, mode, type, named);
6596 return pcum->aapcs_reg;
6597 }
6598
6599 /* Varargs vectors are treated the same as long long.
6600 named_count avoids having to change the way arm handles 'named' */
6601 if (TARGET_IWMMXT_ABI
6602 && arm_vector_mode_supported_p (mode)
6603 && pcum->named_count > pcum->nargs + 1)
6604 {
6605 if (pcum->iwmmxt_nregs <= 9)
6606 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6607 else
6608 {
6609 pcum->can_split = false;
6610 return NULL_RTX;
6611 }
6612 }
6613
6614 /* Put doubleword aligned quantities in even register pairs. */
6615 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6616 {
6617 int res = arm_needs_doubleword_align (mode, type);
6618 if (res < 0 && warn_psabi)
6619 inform (input_location, "parameter passing for argument of type "
6620 "%qT changed in GCC 7.1", type);
6621 else if (res > 0)
6622 pcum->nregs++;
6623 }
6624
6625 /* Only allow splitting an arg between regs and memory if all preceding
6626 args were allocated to regs. For args passed by reference we only count
6627 the reference pointer. */
6628 if (pcum->can_split)
6629 nregs = 1;
6630 else
6631 nregs = ARM_NUM_REGS2 (mode, type);
6632
6633 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6634 return NULL_RTX;
6635
6636 return gen_rtx_REG (mode, pcum->nregs);
6637 }
6638
6639 static unsigned int
6640 arm_function_arg_boundary (machine_mode mode, const_tree type)
6641 {
6642 if (!ARM_DOUBLEWORD_ALIGN)
6643 return PARM_BOUNDARY;
6644
6645 int res = arm_needs_doubleword_align (mode, type);
6646 if (res < 0 && warn_psabi)
6647 inform (input_location, "parameter passing for argument of type %qT "
6648 "changed in GCC 7.1", type);
6649
6650 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6651 }
6652
6653 static int
6654 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6655 tree type, bool named)
6656 {
6657 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6658 int nregs = pcum->nregs;
6659
6660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6661 {
6662 aapcs_layout_arg (pcum, mode, type, named);
6663 return pcum->aapcs_partial;
6664 }
6665
6666 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6667 return 0;
6668
6669 if (NUM_ARG_REGS > nregs
6670 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6671 && pcum->can_split)
6672 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6673
6674 return 0;
6675 }
6676
6677 /* Update the data in PCUM to advance over an argument
6678 of mode MODE and data type TYPE.
6679 (TYPE is null for libcalls where that information may not be available.) */
6680
6681 static void
6682 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6683 const_tree type, bool named)
6684 {
6685 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6686
6687 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6688 {
6689 aapcs_layout_arg (pcum, mode, type, named);
6690
6691 if (pcum->aapcs_cprc_slot >= 0)
6692 {
6693 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6694 type);
6695 pcum->aapcs_cprc_slot = -1;
6696 }
6697
6698 /* Generic stuff. */
6699 pcum->aapcs_arg_processed = false;
6700 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6701 pcum->aapcs_reg = NULL_RTX;
6702 pcum->aapcs_partial = 0;
6703 }
6704 else
6705 {
6706 pcum->nargs += 1;
6707 if (arm_vector_mode_supported_p (mode)
6708 && pcum->named_count > pcum->nargs
6709 && TARGET_IWMMXT_ABI)
6710 pcum->iwmmxt_nregs += 1;
6711 else
6712 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6713 }
6714 }
6715
6716 /* Variable sized types are passed by reference. This is a GCC
6717 extension to the ARM ABI. */
6718
6719 static bool
6720 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6721 machine_mode mode ATTRIBUTE_UNUSED,
6722 const_tree type, bool named ATTRIBUTE_UNUSED)
6723 {
6724 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6725 }
6726 \f
6727 /* Encode the current state of the #pragma [no_]long_calls. */
6728 typedef enum
6729 {
6730 OFF, /* No #pragma [no_]long_calls is in effect. */
6731 LONG, /* #pragma long_calls is in effect. */
6732 SHORT /* #pragma no_long_calls is in effect. */
6733 } arm_pragma_enum;
6734
6735 static arm_pragma_enum arm_pragma_long_calls = OFF;
6736
6737 void
6738 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6739 {
6740 arm_pragma_long_calls = LONG;
6741 }
6742
6743 void
6744 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6745 {
6746 arm_pragma_long_calls = SHORT;
6747 }
6748
6749 void
6750 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6751 {
6752 arm_pragma_long_calls = OFF;
6753 }
6754 \f
6755 /* Handle an attribute requiring a FUNCTION_DECL;
6756 arguments as in struct attribute_spec.handler. */
6757 static tree
6758 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6759 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6760 {
6761 if (TREE_CODE (*node) != FUNCTION_DECL)
6762 {
6763 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6764 name);
6765 *no_add_attrs = true;
6766 }
6767
6768 return NULL_TREE;
6769 }
6770
6771 /* Handle an "interrupt" or "isr" attribute;
6772 arguments as in struct attribute_spec.handler. */
6773 static tree
6774 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6775 bool *no_add_attrs)
6776 {
6777 if (DECL_P (*node))
6778 {
6779 if (TREE_CODE (*node) != FUNCTION_DECL)
6780 {
6781 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6782 name);
6783 *no_add_attrs = true;
6784 }
6785 /* FIXME: the argument if any is checked for type attributes;
6786 should it be checked for decl ones? */
6787 }
6788 else
6789 {
6790 if (TREE_CODE (*node) == FUNCTION_TYPE
6791 || TREE_CODE (*node) == METHOD_TYPE)
6792 {
6793 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6794 {
6795 warning (OPT_Wattributes, "%qE attribute ignored",
6796 name);
6797 *no_add_attrs = true;
6798 }
6799 }
6800 else if (TREE_CODE (*node) == POINTER_TYPE
6801 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6802 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6803 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6804 {
6805 *node = build_variant_type_copy (*node);
6806 TREE_TYPE (*node) = build_type_attribute_variant
6807 (TREE_TYPE (*node),
6808 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6809 *no_add_attrs = true;
6810 }
6811 else
6812 {
6813 /* Possibly pass this attribute on from the type to a decl. */
6814 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6815 | (int) ATTR_FLAG_FUNCTION_NEXT
6816 | (int) ATTR_FLAG_ARRAY_NEXT))
6817 {
6818 *no_add_attrs = true;
6819 return tree_cons (name, args, NULL_TREE);
6820 }
6821 else
6822 {
6823 warning (OPT_Wattributes, "%qE attribute ignored",
6824 name);
6825 }
6826 }
6827 }
6828
6829 return NULL_TREE;
6830 }
6831
6832 /* Handle a "pcs" attribute; arguments as in struct
6833 attribute_spec.handler. */
6834 static tree
6835 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6836 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6837 {
6838 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6839 {
6840 warning (OPT_Wattributes, "%qE attribute ignored", name);
6841 *no_add_attrs = true;
6842 }
6843 return NULL_TREE;
6844 }
6845
6846 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6847 /* Handle the "notshared" attribute. This attribute is another way of
6848 requesting hidden visibility. ARM's compiler supports
6849 "__declspec(notshared)"; we support the same thing via an
6850 attribute. */
6851
6852 static tree
6853 arm_handle_notshared_attribute (tree *node,
6854 tree name ATTRIBUTE_UNUSED,
6855 tree args ATTRIBUTE_UNUSED,
6856 int flags ATTRIBUTE_UNUSED,
6857 bool *no_add_attrs)
6858 {
6859 tree decl = TYPE_NAME (*node);
6860
6861 if (decl)
6862 {
6863 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6864 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6865 *no_add_attrs = false;
6866 }
6867 return NULL_TREE;
6868 }
6869 #endif
6870
6871 /* This function returns true if a function with declaration FNDECL and type
6872 FNTYPE uses the stack to pass arguments or return variables and false
6873 otherwise. This is used for functions with the attributes
6874 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6875 diagnostic messages if the stack is used. NAME is the name of the attribute
6876 used. */
6877
6878 static bool
6879 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6880 {
6881 function_args_iterator args_iter;
6882 CUMULATIVE_ARGS args_so_far_v;
6883 cumulative_args_t args_so_far;
6884 bool first_param = true;
6885 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6886
6887 /* Error out if any argument is passed on the stack. */
6888 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6889 args_so_far = pack_cumulative_args (&args_so_far_v);
6890 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6891 {
6892 rtx arg_rtx;
6893 machine_mode arg_mode = TYPE_MODE (arg_type);
6894
6895 prev_arg_type = arg_type;
6896 if (VOID_TYPE_P (arg_type))
6897 continue;
6898
6899 if (!first_param)
6900 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6901 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6902 if (!arg_rtx
6903 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6904 {
6905 error ("%qE attribute not available to functions with arguments "
6906 "passed on the stack", name);
6907 return true;
6908 }
6909 first_param = false;
6910 }
6911
6912 /* Error out for variadic functions since we cannot control how many
6913 arguments will be passed and thus the stack could be used. We do not
6914 use stdarg_p () for this check, to avoid walking the arguments twice. */
6915 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6916 {
6917 error ("%qE attribute not available to functions with variable number "
6918 "of arguments", name);
6919 return true;
6920 }
6921
6922 /* Error out if return value is passed on the stack. */
6923 ret_type = TREE_TYPE (fntype);
6924 if (arm_return_in_memory (ret_type, fntype))
6925 {
6926 error ("%qE attribute not available to functions that return value on "
6927 "the stack", name);
6928 return true;
6929 }
6930 return false;
6931 }
6932
6933 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6934 function will check whether the attribute is allowed here and will add the
6935 attribute to the function declaration tree or otherwise issue a warning. */
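/* Illustrative source-level usage (only honoured when compiling with
   -mcmse):
     int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int);  */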
6936
6937 static tree
6938 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6939 tree /* args */,
6940 int /* flags */,
6941 bool *no_add_attrs)
6942 {
6943 tree fndecl;
6944
6945 if (!use_cmse)
6946 {
6947 *no_add_attrs = true;
6948 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6949 name);
6950 return NULL_TREE;
6951 }
6952
6953 /* Ignore attribute for function types. */
6954 if (TREE_CODE (*node) != FUNCTION_DECL)
6955 {
6956 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6957 name);
6958 *no_add_attrs = true;
6959 return NULL_TREE;
6960 }
6961
6962 fndecl = *node;
6963
6964 /* Warn for static linkage functions. */
6965 if (!TREE_PUBLIC (fndecl))
6966 {
6967 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6968 "with static linkage", name);
6969 *no_add_attrs = true;
6970 return NULL_TREE;
6971 }
6972
6973 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6974 TREE_TYPE (fndecl));
6975 return NULL_TREE;
6976 }
6977
6978
6979 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6980 function will check whether the attribute is allowed here and will add the
6981 attribute to the function type tree or otherwise issue a diagnostic. The
6982 reason we check this at declaration time is to only allow the use of the
6983 attribute with declarations of function pointers and not function
6984 declarations. This function checks NODE is of the expected type and issues
6985 diagnostics otherwise using NAME. If it is not of the expected type
6986 *NO_ADD_ATTRS will be set to true. */
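/* Illustrative source-level usage (only honoured with -mcmse): the
   attribute is attached to a function-pointer or typedef declaration,
   e.g.
     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);
     nsfunc *ns_callback;
   rather than to a function definition.  */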
6987
6988 static tree
6989 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6990 tree /* args */,
6991 int /* flags */,
6992 bool *no_add_attrs)
6993 {
6994 tree decl = NULL_TREE, fntype = NULL_TREE;
6995 tree type;
6996
6997 if (!use_cmse)
6998 {
6999 *no_add_attrs = true;
7000 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7001 name);
7002 return NULL_TREE;
7003 }
7004
7005 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7006 {
7007 decl = *node;
7008 fntype = TREE_TYPE (decl);
7009 }
7010
7011 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7012 fntype = TREE_TYPE (fntype);
7013
7014 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7015 {
7016 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7017 "function pointer", name);
7018 *no_add_attrs = true;
7019 return NULL_TREE;
7020 }
7021
7022 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7023
7024 if (*no_add_attrs)
7025 return NULL_TREE;
7026
7027 /* Prevent trees being shared among function types with and without
7028 cmse_nonsecure_call attribute. */
7029 type = TREE_TYPE (decl);
7030
7031 type = build_distinct_type_copy (type);
7032 TREE_TYPE (decl) = type;
7033 fntype = type;
7034
7035 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7036 {
7037 type = fntype;
7038 fntype = TREE_TYPE (fntype);
7039 fntype = build_distinct_type_copy (fntype);
7040 TREE_TYPE (type) = fntype;
7041 }
7042
7043 /* Construct a type attribute and add it to the function type. */
7044 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7045 TYPE_ATTRIBUTES (fntype));
7046 TYPE_ATTRIBUTES (fntype) = attrs;
7047 return NULL_TREE;
7048 }
7049
7050 /* Return 0 if the attributes for two types are incompatible, 1 if they
7051 are compatible, and 2 if they are nearly compatible (which causes a
7052 warning to be generated). */
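/* For example, a "long_call" function type and an otherwise identical
   plain function type compare as incompatible here, so pointers to the
   two cannot be mixed silently (illustrative).  */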
7053 static int
7054 arm_comp_type_attributes (const_tree type1, const_tree type2)
7055 {
7056 int l1, l2, s1, s2;
7057
7058 /* Check for mismatch of non-default calling convention. */
7059 if (TREE_CODE (type1) != FUNCTION_TYPE)
7060 return 1;
7061
7062 /* Check for mismatched call attributes. */
7063 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7064 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7065 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7066 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7067
7068 /* Only bother to check if an attribute is defined. */
7069 if (l1 | l2 | s1 | s2)
7070 {
7071 /* If one type has an attribute, the other must have the same attribute. */
7072 if ((l1 != l2) || (s1 != s2))
7073 return 0;
7074
7075 /* Disallow mixed attributes. */
7076 if ((l1 & s2) || (l2 & s1))
7077 return 0;
7078 }
7079
7080 /* Check for mismatched ISR attribute. */
7081 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7082 if (! l1)
7083 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7084 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7085 if (! l2)
7086 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7087 if (l1 != l2)
7088 return 0;
7089
7090 l1 = lookup_attribute ("cmse_nonsecure_call",
7091 TYPE_ATTRIBUTES (type1)) != NULL;
7092 l2 = lookup_attribute ("cmse_nonsecure_call",
7093 TYPE_ATTRIBUTES (type2)) != NULL;
7094
7095 if (l1 != l2)
7096 return 0;
7097
7098 return 1;
7099 }
7100
7101 /* Assigns default attributes to newly defined type. This is used to
7102 set short_call/long_call attributes for function types of
7103 functions defined inside corresponding #pragma scopes. */
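/* Illustrative usage:
     #pragma long_calls
     void far_helper (void);
     #pragma long_calls_off
   far_helper's type receives the long_call attribute; #pragma
   no_long_calls similarly applies short_call.  */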
7104 static void
7105 arm_set_default_type_attributes (tree type)
7106 {
7107 /* Add __attribute__ ((long_call)) to all functions when inside
7108 #pragma long_calls, or __attribute__ ((short_call)) to all
7109 functions when inside #pragma no_long_calls. */
7110 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7111 {
7112 tree type_attr_list, attr_name;
7113 type_attr_list = TYPE_ATTRIBUTES (type);
7114
7115 if (arm_pragma_long_calls == LONG)
7116 attr_name = get_identifier ("long_call");
7117 else if (arm_pragma_long_calls == SHORT)
7118 attr_name = get_identifier ("short_call");
7119 else
7120 return;
7121
7122 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7123 TYPE_ATTRIBUTES (type) = type_attr_list;
7124 }
7125 }
7126 \f
7127 /* Return true if DECL is known to be linked into section SECTION. */
7128
7129 static bool
7130 arm_function_in_section_p (tree decl, section *section)
7131 {
7132 /* We can only be certain about the prevailing symbol definition. */
7133 if (!decl_binds_to_current_def_p (decl))
7134 return false;
7135
7136 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7137 if (!DECL_SECTION_NAME (decl))
7138 {
7139 /* Make sure that we will not create a unique section for DECL. */
7140 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7141 return false;
7142 }
7143
7144 return function_section (decl) == section;
7145 }
7146
7147 /* Return nonzero if a 32-bit "long_call" should be generated for
7148 a call from the current function to DECL. We generate a long_call
7149 if the function:
7150
7151 a. has an __attribute__ ((long_call))
7152 or b. is within the scope of a #pragma long_calls
7153 or c. the -mlong-calls command line switch has been specified
7154
7155 However we do not generate a long call if the function:
7156
7157 d. has an __attribute__ ((short_call))
7158 or e. is inside the scope of a #pragma no_long_calls
7159 or f. is defined in the same section as the current function. */
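/* For example (illustrative), with
     void far_func (void) __attribute__ ((long_call));
   a call to far_func is expanded as a load of its address followed by
   an indirect call, rather than a single BL, whose branch range is
   limited.  */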
7160
7161 bool
7162 arm_is_long_call_p (tree decl)
7163 {
7164 tree attrs;
7165
7166 if (!decl)
7167 return TARGET_LONG_CALLS;
7168
7169 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7170 if (lookup_attribute ("short_call", attrs))
7171 return false;
7172
7173 /* For "f", be conservative, and only cater for cases in which the
7174 whole of the current function is placed in the same section. */
7175 if (!flag_reorder_blocks_and_partition
7176 && TREE_CODE (decl) == FUNCTION_DECL
7177 && arm_function_in_section_p (decl, current_function_section ()))
7178 return false;
7179
7180 if (lookup_attribute ("long_call", attrs))
7181 return true;
7182
7183 return TARGET_LONG_CALLS;
7184 }
7185
7186 /* Return nonzero if it is ok to make a tail-call to DECL. */
7187 static bool
7188 arm_function_ok_for_sibcall (tree decl, tree exp)
7189 {
7190 unsigned long func_type;
7191
7192 if (cfun->machine->sibcall_blocked)
7193 return false;
7194
7195 /* Never tailcall something if we are generating code for Thumb-1. */
7196 if (TARGET_THUMB1)
7197 return false;
7198
7199 /* The PIC register is live on entry to VxWorks PLT entries, so we
7200 must make the call before restoring the PIC register. */
7201 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7202 return false;
7203
7204 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7205 may be used both as target of the call and base register for restoring
7206 the VFP registers. */
7207 if (TARGET_APCS_FRAME && TARGET_ARM
7208 && TARGET_HARD_FLOAT
7209 && decl && arm_is_long_call_p (decl))
7210 return false;
7211
7212 /* If we are interworking and the function is not declared static
7213 then we can't tail-call it unless we know that it exists in this
7214 compilation unit (since it might be a Thumb routine). */
7215 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7216 && !TREE_ASM_WRITTEN (decl))
7217 return false;
7218
7219 func_type = arm_current_func_type ();
7220 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7221 if (IS_INTERRUPT (func_type))
7222 return false;
7223
7224 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7225 generated for entry functions themselves. */
7226 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7227 return false;
7228
7229 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7230 this would complicate matters for later code generation. */
7231 if (TREE_CODE (exp) == CALL_EXPR)
7232 {
7233 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7234 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7235 return false;
7236 }
7237
7238 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7239 {
7240 /* Check that the return value locations are the same. For
7241 example that we aren't returning a value from the sibling in
7242 a VFP register but then need to transfer it to a core
7243 register. */
7244 rtx a, b;
7245 tree decl_or_type = decl;
7246
7247 /* If it is an indirect function pointer, get the function type. */
7248 if (!decl)
7249 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7250
7251 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7252 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7253 cfun->decl, false);
7254 if (!rtx_equal_p (a, b))
7255 return false;
7256 }
7257
7258 /* Never tailcall if function may be called with a misaligned SP. */
7259 if (IS_STACKALIGN (func_type))
7260 return false;
7261
7262 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7263 references should become a NOP. Don't convert such calls into
7264 sibling calls. */
7265 if (TARGET_AAPCS_BASED
7266 && arm_abi == ARM_ABI_AAPCS
7267 && decl
7268 && DECL_WEAK (decl))
7269 return false;
7270
7271 /* We cannot do a tailcall for an indirect call by descriptor if all the
7272 argument registers are used because the only register left to load the
7273 address is IP and it will already contain the static chain. */
7274 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7275 {
7276 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7277 CUMULATIVE_ARGS cum;
7278 cumulative_args_t cum_v;
7279
7280 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7281 cum_v = pack_cumulative_args (&cum);
7282
7283 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7284 {
7285 tree type = TREE_VALUE (t);
7286 if (!VOID_TYPE_P (type))
7287 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7288 }
7289
7290 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7291 return false;
7292 }
7293
7294 /* Everything else is ok. */
7295 return true;
7296 }
7297
7298 \f
7299 /* Addressing mode support functions. */
7300
7301 /* Return nonzero if X is a legitimate immediate operand when compiling
7302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7303 int
7304 legitimate_pic_operand_p (rtx x)
7305 {
7306 if (GET_CODE (x) == SYMBOL_REF
7307 || (GET_CODE (x) == CONST
7308 && GET_CODE (XEXP (x, 0)) == PLUS
7309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7310 return 0;
7311
7312 return 1;
7313 }
7314
7315 /* Record that the current function needs a PIC register. Initialize
7316 cfun->machine->pic_reg if we have not already done so. */
7317
7318 static void
7319 require_pic_register (void)
7320 {
7321 /* A lot of the logic here is made obscure by the fact that this
7322 routine gets called as part of the rtx cost estimation process.
7323 We don't want those calls to affect any assumptions about the real
7324 function; and further, we can't call entry_of_function() until we
7325 start the real expansion process. */
7326 if (!crtl->uses_pic_offset_table)
7327 {
7328 gcc_assert (can_create_pseudo_p ());
7329 if (arm_pic_register != INVALID_REGNUM
7330 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7331 {
7332 if (!cfun->machine->pic_reg)
7333 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7334
7335 /* Play games to avoid marking the function as needing pic
7336 if we are being called as part of the cost-estimation
7337 process. */
7338 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7339 crtl->uses_pic_offset_table = 1;
7340 }
7341 else
7342 {
7343 rtx_insn *seq, *insn;
7344
7345 if (!cfun->machine->pic_reg)
7346 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7347
7348 /* Play games to avoid marking the function as needing pic
7349 if we are being called as part of the cost-estimation
7350 process. */
7351 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7352 {
7353 crtl->uses_pic_offset_table = 1;
7354 start_sequence ();
7355
7356 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7357 && arm_pic_register > LAST_LO_REGNUM)
7358 emit_move_insn (cfun->machine->pic_reg,
7359 gen_rtx_REG (Pmode, arm_pic_register));
7360 else
7361 arm_load_pic_register (0UL);
7362
7363 seq = get_insns ();
7364 end_sequence ();
7365
7366 for (insn = seq; insn; insn = NEXT_INSN (insn))
7367 if (INSN_P (insn))
7368 INSN_LOCATION (insn) = prologue_location;
7369
7370 /* We can be called during expansion of PHI nodes, where
7371 we can't yet emit instructions directly in the final
7372 insn stream. Queue the insns on the entry edge; they will
7373 be committed after everything else is expanded. */
7374 insert_insn_on_edge (seq,
7375 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7376 }
7377 }
7378 }
7379 }
7380
7381 rtx
7382 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7383 {
7384 if (GET_CODE (orig) == SYMBOL_REF
7385 || GET_CODE (orig) == LABEL_REF)
7386 {
7387 if (reg == 0)
7388 {
7389 gcc_assert (can_create_pseudo_p ());
7390 reg = gen_reg_rtx (Pmode);
7391 }
7392
7393 /* VxWorks does not impose a fixed gap between segments; the run-time
7394 gap can be different from the object-file gap. We therefore can't
7395 use GOTOFF unless we are absolutely sure that the symbol is in the
7396 same segment as the GOT. Unfortunately, the flexibility of linker
7397 scripts means that we can't be sure of that in general, so assume
7398 that GOTOFF is never valid on VxWorks. */
7399 /* References to weak symbols cannot be resolved locally: they
7400 may be overridden by a non-weak definition at link time. */
7401 rtx_insn *insn;
7402 if ((GET_CODE (orig) == LABEL_REF
7403 || (GET_CODE (orig) == SYMBOL_REF
7404 && SYMBOL_REF_LOCAL_P (orig)
7405 && (SYMBOL_REF_DECL (orig)
7406 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7407 && NEED_GOT_RELOC
7408 && arm_pic_data_is_text_relative)
7409 insn = arm_pic_static_addr (orig, reg);
7410 else
7411 {
7412 rtx pat;
7413 rtx mem;
7414
7415 /* If this function doesn't have a pic register, create one now. */
7416 require_pic_register ();
7417
7418 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7419
7420 /* Make the MEM as close to a constant as possible. */
7421 mem = SET_SRC (pat);
7422 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7423 MEM_READONLY_P (mem) = 1;
7424 MEM_NOTRAP_P (mem) = 1;
7425
7426 insn = emit_insn (pat);
7427 }
7428
7429 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7430 by the loop optimizer. */
7431 set_unique_reg_note (insn, REG_EQUAL, orig);
7432
7433 return reg;
7434 }
7435 else if (GET_CODE (orig) == CONST)
7436 {
7437 rtx base, offset;
7438
7439 if (GET_CODE (XEXP (orig, 0)) == PLUS
7440 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7441 return orig;
7442
7443 /* Handle the case where we have: const (UNSPEC_TLS). */
7444 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7445 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7446 return orig;
7447
7448 /* Handle the case where we have:
7449 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7450 CONST_INT. */
7451 if (GET_CODE (XEXP (orig, 0)) == PLUS
7452 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7453 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7454 {
7455 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7456 return orig;
7457 }
7458
7459 if (reg == 0)
7460 {
7461 gcc_assert (can_create_pseudo_p ());
7462 reg = gen_reg_rtx (Pmode);
7463 }
7464
7465 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7466
7467 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7468 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7469 base == reg ? 0 : reg);
7470
7471 if (CONST_INT_P (offset))
7472 {
7473 /* The base register doesn't really matter, we only want to
7474 test the index for the appropriate mode. */
7475 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7476 {
7477 gcc_assert (can_create_pseudo_p ());
7478 offset = force_reg (Pmode, offset);
7479 }
7480
7481 if (CONST_INT_P (offset))
7482 return plus_constant (Pmode, base, INTVAL (offset));
7483 }
7484
7485 if (GET_MODE_SIZE (mode) > 4
7486 && (GET_MODE_CLASS (mode) == MODE_INT
7487 || TARGET_SOFT_FLOAT))
7488 {
7489 emit_insn (gen_addsi3 (reg, base, offset));
7490 return reg;
7491 }
7492
7493 return gen_rtx_PLUS (Pmode, base, offset);
7494 }
7495
7496 return orig;
7497 }
7498
7499
7500 /* Find a spare register to use during the prolog of a function. */
7501
7502 static int
7503 thumb_find_work_register (unsigned long pushed_regs_mask)
7504 {
7505 int reg;
7506
7507 /* Check the argument registers first as these are call-used. The
7508 register allocation order means that sometimes r3 might be used
7509 but earlier argument registers might not, so check them all. */
7510 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7511 if (!df_regs_ever_live_p (reg))
7512 return reg;
7513
7514 /* Before going on to check the call-saved registers we can try a couple
7515 more ways of deducing that r3 is available. The first is when we are
7516 pushing anonymous arguments onto the stack and we have less than 4
7517 registers worth of fixed arguments(*). In this case r3 will be part of
7518 the variable argument list and so we can be sure that it will be
7519 pushed right at the start of the function. Hence it will be available
7520 for the rest of the prologue.
7521 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7522 if (cfun->machine->uses_anonymous_args
7523 && crtl->args.pretend_args_size > 0)
7524 return LAST_ARG_REGNUM;
7525
7526 /* The other case is when we have fixed arguments but less than 4 registers
7527 worth. In this case r3 might be used in the body of the function, but
7528 it is not being used to convey an argument into the function. In theory
7529 we could just check crtl->args.size to see how many bytes are
7530 being passed in argument registers, but it seems that it is unreliable.
7531 Sometimes it will have the value 0 when in fact arguments are being
7532 passed. (See testcase execute/20021111-1.c for an example). So we also
7533 check the args_info.nregs field. The problem with this field is
7534 that it makes no allowances for arguments that are passed to the
7535 function but which are not used. Hence we could miss an opportunity
7536 when a function has an unused argument in r3. But it is better to be
7537 safe than to be sorry. */
7538 if (! cfun->machine->uses_anonymous_args
7539 && crtl->args.size >= 0
7540 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7541 && (TARGET_AAPCS_BASED
7542 ? crtl->args.info.aapcs_ncrn < 4
7543 : crtl->args.info.nregs < 4))
7544 return LAST_ARG_REGNUM;
7545
7546 /* Otherwise look for a call-saved register that is going to be pushed. */
7547 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7548 if (pushed_regs_mask & (1 << reg))
7549 return reg;
7550
7551 if (TARGET_THUMB2)
7552 {
7553 /* Thumb-2 can use high regs. */
7554 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7555 if (pushed_regs_mask & (1 << reg))
7556 return reg;
7557 }
7558 /* Something went wrong - thumb_compute_save_reg_mask()
7559 should have arranged for a suitable register to be pushed. */
7560 gcc_unreachable ();
7561 }
7562
7563 static GTY(()) int pic_labelno;
7564
7565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7566 low register. */
7567
7568 void
7569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7570 {
7571 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7572
7573 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7574 return;
7575
7576 gcc_assert (flag_pic);
7577
7578 pic_reg = cfun->machine->pic_reg;
7579 if (TARGET_VXWORKS_RTP)
7580 {
7581 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7582 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7583 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7584
7585 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7586
7587 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7588 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7589 }
7590 else
7591 {
7592 /* We use an UNSPEC rather than a LABEL_REF because this label
7593 never appears in the code stream. */
7594
7595 labelno = GEN_INT (pic_labelno++);
7596 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7597 l1 = gen_rtx_CONST (VOIDmode, l1);
7598
7599 /* On the ARM the PC register contains 'dot + 8' at the time of the
7600 addition, on the Thumb it is 'dot + 4'. */
7601 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7602 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7603 UNSPEC_GOTSYM_OFF);
7604 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
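/* In assembly terms the sequence built here is roughly (illustrative
   sketch):
           ldr     rPIC, .LPICoff
   .LPICn: add     rPIC, pc, rPIC
   where .LPICoff holds _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8), with 4
   instead of 8 when in Thumb state.  */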
7605
7606 if (TARGET_32BIT)
7607 {
7608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7609 }
7610 else /* TARGET_THUMB1 */
7611 {
7612 if (arm_pic_register != INVALID_REGNUM
7613 && REGNO (pic_reg) > LAST_LO_REGNUM)
7614 {
7615 /* We will have pushed the pic register, so we should always be
7616 able to find a work register. */
7617 pic_tmp = gen_rtx_REG (SImode,
7618 thumb_find_work_register (saved_regs));
7619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7620 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7621 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7622 }
7623 else if (arm_pic_register != INVALID_REGNUM
7624 && arm_pic_register > LAST_LO_REGNUM
7625 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7626 {
7627 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7628 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7629 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7630 }
7631 else
7632 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7633 }
7634 }
7635
7636 /* Need to emit this whether or not we obey regdecls,
7637 since setjmp/longjmp can cause life info to screw up. */
7638 emit_use (pic_reg);
7639 }
7640
7641 /* Generate code to load the address of a static var when flag_pic is set. */
7642 static rtx_insn *
7643 arm_pic_static_addr (rtx orig, rtx reg)
7644 {
7645 rtx l1, labelno, offset_rtx;
7646
7647 gcc_assert (flag_pic);
7648
7649 /* We use an UNSPEC rather than a LABEL_REF because this label
7650 never appears in the code stream. */
7651 labelno = GEN_INT (pic_labelno++);
7652 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7653 l1 = gen_rtx_CONST (VOIDmode, l1);
7654
7655 /* On the ARM the PC register contains 'dot + 8' at the time of the
7656 addition, on the Thumb it is 'dot + 4'. */
7657 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7658 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7659 UNSPEC_SYMBOL_OFFSET);
7660 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7661
7662 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7663 }
7664
7665 /* Return nonzero if X is valid as an ARM state addressing register. */
7666 static int
7667 arm_address_register_rtx_p (rtx x, int strict_p)
7668 {
7669 int regno;
7670
7671 if (!REG_P (x))
7672 return 0;
7673
7674 regno = REGNO (x);
7675
7676 if (strict_p)
7677 return ARM_REGNO_OK_FOR_BASE_P (regno);
7678
7679 return (regno <= LAST_ARM_REGNUM
7680 || regno >= FIRST_PSEUDO_REGISTER
7681 || regno == FRAME_POINTER_REGNUM
7682 || regno == ARG_POINTER_REGNUM);
7683 }
7684
7685 /* Return TRUE if this rtx is the difference of a symbol and a label,
7686 and will reduce to a PC-relative relocation in the object file.
7687 Expressions like this can be left alone when generating PIC, rather
7688 than forced through the GOT. */
7689 static int
7690 pcrel_constant_p (rtx x)
7691 {
7692 if (GET_CODE (x) == MINUS)
7693 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7694
7695 return FALSE;
7696 }
7697
7698 /* Return true if X will surely end up in an index register after next
7699 splitting pass. */
7700 static bool
7701 will_be_in_index_register (const_rtx x)
7702 {
7703 /* arm.md: calculate_pic_address will split this into a register. */
7704 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7705 }
7706
7707 /* Return nonzero if X is a valid ARM state address operand. */
7708 int
7709 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7710 int strict_p)
7711 {
7712 bool use_ldrd;
7713 enum rtx_code code = GET_CODE (x);
7714
7715 if (arm_address_register_rtx_p (x, strict_p))
7716 return 1;
7717
7718 use_ldrd = (TARGET_LDRD
7719 && (mode == DImode || mode == DFmode));
7720
7721 if (code == POST_INC || code == PRE_DEC
7722 || ((code == PRE_INC || code == POST_DEC)
7723 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7724 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7725
7726 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7727 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7728 && GET_CODE (XEXP (x, 1)) == PLUS
7729 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7730 {
7731 rtx addend = XEXP (XEXP (x, 1), 1);
7732
7733 /* Don't allow ldrd post-increment by register because it's hard
7734 to fix up invalid register choices. */
7735 if (use_ldrd
7736 && GET_CODE (x) == POST_MODIFY
7737 && REG_P (addend))
7738 return 0;
7739
7740 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7741 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7742 }
7743
7744 /* After reload constants split into minipools will have addresses
7745 from a LABEL_REF. */
7746 else if (reload_completed
7747 && (code == LABEL_REF
7748 || (code == CONST
7749 && GET_CODE (XEXP (x, 0)) == PLUS
7750 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7751 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7752 return 1;
7753
7754 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7755 return 0;
7756
7757 else if (code == PLUS)
7758 {
7759 rtx xop0 = XEXP (x, 0);
7760 rtx xop1 = XEXP (x, 1);
7761
7762 return ((arm_address_register_rtx_p (xop0, strict_p)
7763 && ((CONST_INT_P (xop1)
7764 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7765 || (!strict_p && will_be_in_index_register (xop1))))
7766 || (arm_address_register_rtx_p (xop1, strict_p)
7767 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7768 }
7769
7770 #if 0
7771 /* Reload currently can't handle MINUS, so disable this for now */
7772 else if (GET_CODE (x) == MINUS)
7773 {
7774 rtx xop0 = XEXP (x, 0);
7775 rtx xop1 = XEXP (x, 1);
7776
7777 return (arm_address_register_rtx_p (xop0, strict_p)
7778 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7779 }
7780 #endif
7781
7782 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7783 && code == SYMBOL_REF
7784 && CONSTANT_POOL_ADDRESS_P (x)
7785 && ! (flag_pic
7786 && symbol_mentioned_p (get_pool_constant (x))
7787 && ! pcrel_constant_p (get_pool_constant (x))))
7788 return 1;
7789
7790 return 0;
7791 }
7792
7793 /* Return true if we can avoid creating a constant pool entry for x. */
7794 static bool
7795 can_avoid_literal_pool_for_label_p (rtx x)
7796 {
7797 /* Normally we can assign constant values to target registers without
7798 the help of the constant pool. But there are cases where we have to
7799 use the constant pool, such as:
7800 1) assigning a label to a register;
7801 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7802 
7803 A constant pool access of the form:
7804 (set (reg r0) (mem (symbol_ref (".LC0"))))
7805 will cause a literal pool to be used (later, in arm_reorg).
7806 So here we mark such a form as invalid; the compiler will then
7807 adjust it into:
7808 (set (reg r0) (symbol_ref (".LC0")))
7809 (set (reg r0) (mem (reg r0))).
7810 No extra register is required, and (mem (reg r0)) won't cause the use
7811 of literal pools. */
7812 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7813 && CONSTANT_POOL_ADDRESS_P (x))
7814 return 1;
7815 return 0;
7816 }
7817
7818
7819 /* Return nonzero if X is a valid Thumb-2 address operand. */
7820 static int
7821 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7822 {
7823 bool use_ldrd;
7824 enum rtx_code code = GET_CODE (x);
7825
7826 if (arm_address_register_rtx_p (x, strict_p))
7827 return 1;
7828
7829 use_ldrd = (TARGET_LDRD
7830 && (mode == DImode || mode == DFmode));
7831
7832 if (code == POST_INC || code == PRE_DEC
7833 || ((code == PRE_INC || code == POST_DEC)
7834 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7835 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7836
7837 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7838 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7839 && GET_CODE (XEXP (x, 1)) == PLUS
7840 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7841 {
7842 /* Thumb-2 only has autoincrement by constant. */
7843 rtx addend = XEXP (XEXP (x, 1), 1);
7844 HOST_WIDE_INT offset;
7845
7846 if (!CONST_INT_P (addend))
7847 return 0;
7848
7849 offset = INTVAL (addend);
7850 if (GET_MODE_SIZE (mode) <= 4)
7851 return (offset > -256 && offset < 256);
7852
7853 return (use_ldrd && offset > -1024 && offset < 1024
7854 && (offset & 3) == 0);
7855 }
7856
7857 /* After reload constants split into minipools will have addresses
7858 from a LABEL_REF. */
7859 else if (reload_completed
7860 && (code == LABEL_REF
7861 || (code == CONST
7862 && GET_CODE (XEXP (x, 0)) == PLUS
7863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7864 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7865 return 1;
7866
7867 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7868 return 0;
7869
7870 else if (code == PLUS)
7871 {
7872 rtx xop0 = XEXP (x, 0);
7873 rtx xop1 = XEXP (x, 1);
7874
7875 return ((arm_address_register_rtx_p (xop0, strict_p)
7876 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7877 || (!strict_p && will_be_in_index_register (xop1))))
7878 || (arm_address_register_rtx_p (xop1, strict_p)
7879 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7880 }
7881
7882 else if (can_avoid_literal_pool_for_label_p (x))
7883 return 0;
7884
7885 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7886 && code == SYMBOL_REF
7887 && CONSTANT_POOL_ADDRESS_P (x)
7888 && ! (flag_pic
7889 && symbol_mentioned_p (get_pool_constant (x))
7890 && ! pcrel_constant_p (get_pool_constant (x))))
7891 return 1;
7892
7893 return 0;
7894 }
7895
7896 /* Return nonzero if INDEX is valid for an address index operand in
7897 ARM state. */
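/* Illustrative forms accepted below for a word-sized access (sketch):
     [rN, #imm]        with -4096 < imm < 4096
     [rN, rM]
     [rN, rM, lsl #s]  (or another shift) with 0 < s <= 31
   VFP/NEON and iWMMXt accesses are restricted to small word-aligned
   immediate offsets, as checked first.  */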
7898 static int
7899 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7900 int strict_p)
7901 {
7902 HOST_WIDE_INT range;
7903 enum rtx_code code = GET_CODE (index);
7904
7905 /* Standard coprocessor addressing modes. */
7906 if (TARGET_HARD_FLOAT
7907 && (mode == SFmode || mode == DFmode))
7908 return (code == CONST_INT && INTVAL (index) < 1024
7909 && INTVAL (index) > -1024
7910 && (INTVAL (index) & 3) == 0);
7911
7912 /* For quad modes, we restrict the constant offset to be slightly less
7913 than what the instruction format permits. We do this because for
7914 quad mode moves, we will actually decompose them into two separate
7915 double-mode reads or writes. INDEX must therefore be a valid
7916 (double-mode) offset and so should INDEX+8. */
7917 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7918 return (code == CONST_INT
7919 && INTVAL (index) < 1016
7920 && INTVAL (index) > -1024
7921 && (INTVAL (index) & 3) == 0);
7922
7923 /* We have no such constraint on double mode offsets, so we permit the
7924 full range of the instruction format. */
7925 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7926 return (code == CONST_INT
7927 && INTVAL (index) < 1024
7928 && INTVAL (index) > -1024
7929 && (INTVAL (index) & 3) == 0);
7930
7931 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7932 return (code == CONST_INT
7933 && INTVAL (index) < 1024
7934 && INTVAL (index) > -1024
7935 && (INTVAL (index) & 3) == 0);
7936
7937 if (arm_address_register_rtx_p (index, strict_p)
7938 && (GET_MODE_SIZE (mode) <= 4))
7939 return 1;
7940
7941 if (mode == DImode || mode == DFmode)
7942 {
7943 if (code == CONST_INT)
7944 {
7945 HOST_WIDE_INT val = INTVAL (index);
7946
7947 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7948 If vldr is selected it uses arm_coproc_mem_operand. */
7949 if (TARGET_LDRD)
7950 return val > -256 && val < 256;
7951 else
7952 return val > -4096 && val < 4092;
7953 }
7954
7955 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7956 }
7957
7958 if (GET_MODE_SIZE (mode) <= 4
7959 && ! (arm_arch4
7960 && (mode == HImode
7961 || mode == HFmode
7962 || (mode == QImode && outer == SIGN_EXTEND))))
7963 {
7964 if (code == MULT)
7965 {
7966 rtx xiop0 = XEXP (index, 0);
7967 rtx xiop1 = XEXP (index, 1);
7968
7969 return ((arm_address_register_rtx_p (xiop0, strict_p)
7970 && power_of_two_operand (xiop1, SImode))
7971 || (arm_address_register_rtx_p (xiop1, strict_p)
7972 && power_of_two_operand (xiop0, SImode)));
7973 }
7974 else if (code == LSHIFTRT || code == ASHIFTRT
7975 || code == ASHIFT || code == ROTATERT)
7976 {
7977 rtx op = XEXP (index, 1);
7978
7979 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7980 && CONST_INT_P (op)
7981 && INTVAL (op) > 0
7982 && INTVAL (op) <= 31);
7983 }
7984 }
7985
7986 /* For ARM v4 we may be doing a sign-extend operation during the
7987 load. */
7988 if (arm_arch4)
7989 {
7990 if (mode == HImode
7991 || mode == HFmode
7992 || (outer == SIGN_EXTEND && mode == QImode))
7993 range = 256;
7994 else
7995 range = 4096;
7996 }
7997 else
7998 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7999
8000 return (code == CONST_INT
8001 && INTVAL (index) < range
8002 && INTVAL (index) > -range);
8003 }
8004
8005 /* Return true if OP is a valid index scaling factor for a Thumb-2
8006 address index operand, i.e. 1, 2, 4 or 8. */
8007 static bool
8008 thumb2_index_mul_operand (rtx op)
8009 {
8010 HOST_WIDE_INT val;
8011
8012 if (!CONST_INT_P (op))
8013 return false;
8014
8015 val = INTVAL (op);
8016 return (val == 1 || val == 2 || val == 4 || val == 8);
8017 }
8018
8019 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
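/* Illustrative forms accepted below for a word-sized access (sketch):
     [rN, #imm]        with -256 < imm < 4096
     [rN, rM]
     [rN, rM, lsl #s]  with 0 < s <= 3  */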
8020 static int
8021 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8022 {
8023 enum rtx_code code = GET_CODE (index);
8024
8025 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8026 /* Standard coprocessor addressing modes. */
8027 if (TARGET_HARD_FLOAT
8028 && (mode == SFmode || mode == DFmode))
8029 return (code == CONST_INT && INTVAL (index) < 1024
8030 /* Thumb-2 allows only a > -256 index range for its core register
8031 load/stores. Since we allow SF/DF in core registers, we have
8032 to use the intersection between -256~4096 (core) and -1024~1024
8033 (coprocessor). */
8034 && INTVAL (index) > -256
8035 && (INTVAL (index) & 3) == 0);
8036
8037 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8038 {
8039 /* For DImode assume values will usually live in core regs
8040 and only allow LDRD addressing modes. */
8041 if (!TARGET_LDRD || mode != DImode)
8042 return (code == CONST_INT
8043 && INTVAL (index) < 1024
8044 && INTVAL (index) > -1024
8045 && (INTVAL (index) & 3) == 0);
8046 }
8047
8048 /* For quad modes, we restrict the constant offset to be slightly less
8049 than what the instruction format permits. We do this because for
8050 quad mode moves, we will actually decompose them into two separate
8051 double-mode reads or writes. INDEX must therefore be a valid
8052 (double-mode) offset and so should INDEX+8. */
8053 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8054 return (code == CONST_INT
8055 && INTVAL (index) < 1016
8056 && INTVAL (index) > -1024
8057 && (INTVAL (index) & 3) == 0);
8058
8059 /* We have no such constraint on double mode offsets, so we permit the
8060 full range of the instruction format. */
8061 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8062 return (code == CONST_INT
8063 && INTVAL (index) < 1024
8064 && INTVAL (index) > -1024
8065 && (INTVAL (index) & 3) == 0);
8066
8067 if (arm_address_register_rtx_p (index, strict_p)
8068 && (GET_MODE_SIZE (mode) <= 4))
8069 return 1;
8070
8071 if (mode == DImode || mode == DFmode)
8072 {
8073 if (code == CONST_INT)
8074 {
8075 HOST_WIDE_INT val = INTVAL (index);
8076 /* Thumb-2 ldrd only has reg+const addressing modes.
8077 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8078 If vldr is selected it uses arm_coproc_mem_operand. */
8079 if (TARGET_LDRD)
8080 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8081 else
8082 return IN_RANGE (val, -255, 4095 - 4);
8083 }
8084 else
8085 return 0;
8086 }
8087
8088 if (code == MULT)
8089 {
8090 rtx xiop0 = XEXP (index, 0);
8091 rtx xiop1 = XEXP (index, 1);
8092
8093 return ((arm_address_register_rtx_p (xiop0, strict_p)
8094 && thumb2_index_mul_operand (xiop1))
8095 || (arm_address_register_rtx_p (xiop1, strict_p)
8096 && thumb2_index_mul_operand (xiop0)));
8097 }
8098 else if (code == ASHIFT)
8099 {
8100 rtx op = XEXP (index, 1);
8101
8102 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8103 && CONST_INT_P (op)
8104 && INTVAL (op) > 0
8105 && INTVAL (op) <= 3);
8106 }
8107
8108 return (code == CONST_INT
8109 && INTVAL (index) < 4096
8110 && INTVAL (index) > -256);
8111 }
8112
8113 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8114 static int
8115 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8116 {
8117 int regno;
8118
8119 if (!REG_P (x))
8120 return 0;
8121
8122 regno = REGNO (x);
8123
8124 if (strict_p)
8125 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8126
8127 return (regno <= LAST_LO_REGNUM
8128 || regno > LAST_VIRTUAL_REGISTER
8129 || regno == FRAME_POINTER_REGNUM
8130 || (GET_MODE_SIZE (mode) >= 4
8131 && (regno == STACK_POINTER_REGNUM
8132 || regno >= FIRST_PSEUDO_REGISTER
8133 || x == hard_frame_pointer_rtx
8134 || x == arg_pointer_rtx)));
8135 }
8136
8137 /* Return nonzero if x is a legitimate index register. This is the case
8138 for any base register that can access a QImode object. */
8139 inline static int
8140 thumb1_index_register_rtx_p (rtx x, int strict_p)
8141 {
8142 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8143 }
8144
8145 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8146
8147 The AP may be eliminated to either the SP or the FP, so we use the
8148 least common denominator, e.g. SImode, and offsets from 0 to 64.
8149
8150 ??? Verify whether the above is the right approach.
8151
8152 ??? Also, the FP may be eliminated to the SP, so perhaps that
8153 needs special handling also.
8154
8155 ??? Look at how the mips16 port solves this problem. It probably uses
8156 better ways to solve some of these problems.
8157
8158 Although it is not incorrect, we don't accept QImode and HImode
8159 addresses based on the frame pointer or arg pointer until the
8160 reload pass starts. This is so that eliminating such addresses
8161 into stack based ones won't produce impossible code. */
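/* Illustrative forms accepted below (sketch): [rN, rM]; [rN, #imm]
   with a small scaled immediate (see thumb_legitimate_offset_p);
   [sp, #imm] for word or larger accesses; and PC-relative literal
   loads before and after arm_reorg.  */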
8162 int
8163 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8164 {
8165 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8166 return 0;
8167
8168 /* ??? Not clear if this is right. Experiment. */
8169 if (GET_MODE_SIZE (mode) < 4
8170 && !(reload_in_progress || reload_completed)
8171 && (reg_mentioned_p (frame_pointer_rtx, x)
8172 || reg_mentioned_p (arg_pointer_rtx, x)
8173 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8174 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8175 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8176 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8177 return 0;
8178
8179 /* Accept any base register. SP only in SImode or larger. */
8180 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8181 return 1;
8182
8183 /* This is PC relative data before arm_reorg runs. */
8184 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8185 && GET_CODE (x) == SYMBOL_REF
8186 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8187 return 1;
8188
8189 /* This is PC relative data after arm_reorg runs. */
8190 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8191 && reload_completed
8192 && (GET_CODE (x) == LABEL_REF
8193 || (GET_CODE (x) == CONST
8194 && GET_CODE (XEXP (x, 0)) == PLUS
8195 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8196 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8197 return 1;
8198
8199 /* Post-inc indexing only supported for SImode and larger. */
8200 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8201 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8202 return 1;
8203
8204 else if (GET_CODE (x) == PLUS)
8205 {
8206 /* REG+REG address can be any two index registers. */
8207 /* We disallow FRAME+REG addressing since we know that FRAME
8208 will be replaced with STACK, and SP relative addressing only
8209 permits SP+OFFSET. */
8210 if (GET_MODE_SIZE (mode) <= 4
8211 && XEXP (x, 0) != frame_pointer_rtx
8212 && XEXP (x, 1) != frame_pointer_rtx
8213 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8214 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8215 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8216 return 1;
8217
8218 /* REG+const has 5-7 bit offset for non-SP registers. */
8219 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8220 || XEXP (x, 0) == arg_pointer_rtx)
8221 && CONST_INT_P (XEXP (x, 1))
8222 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8223 return 1;
8224
8225 /* REG+const has 10-bit offset for SP, but only SImode and
8226 larger is supported. */
8227 /* ??? Should probably check for DI/DFmode overflow here
8228 just like GO_IF_LEGITIMATE_OFFSET does. */
8229 else if (REG_P (XEXP (x, 0))
8230 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8231 && GET_MODE_SIZE (mode) >= 4
8232 && CONST_INT_P (XEXP (x, 1))
8233 && INTVAL (XEXP (x, 1)) >= 0
8234 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8235 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8236 return 1;
8237
8238 else if (REG_P (XEXP (x, 0))
8239 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8240 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8241 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8242 && REGNO (XEXP (x, 0))
8243 <= LAST_VIRTUAL_POINTER_REGISTER))
8244 && GET_MODE_SIZE (mode) >= 4
8245 && CONST_INT_P (XEXP (x, 1))
8246 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8247 return 1;
8248 }
8249
8250 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8251 && GET_MODE_SIZE (mode) == 4
8252 && GET_CODE (x) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (x)
8254 && ! (flag_pic
8255 && symbol_mentioned_p (get_pool_constant (x))
8256 && ! pcrel_constant_p (get_pool_constant (x))))
8257 return 1;
8258
8259 return 0;
8260 }
8261
8262 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8263 instruction of mode MODE. */
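/* Illustrative limits (sketch): byte accesses allow offsets 0..31,
   halfword accesses 0..62 in steps of 2, and word (or larger) accesses
   0..124 in steps of 4, matching the scaled 5-bit immediate of the
   16-bit load/store encodings.  */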
8264 int
8265 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8266 {
8267 switch (GET_MODE_SIZE (mode))
8268 {
8269 case 1:
8270 return val >= 0 && val < 32;
8271
8272 case 2:
8273 return val >= 0 && val < 64 && (val & 1) == 0;
8274
8275 default:
8276 return (val >= 0
8277 && (val + GET_MODE_SIZE (mode)) <= 128
8278 && (val & 3) == 0);
8279 }
8280 }
8281
8282 bool
8283 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8284 {
8285 if (TARGET_ARM)
8286 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8287 else if (TARGET_THUMB2)
8288 return thumb2_legitimate_address_p (mode, x, strict_p);
8289 else /* if (TARGET_THUMB1) */
8290 return thumb1_legitimate_address_p (mode, x, strict_p);
8291 }
8292
8293 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8294
8295 Given an rtx X being reloaded into a reg required to be
8296 in class CLASS, return the class of reg to actually use.
8297 In general this is just CLASS, but for the Thumb core registers and
8298 immediate constants we prefer a LO_REGS class or a subset. */
8299
8300 static reg_class_t
8301 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8302 {
8303 if (TARGET_32BIT)
8304 return rclass;
8305 else
8306 {
8307 if (rclass == GENERAL_REGS)
8308 return LO_REGS;
8309 else
8310 return rclass;
8311 }
8312 }
8313
8314 /* Build the SYMBOL_REF for __tls_get_addr. */
8315
8316 static GTY(()) rtx tls_get_addr_libfunc;
8317
8318 static rtx
8319 get_tls_get_addr (void)
8320 {
8321 if (!tls_get_addr_libfunc)
8322 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8323 return tls_get_addr_libfunc;
8324 }
8325
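/* Return an rtx holding the thread pointer, loading it into TARGET if
   that is non-null. With a hardware thread register this is a single
   coprocessor register read; otherwise it is a call to a helper that
   returns the value in r0 (__aeabi_read_tp on EABI targets).  */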
8326 rtx
8327 arm_load_tp (rtx target)
8328 {
8329 if (!target)
8330 target = gen_reg_rtx (SImode);
8331
8332 if (TARGET_HARD_TP)
8333 {
8334 /* Can return in any reg. */
8335 emit_insn (gen_load_tp_hard (target));
8336 }
8337 else
8338 {
8339 /* Always returned in r0. Immediately copy the result into a pseudo,
8340 otherwise other uses of r0 (e.g. setting up function arguments) may
8341 clobber the value. */
8342
8343 rtx tmp;
8344
8345 emit_insn (gen_load_tp_soft ());
8346
8347 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8348 emit_move_insn (target, tmp);
8349 }
8350 return target;
8351 }
8352
8353 static rtx
8354 load_tls_operand (rtx x, rtx reg)
8355 {
8356 rtx tmp;
8357
8358 if (reg == NULL_RTX)
8359 reg = gen_reg_rtx (SImode);
8360
8361 tmp = gen_rtx_CONST (SImode, x);
8362
8363 emit_move_insn (reg, tmp);
8364
8365 return reg;
8366 }
8367
8368 static rtx_insn *
8369 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8370 {
8371 rtx label, labelno, sum;
8372
8373 gcc_assert (reloc != TLS_DESCSEQ);
8374 start_sequence ();
8375
8376 labelno = GEN_INT (pic_labelno++);
8377 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8378 label = gen_rtx_CONST (VOIDmode, label);
8379
8380 sum = gen_rtx_UNSPEC (Pmode,
8381 gen_rtvec (4, x, GEN_INT (reloc), label,
8382 GEN_INT (TARGET_ARM ? 8 : 4)),
8383 UNSPEC_TLS);
8384 reg = load_tls_operand (sum, reg);
8385
8386 if (TARGET_ARM)
8387 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8388 else
8389 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8390
8391 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8392 LCT_PURE, /* LCT_CONST? */
8393 Pmode, reg, Pmode);
8394
8395 rtx_insn *insns = get_insns ();
8396 end_sequence ();
8397
8398 return insns;
8399 }
8400
8401 static rtx
8402 arm_tls_descseq_addr (rtx x, rtx reg)
8403 {
8404 rtx labelno = GEN_INT (pic_labelno++);
8405 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8406 rtx sum = gen_rtx_UNSPEC (Pmode,
8407 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8408 gen_rtx_CONST (VOIDmode, label),
8409 GEN_INT (!TARGET_ARM)),
8410 UNSPEC_TLS);
8411 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8412
8413 emit_insn (gen_tlscall (x, labelno));
8414 if (!reg)
8415 reg = gen_reg_rtx (SImode);
8416 else
8417 gcc_assert (REGNO (reg) != R0_REGNUM);
8418
8419 emit_move_insn (reg, reg0);
8420
8421 return reg;
8422 }
8423
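/* Expand X, a SYMBOL_REF of a thread-local symbol, into an address
   computation appropriate to its TLS access model (global-dynamic,
   local-dynamic, initial-exec or local-exec), optionally using REG as
   an intermediate register.  */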
8424 rtx
8425 legitimize_tls_address (rtx x, rtx reg)
8426 {
8427 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8428 rtx_insn *insns;
8429 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8430
8431 switch (model)
8432 {
8433 case TLS_MODEL_GLOBAL_DYNAMIC:
8434 if (TARGET_GNU2_TLS)
8435 {
8436 reg = arm_tls_descseq_addr (x, reg);
8437
8438 tp = arm_load_tp (NULL_RTX);
8439
8440 dest = gen_rtx_PLUS (Pmode, tp, reg);
8441 }
8442 else
8443 {
8444 /* Original scheme */
8445 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8446 dest = gen_reg_rtx (Pmode);
8447 emit_libcall_block (insns, dest, ret, x);
8448 }
8449 return dest;
8450
8451 case TLS_MODEL_LOCAL_DYNAMIC:
8452 if (TARGET_GNU2_TLS)
8453 {
8454 reg = arm_tls_descseq_addr (x, reg);
8455
8456 tp = arm_load_tp (NULL_RTX);
8457
8458 dest = gen_rtx_PLUS (Pmode, tp, reg);
8459 }
8460 else
8461 {
8462 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8463
8464 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8465 share the LDM result with other LD model accesses. */
8466 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8467 UNSPEC_TLS);
8468 dest = gen_reg_rtx (Pmode);
8469 emit_libcall_block (insns, dest, ret, eqv);
8470
8471 /* Load the addend. */
8472 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8473 GEN_INT (TLS_LDO32)),
8474 UNSPEC_TLS);
8475 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8476 dest = gen_rtx_PLUS (Pmode, dest, addend);
8477 }
8478 return dest;
8479
8480 case TLS_MODEL_INITIAL_EXEC:
8481 labelno = GEN_INT (pic_labelno++);
8482 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8483 label = gen_rtx_CONST (VOIDmode, label);
8484 sum = gen_rtx_UNSPEC (Pmode,
8485 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8486 GEN_INT (TARGET_ARM ? 8 : 4)),
8487 UNSPEC_TLS);
8488 reg = load_tls_operand (sum, reg);
8489
8490 if (TARGET_ARM)
8491 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8492 else if (TARGET_THUMB2)
8493 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8494 else
8495 {
8496 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8497 emit_move_insn (reg, gen_const_mem (SImode, reg));
8498 }
8499
8500 tp = arm_load_tp (NULL_RTX);
8501
8502 return gen_rtx_PLUS (Pmode, tp, reg);
8503
8504 case TLS_MODEL_LOCAL_EXEC:
8505 tp = arm_load_tp (NULL_RTX);
8506
8507 reg = gen_rtx_UNSPEC (Pmode,
8508 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8509 UNSPEC_TLS);
8510 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8511
8512 return gen_rtx_PLUS (Pmode, tp, reg);
8513
8514 default:
8515 abort ();
8516 }
8517 }
8518
8519 /* Try machine-dependent ways of modifying an illegitimate address
8520 to be legitimate. If we find one, return the new, valid address. */
8521 rtx
8522 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8523 {
8524 if (arm_tls_referenced_p (x))
8525 {
8526 rtx addend = NULL;
8527
8528 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8529 {
8530 addend = XEXP (XEXP (x, 0), 1);
8531 x = XEXP (XEXP (x, 0), 0);
8532 }
8533
8534 if (GET_CODE (x) != SYMBOL_REF)
8535 return x;
8536
8537 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8538
8539 x = legitimize_tls_address (x, NULL_RTX);
8540
8541 if (addend)
8542 {
8543 x = gen_rtx_PLUS (SImode, x, addend);
8544 orig_x = x;
8545 }
8546 else
8547 return x;
8548 }
8549
8550 if (!TARGET_ARM)
8551 {
8552 /* TODO: legitimize_address for Thumb2. */
8553 if (TARGET_THUMB2)
8554 return x;
8555 return thumb_legitimize_address (x, orig_x, mode);
8556 }
8557
8558 if (GET_CODE (x) == PLUS)
8559 {
8560 rtx xop0 = XEXP (x, 0);
8561 rtx xop1 = XEXP (x, 1);
8562
8563 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8564 xop0 = force_reg (SImode, xop0);
8565
8566 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8567 && !symbol_mentioned_p (xop1))
8568 xop1 = force_reg (SImode, xop1);
8569
8570 if (ARM_BASE_REGISTER_RTX_P (xop0)
8571 && CONST_INT_P (xop1))
8572 {
8573 HOST_WIDE_INT n, low_n;
8574 rtx base_reg, val;
8575 n = INTVAL (xop1);
8576
8577 /* VFP addressing modes actually allow greater offsets, but for
8578 now we just stick with the lowest common denominator. */
8579 if (mode == DImode || mode == DFmode)
8580 {
8581 low_n = n & 0x0f;
8582 n &= ~0x0f;
8583 if (low_n > 4)
8584 {
8585 n += 16;
8586 low_n -= 16;
8587 }
8588 }
8589 else
8590 {
8591 low_n = ((mode) == TImode ? 0
8592 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8593 n -= low_n;
8594 }
8595
8596 base_reg = gen_reg_rtx (SImode);
8597 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8598 emit_move_insn (base_reg, val);
8599 x = plus_constant (Pmode, base_reg, low_n);
8600 }
8601 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8602 x = gen_rtx_PLUS (SImode, xop0, xop1);
8603 }
8604
8605 /* XXX We don't allow MINUS any more -- see comment in
8606 arm_legitimate_address_outer_p (). */
8607 else if (GET_CODE (x) == MINUS)
8608 {
8609 rtx xop0 = XEXP (x, 0);
8610 rtx xop1 = XEXP (x, 1);
8611
8612 if (CONSTANT_P (xop0))
8613 xop0 = force_reg (SImode, xop0);
8614
8615 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8616 xop1 = force_reg (SImode, xop1);
8617
8618 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8619 x = gen_rtx_MINUS (SImode, xop0, xop1);
8620 }
8621
8622 /* Make sure to take full advantage of the pre-indexed addressing mode
8623 with absolute addresses, which often allows the base register to be
8624 factored out for multiple adjacent memory references, and might even
8625 allow the minipool to be avoided entirely. */
8626 else if (CONST_INT_P (x) && optimize > 0)
8627 {
8628 unsigned int bits;
8629 HOST_WIDE_INT mask, base, index;
8630 rtx base_reg;
8631
8632 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8633 use an 8-bit index. So let's use a 12-bit index for SImode only and
8634 hope that arm_gen_constant will enable ldrb to use more bits. */
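/* Worked example (illustrative): for a word load from address 0x1234,
   mask is 0xfff, so base = 0x1000 and index = 0x234; the base is
   materialized in a register once and the access becomes [rN, #0x234].  */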
8635 bits = (mode == SImode) ? 12 : 8;
8636 mask = (1 << bits) - 1;
8637 base = INTVAL (x) & ~mask;
8638 index = INTVAL (x) & mask;
8639 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8640 {
8641 /* It'll most probably be more efficient to generate the base
8642 with more bits set and use a negative index instead. */
8643 base |= mask;
8644 index -= mask;
8645 }
8646 base_reg = force_reg (SImode, GEN_INT (base));
8647 x = plus_constant (Pmode, base_reg, index);
8648 }
8649
8650 if (flag_pic)
8651 {
8652 /* We need to find and carefully transform any SYMBOL and LABEL
8653 references; so go back to the original address expression. */
8654 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8655
8656 if (new_x != orig_x)
8657 x = new_x;
8658 }
8659
8660 return x;
8661 }
8662
8663
8664 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8665 to be legitimate. If we find one, return the new, valid address. */
8666 rtx
8667 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8668 {
8669 if (GET_CODE (x) == PLUS
8670 && CONST_INT_P (XEXP (x, 1))
8671 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8672 || INTVAL (XEXP (x, 1)) < 0))
8673 {
8674 rtx xop0 = XEXP (x, 0);
8675 rtx xop1 = XEXP (x, 1);
8676 HOST_WIDE_INT offset = INTVAL (xop1);
8677
8678 /* Try and fold the offset into a biasing of the base register and
8679 then offsetting that. Don't do this when optimizing for space
8680 since it can cause too many CSEs. */
8681 if (optimize_size && offset >= 0
8682 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8683 {
8684 HOST_WIDE_INT delta;
8685
8686 if (offset >= 256)
8687 delta = offset - (256 - GET_MODE_SIZE (mode));
8688 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8689 delta = 31 * GET_MODE_SIZE (mode);
8690 else
8691 delta = offset & (~31 * GET_MODE_SIZE (mode));
8692
8693 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8694 NULL_RTX);
8695 x = plus_constant (Pmode, xop0, delta);
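/* A small illustration, assuming SImode and optimize_size: an offset of
300 yields delta == 48, so we first add 252 to the base (a single 8-bit
immediate add) and then address with an offset of 48, which fits the
scaled 5-bit load offset. */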
8696 }
8697 else if (offset < 0 && offset > -256)
8698 /* Small negative offsets are best done with a subtract before the
8699 dereference; forcing these into a register normally takes two
8700 instructions. */
8701 x = force_operand (x, NULL_RTX);
8702 else
8703 {
8704 /* For the remaining cases, force the constant into a register. */
8705 xop1 = force_reg (SImode, xop1);
8706 x = gen_rtx_PLUS (SImode, xop0, xop1);
8707 }
8708 }
8709 else if (GET_CODE (x) == PLUS
8710 && s_register_operand (XEXP (x, 1), SImode)
8711 && !s_register_operand (XEXP (x, 0), SImode))
8712 {
8713 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8714
8715 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8716 }
8717
8718 if (flag_pic)
8719 {
8720 /* We need to find and carefully transform any SYMBOL and LABEL
8721 references; so go back to the original address expression. */
8722 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8723
8724 if (new_x != orig_x)
8725 x = new_x;
8726 }
8727
8728 return x;
8729 }
8730
8731 /* Return TRUE if X contains any TLS symbol references. */
8732
8733 bool
8734 arm_tls_referenced_p (rtx x)
8735 {
8736 if (! TARGET_HAVE_TLS)
8737 return false;
8738
8739 subrtx_iterator::array_type array;
8740 FOR_EACH_SUBRTX (iter, array, x, ALL)
8741 {
8742 const_rtx x = *iter;
8743 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8744 {
8745 /* ARM currently does not provide relocations to encode TLS variables
8746 into AArch32 instructions, only data, so there is currently no way
8747 to implement these if a literal pool is disabled. */
8748 if (arm_disable_literal_pool)
8749 sorry ("accessing thread-local storage is not currently supported "
8750 "with -mpure-code or -mslow-flash-data");
8751
8752 return true;
8753 }
8754
8755 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8756 TLS offsets, not real symbol references. */
8757 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8758 iter.skip_subrtxes ();
8759 }
8760 return false;
8761 }
8762
8763 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8764
8765 On the ARM, allow any integer (invalid ones are removed later by insn
8766 patterns), nice doubles and symbol_refs which refer to the function's
8767 constant pool XXX.
8768
8769 When generating PIC, allow anything. */
8770
8771 static bool
8772 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8773 {
8774 return flag_pic || !label_mentioned_p (x);
8775 }
8776
8777 static bool
8778 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8779 {
8780 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8781 RTXs. These must therefore be allowed for Thumb-1 so that the result is
8782 valid when run for ARMv8-M Baseline or later. */
8783 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8784 x = XEXP (x, 0);
8785
8786 return (CONST_INT_P (x)
8787 || CONST_DOUBLE_P (x)
8788 || CONSTANT_ADDRESS_P (x)
8789 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8790 || flag_pic);
8791 }
8792
8793 static bool
8794 arm_legitimate_constant_p (machine_mode mode, rtx x)
8795 {
8796 return (!arm_cannot_force_const_mem (mode, x)
8797 && (TARGET_32BIT
8798 ? arm_legitimate_constant_p_1 (mode, x)
8799 : thumb_legitimate_constant_p (mode, x)));
8800 }
8801
8802 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8803
8804 static bool
8805 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8806 {
8807 rtx base, offset;
8808
8809 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8810 {
8811 split_const (x, &base, &offset);
8812 if (GET_CODE (base) == SYMBOL_REF
8813 && !offset_within_block_p (base, INTVAL (offset)))
8814 return true;
8815 }
8816 return arm_tls_referenced_p (x);
8817 }
8818 \f
8819 #define REG_OR_SUBREG_REG(X) \
8820 (REG_P (X) \
8821 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8822
8823 #define REG_OR_SUBREG_RTX(X) \
8824 (REG_P (X) ? (X) : SUBREG_REG (X))
8825
8826 static inline int
8827 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8828 {
8829 machine_mode mode = GET_MODE (x);
8830 int total, words;
8831
8832 switch (code)
8833 {
8834 case ASHIFT:
8835 case ASHIFTRT:
8836 case LSHIFTRT:
8837 case ROTATERT:
8838 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8839
8840 case PLUS:
8841 case MINUS:
8842 case COMPARE:
8843 case NEG:
8844 case NOT:
8845 return COSTS_N_INSNS (1);
8846
8847 case MULT:
8848 if (arm_arch6m && arm_m_profile_small_mul)
8849 return COSTS_N_INSNS (32);
8850
8851 if (CONST_INT_P (XEXP (x, 1)))
8852 {
8853 int cycles = 0;
8854 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8855
8856 while (i)
8857 {
8858 i >>= 2;
8859 cycles++;
8860 }
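/* For example, a multiplier with about 16 significant bits leaves the
loop after 8 iterations, giving COSTS_N_INSNS (2) + 8. */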
8861 return COSTS_N_INSNS (2) + cycles;
8862 }
8863 return COSTS_N_INSNS (1) + 16;
8864
8865 case SET:
8866 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8867 the mode. */
8868 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8869 return (COSTS_N_INSNS (words)
8870 + 4 * ((MEM_P (SET_SRC (x)))
8871 + MEM_P (SET_DEST (x))));
8872
8873 case CONST_INT:
8874 if (outer == SET)
8875 {
8876 if (UINTVAL (x) < 256
8877 /* 16-bit constant. */
8878 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8879 return 0;
8880 if (thumb_shiftable_const (INTVAL (x)))
8881 return COSTS_N_INSNS (2);
8882 return COSTS_N_INSNS (3);
8883 }
8884 else if ((outer == PLUS || outer == COMPARE)
8885 && INTVAL (x) < 256 && INTVAL (x) > -256)
8886 return 0;
8887 else if ((outer == IOR || outer == XOR || outer == AND)
8888 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8889 return COSTS_N_INSNS (1);
8890 else if (outer == AND)
8891 {
8892 int i;
8893 /* This duplicates the tests in the andsi3 expander. */
8894 for (i = 9; i <= 31; i++)
8895 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8896 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8897 return COSTS_N_INSNS (2);
8898 }
8899 else if (outer == ASHIFT || outer == ASHIFTRT
8900 || outer == LSHIFTRT)
8901 return 0;
8902 return COSTS_N_INSNS (2);
8903
8904 case CONST:
8905 case CONST_DOUBLE:
8906 case LABEL_REF:
8907 case SYMBOL_REF:
8908 return COSTS_N_INSNS (3);
8909
8910 case UDIV:
8911 case UMOD:
8912 case DIV:
8913 case MOD:
8914 return 100;
8915
8916 case TRUNCATE:
8917 return 99;
8918
8919 case AND:
8920 case XOR:
8921 case IOR:
8922 /* XXX guess. */
8923 return 8;
8924
8925 case MEM:
8926 /* XXX another guess. */
8927 /* Memory costs quite a lot for the first word, but subsequent words
8928 load at the equivalent of a single insn each. */
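/* Under this model a DImode access, for instance, costs 10 + 4 == 14,
plus another 4 if it comes from the constant pool. */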
8929 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8930 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8931 ? 4 : 0));
8932
8933 case IF_THEN_ELSE:
8934 /* XXX a guess. */
8935 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8936 return 14;
8937 return 2;
8938
8939 case SIGN_EXTEND:
8940 case ZERO_EXTEND:
8941 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8942 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8943
8944 if (mode == SImode)
8945 return total;
8946
8947 if (arm_arch6)
8948 return total + COSTS_N_INSNS (1);
8949
8950 /* Assume a two-shift sequence. Increase the cost slightly so
8951 we prefer actual shifts over an extend operation. */
8952 return total + 1 + COSTS_N_INSNS (2);
8953
8954 default:
8955 return 99;
8956 }
8957 }
8958
8959 /* Estimates the size cost of thumb1 instructions.
8960 For now most of the code is copied from thumb1_rtx_costs. We need more
8961 fine-grained tuning when we have more related test cases. */
8962 static inline int
8963 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8964 {
8965 machine_mode mode = GET_MODE (x);
8966 int words, cost;
8967
8968 switch (code)
8969 {
8970 case ASHIFT:
8971 case ASHIFTRT:
8972 case LSHIFTRT:
8973 case ROTATERT:
8974 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8975
8976 case PLUS:
8977 case MINUS:
8978 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8979 shiftsub1 patterns defined by RTL expansion, especially for the
8980 expansion of multiplication. */
8981 if ((GET_CODE (XEXP (x, 0)) == MULT
8982 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8983 || (GET_CODE (XEXP (x, 1)) == MULT
8984 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8985 return COSTS_N_INSNS (2);
8986 /* Fall through. */
8987 case COMPARE:
8988 case NEG:
8989 case NOT:
8990 return COSTS_N_INSNS (1);
8991
8992 case MULT:
8993 if (CONST_INT_P (XEXP (x, 1)))
8994 {
8995 /* The Thumb-1 mul instruction can't operate on a constant; we must
8996 load it into a register first. */
8997 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8998 /* For targets that have a very small and high-latency multiply
8999 unit, we prefer to synthesize the mult with up to 5 instructions,
9000 giving a good balance between size and performance. */
9001 if (arm_arch6m && arm_m_profile_small_mul)
9002 return COSTS_N_INSNS (5);
9003 else
9004 return COSTS_N_INSNS (1) + const_size;
9005 }
9006 return COSTS_N_INSNS (1);
9007
9008 case SET:
9009 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9010 the mode. */
9011 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9012 cost = COSTS_N_INSNS (words);
9013 if (satisfies_constraint_J (SET_SRC (x))
9014 || satisfies_constraint_K (SET_SRC (x))
9015 /* Too big an immediate for a 2-byte mov, using MOVT. */
9016 || (CONST_INT_P (SET_SRC (x))
9017 && UINTVAL (SET_SRC (x)) >= 256
9018 && TARGET_HAVE_MOVT
9019 && satisfies_constraint_j (SET_SRC (x)))
9020 /* thumb1_movdi_insn. */
9021 || ((words > 1) && MEM_P (SET_SRC (x))))
9022 cost += COSTS_N_INSNS (1);
9023 return cost;
9024
9025 case CONST_INT:
9026 if (outer == SET)
9027 {
9028 if (UINTVAL (x) < 256)
9029 return COSTS_N_INSNS (1);
9030 /* movw is 4 bytes long. */
9031 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9032 return COSTS_N_INSNS (2);
9033 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9034 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9035 return COSTS_N_INSNS (2);
9036 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9037 if (thumb_shiftable_const (INTVAL (x)))
9038 return COSTS_N_INSNS (2);
9039 return COSTS_N_INSNS (3);
9040 }
9041 else if ((outer == PLUS || outer == COMPARE)
9042 && INTVAL (x) < 256 && INTVAL (x) > -256)
9043 return 0;
9044 else if ((outer == IOR || outer == XOR || outer == AND)
9045 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9046 return COSTS_N_INSNS (1);
9047 else if (outer == AND)
9048 {
9049 int i;
9050 /* This duplicates the tests in the andsi3 expander. */
9051 for (i = 9; i <= 31; i++)
9052 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9053 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9054 return COSTS_N_INSNS (2);
9055 }
9056 else if (outer == ASHIFT || outer == ASHIFTRT
9057 || outer == LSHIFTRT)
9058 return 0;
9059 return COSTS_N_INSNS (2);
9060
9061 case CONST:
9062 case CONST_DOUBLE:
9063 case LABEL_REF:
9064 case SYMBOL_REF:
9065 return COSTS_N_INSNS (3);
9066
9067 case UDIV:
9068 case UMOD:
9069 case DIV:
9070 case MOD:
9071 return 100;
9072
9073 case TRUNCATE:
9074 return 99;
9075
9076 case AND:
9077 case XOR:
9078 case IOR:
9079 return COSTS_N_INSNS (1);
9080
9081 case MEM:
9082 return (COSTS_N_INSNS (1)
9083 + COSTS_N_INSNS (1)
9084 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9085 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9086 ? COSTS_N_INSNS (1) : 0));
9087
9088 case IF_THEN_ELSE:
9089 /* XXX a guess. */
9090 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9091 return 14;
9092 return 2;
9093
9094 case ZERO_EXTEND:
9095 /* XXX still guessing. */
9096 switch (GET_MODE (XEXP (x, 0)))
9097 {
9098 case E_QImode:
9099 return (1 + (mode == DImode ? 4 : 0)
9100 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9101
9102 case E_HImode:
9103 return (4 + (mode == DImode ? 4 : 0)
9104 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9105
9106 case E_SImode:
9107 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9108
9109 default:
9110 return 99;
9111 }
9112
9113 default:
9114 return 99;
9115 }
9116 }
9117
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the operand.
9121 Return NULL if OP is not a shifter operand. */
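/* For example, (mult (reg) (const_int 4)) is treated as (reg) shifted
left by two, since the multiplier is an exact power of two. */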
9122 static rtx
9123 shifter_op_p (rtx op, rtx *shift_reg)
9124 {
9125 enum rtx_code code = GET_CODE (op);
9126
9127 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9128 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9129 return XEXP (op, 0);
9130 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9131 return XEXP (op, 0);
9132 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9133 || code == ASHIFTRT)
9134 {
9135 if (!CONST_INT_P (XEXP (op, 1)))
9136 *shift_reg = XEXP (op, 1);
9137 return XEXP (op, 0);
9138 }
9139
9140 return NULL;
9141 }
9142
9143 static bool
9144 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9145 {
9146 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9147 rtx_code code = GET_CODE (x);
9148 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9149
9150 switch (XINT (x, 1))
9151 {
9152 case UNSPEC_UNALIGNED_LOAD:
9153 /* We can only do unaligned loads into the integer unit, and we can't
9154 use LDM or LDRD. */
9155 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9156 if (speed_p)
9157 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9158 + extra_cost->ldst.load_unaligned);
9159
9160 #ifdef NOT_YET
9161 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9162 ADDR_SPACE_GENERIC, speed_p);
9163 #endif
9164 return true;
9165
9166 case UNSPEC_UNALIGNED_STORE:
9167 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9168 if (speed_p)
9169 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9170 + extra_cost->ldst.store_unaligned);
9171
9172 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9173 #ifdef NOT_YET
9174 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9175 ADDR_SPACE_GENERIC, speed_p);
9176 #endif
9177 return true;
9178
9179 case UNSPEC_VRINTZ:
9180 case UNSPEC_VRINTP:
9181 case UNSPEC_VRINTM:
9182 case UNSPEC_VRINTR:
9183 case UNSPEC_VRINTX:
9184 case UNSPEC_VRINTA:
9185 if (speed_p)
9186 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9187
9188 return true;
9189 default:
9190 *cost = COSTS_N_INSNS (2);
9191 break;
9192 }
9193 return true;
9194 }
9195
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
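/* For instance, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
speed and COSTS_N_INSNS (4) when optimizing for size. */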
9199
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9201 do \
9202 { \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9206 { \
9207 if (shift_reg) \
9208 { \
9209 if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9212 ASHIFT, 1, speed_p); \
9213 } \
9214 else if (speed_p) \
9215 *cost += extra_cost->alu.arith_shift; \
9216 \
9217 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9218 ASHIFT, 0, speed_p) \
9219 + rtx_cost (XEXP (x, 1 - IDX), \
9220 GET_MODE (shift_op), \
9221 OP, 1, speed_p)); \
9222 return true; \
9223 } \
9224 } \
9225 while (0);
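/* For example, HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) checks whether operand 0
of a narrow-mode PLUS is a left shift (or an equivalent multiply) and, if
so, accumulates the shift-plus-arithmetic costs and returns. */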
9226
9227 /* RTX costs. Make an estimate of the cost of executing the operation
9228 X, which is contained with an operation with code OUTER_CODE.
9229 SPEED_P indicates whether the cost desired is the performance cost,
9230 or the size cost. The estimate is stored in COST and the return
9231 value is TRUE if the cost calculation is final, or FALSE if the
9232 caller should recurse through the operands of X to add additional
9233 costs.
9234
9235 We currently make no attempt to model the size savings of Thumb-2
9236 16-bit instructions. At the normal points in compilation where
9237 this code is called we have no measure of whether the condition
9238 flags are live or not, and thus no realistic way to determine what
9239 the size will eventually be. */
9240 static bool
9241 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9242 const struct cpu_cost_table *extra_cost,
9243 int *cost, bool speed_p)
9244 {
9245 machine_mode mode = GET_MODE (x);
9246
9247 *cost = COSTS_N_INSNS (1);
9248
9249 if (TARGET_THUMB1)
9250 {
9251 if (speed_p)
9252 *cost = thumb1_rtx_costs (x, code, outer_code);
9253 else
9254 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9255 return true;
9256 }
9257
9258 switch (code)
9259 {
9260 case SET:
9261 *cost = 0;
9262 /* SET RTXs don't have a mode so we get it from the destination. */
9263 mode = GET_MODE (SET_DEST (x));
9264
9265 if (REG_P (SET_SRC (x))
9266 && REG_P (SET_DEST (x)))
9267 {
9268 /* Assume that most copies can be done with a single insn,
9269 unless we don't have HW FP, in which case everything
9270 larger than word mode will require two insns. */
9271 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9272 && GET_MODE_SIZE (mode) > 4)
9273 || mode == DImode)
9274 ? 2 : 1);
9275 /* Conditional register moves can be encoded
9276 in 16 bits in Thumb mode. */
9277 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9278 *cost >>= 1;
9279
9280 return true;
9281 }
9282
9283 if (CONST_INT_P (SET_SRC (x)))
9284 {
9285 /* Handle CONST_INT here, since the value doesn't have a mode
9286 and we would otherwise be unable to work out the true cost. */
9287 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9288 0, speed_p);
9289 outer_code = SET;
9290 /* Slightly lower the cost of setting a core reg to a constant.
9291 This helps break up chains and allows for better scheduling. */
9292 if (REG_P (SET_DEST (x))
9293 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9294 *cost -= 1;
9295 x = SET_SRC (x);
9296 /* Immediate moves with an immediate in the range [0, 255] can be
9297 encoded in 16 bits in Thumb mode. */
9298 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9299 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9300 *cost >>= 1;
9301 goto const_int_cost;
9302 }
9303
9304 return false;
9305
9306 case MEM:
9307 /* A memory access costs 1 insn if the mode is small, or the address is
9308 a single register, otherwise it costs one insn per word. */
9309 if (REG_P (XEXP (x, 0)))
9310 *cost = COSTS_N_INSNS (1);
9311 else if (flag_pic
9312 && GET_CODE (XEXP (x, 0)) == PLUS
9313 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9314 /* This will be split into two instructions.
9315 See arm.md:calculate_pic_address. */
9316 *cost = COSTS_N_INSNS (2);
9317 else
9318 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9319
9320 /* For speed optimizations, add the costs of the address and
9321 accessing memory. */
9322 if (speed_p)
9323 #ifdef NOT_YET
9324 *cost += (extra_cost->ldst.load
9325 + arm_address_cost (XEXP (x, 0), mode,
9326 ADDR_SPACE_GENERIC, speed_p));
9327 #else
9328 *cost += extra_cost->ldst.load;
9329 #endif
9330 return true;
9331
9332 case PARALLEL:
9333 {
9334 /* Calculations of LDM costs are complex. We assume an initial cost
9335 (ldm_1st) which will load the number of registers mentioned in
9336 ldm_regs_per_insn_1st registers; then each additional
9337 ldm_regs_per_insn_subsequent registers cost one more insn. The
9338 formula for N regs is thus:
9339
9340 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9341 + ldm_regs_per_insn_subsequent - 1)
9342 / ldm_regs_per_insn_subsequent).
9343
9344 Additional costs may also be added for addressing. A similar
9345 formula is used for STM. */
9346
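/* As a worked example with purely illustrative tuning values: loading five
registers when ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent
== 2 costs ldm_1st + COSTS_N_INSNS ((5 - 2 + 2 - 1) / 2), i.e. ldm_1st
plus COSTS_N_INSNS (2). */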
9347 bool is_ldm = load_multiple_operation (x, SImode);
9348 bool is_stm = store_multiple_operation (x, SImode);
9349
9350 if (is_ldm || is_stm)
9351 {
9352 if (speed_p)
9353 {
9354 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9355 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9356 ? extra_cost->ldst.ldm_regs_per_insn_1st
9357 : extra_cost->ldst.stm_regs_per_insn_1st;
9358 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9359 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9360 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9361
9362 *cost += regs_per_insn_1st
9363 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9364 + regs_per_insn_sub - 1)
9365 / regs_per_insn_sub);
9366 return true;
9367 }
9368
9369 }
9370 return false;
9371 }
9372 case DIV:
9373 case UDIV:
9374 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9375 && (mode == SFmode || !TARGET_VFP_SINGLE))
9376 *cost += COSTS_N_INSNS (speed_p
9377 ? extra_cost->fp[mode != SFmode].div : 0);
9378 else if (mode == SImode && TARGET_IDIV)
9379 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9380 else
9381 *cost = LIBCALL_COST (2);
9382
9383 /* Make the cost of sdiv more expensive so that udiv is preferred when
9384 both sdiv and udiv are possible. */
9385 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9386 return false; /* All arguments must be in registers. */
9387
9388 case MOD:
9389 /* MOD by a power of 2 can be expanded as:
9390 rsbs r1, r0, #0
9391 and r0, r0, #(n - 1)
9392 and r1, r1, #(n - 1)
9393 rsbpl r0, r1, #0. */
9394 if (CONST_INT_P (XEXP (x, 1))
9395 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9396 && mode == SImode)
9397 {
9398 *cost += COSTS_N_INSNS (3);
9399
9400 if (speed_p)
9401 *cost += 2 * extra_cost->alu.logical
9402 + extra_cost->alu.arith;
9403 return true;
9404 }
9405
9406 /* Fall-through. */
9407 case UMOD:
9408 /* Make the cost of sdiv more expensive so that udiv is preferred when
9409 both sdiv and udiv are possible. */
9410 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9411 return false; /* All arguments must be in registers. */
9412
9413 case ROTATE:
9414 if (mode == SImode && REG_P (XEXP (x, 1)))
9415 {
9416 *cost += (COSTS_N_INSNS (1)
9417 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9418 if (speed_p)
9419 *cost += extra_cost->alu.shift_reg;
9420 return true;
9421 }
9422 /* Fall through */
9423 case ROTATERT:
9424 case ASHIFT:
9425 case LSHIFTRT:
9426 case ASHIFTRT:
9427 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9428 {
9429 *cost += (COSTS_N_INSNS (2)
9430 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9431 if (speed_p)
9432 *cost += 2 * extra_cost->alu.shift;
9433 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
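/* A 1-bit left shift of a DImode value is just the value added to itself
(ADDS/ADC on the two halves), so the adddi3 form is the one we want. */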
9434 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9435 *cost += 1;
9436 return true;
9437 }
9438 else if (mode == SImode)
9439 {
9440 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9441 /* Slightly disparage register shifts at -Os, but not by much. */
9442 if (!CONST_INT_P (XEXP (x, 1)))
9443 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9444 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9445 return true;
9446 }
9447 else if (GET_MODE_CLASS (mode) == MODE_INT
9448 && GET_MODE_SIZE (mode) < 4)
9449 {
9450 if (code == ASHIFT)
9451 {
9452 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9453 /* Slightly disparage register shifts at -Os, but not by
9454 much. */
9455 if (!CONST_INT_P (XEXP (x, 1)))
9456 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9457 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9458 }
9459 else if (code == LSHIFTRT || code == ASHIFTRT)
9460 {
9461 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9462 {
9463 /* Can use SBFX/UBFX. */
9464 if (speed_p)
9465 *cost += extra_cost->alu.bfx;
9466 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9467 }
9468 else
9469 {
9470 *cost += COSTS_N_INSNS (1);
9471 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9472 if (speed_p)
9473 {
9474 if (CONST_INT_P (XEXP (x, 1)))
9475 *cost += 2 * extra_cost->alu.shift;
9476 else
9477 *cost += (extra_cost->alu.shift
9478 + extra_cost->alu.shift_reg);
9479 }
9480 else
9481 /* Slightly disparage register shifts. */
9482 *cost += !CONST_INT_P (XEXP (x, 1));
9483 }
9484 }
9485 else /* Rotates. */
9486 {
9487 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9488 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9489 if (speed_p)
9490 {
9491 if (CONST_INT_P (XEXP (x, 1)))
9492 *cost += (2 * extra_cost->alu.shift
9493 + extra_cost->alu.log_shift);
9494 else
9495 *cost += (extra_cost->alu.shift
9496 + extra_cost->alu.shift_reg
9497 + extra_cost->alu.log_shift_reg);
9498 }
9499 }
9500 return true;
9501 }
9502
9503 *cost = LIBCALL_COST (2);
9504 return false;
9505
9506 case BSWAP:
9507 if (arm_arch6)
9508 {
9509 if (mode == SImode)
9510 {
9511 if (speed_p)
9512 *cost += extra_cost->alu.rev;
9513
9514 return false;
9515 }
9516 }
9517 else
9518 {
9519 /* No rev instruction available. Look at arm_legacy_rev
9520 and thumb_legacy_rev for the form of RTL used then. */
9521 if (TARGET_THUMB)
9522 {
9523 *cost += COSTS_N_INSNS (9);
9524
9525 if (speed_p)
9526 {
9527 *cost += 6 * extra_cost->alu.shift;
9528 *cost += 3 * extra_cost->alu.logical;
9529 }
9530 }
9531 else
9532 {
9533 *cost += COSTS_N_INSNS (4);
9534
9535 if (speed_p)
9536 {
9537 *cost += 2 * extra_cost->alu.shift;
9538 *cost += extra_cost->alu.arith_shift;
9539 *cost += 2 * extra_cost->alu.logical;
9540 }
9541 }
9542 return true;
9543 }
9544 return false;
9545
9546 case MINUS:
9547 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9548 && (mode == SFmode || !TARGET_VFP_SINGLE))
9549 {
9550 if (GET_CODE (XEXP (x, 0)) == MULT
9551 || GET_CODE (XEXP (x, 1)) == MULT)
9552 {
9553 rtx mul_op0, mul_op1, sub_op;
9554
9555 if (speed_p)
9556 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9557
9558 if (GET_CODE (XEXP (x, 0)) == MULT)
9559 {
9560 mul_op0 = XEXP (XEXP (x, 0), 0);
9561 mul_op1 = XEXP (XEXP (x, 0), 1);
9562 sub_op = XEXP (x, 1);
9563 }
9564 else
9565 {
9566 mul_op0 = XEXP (XEXP (x, 1), 0);
9567 mul_op1 = XEXP (XEXP (x, 1), 1);
9568 sub_op = XEXP (x, 0);
9569 }
9570
9571 /* The first operand of the multiply may be optionally
9572 negated. */
9573 if (GET_CODE (mul_op0) == NEG)
9574 mul_op0 = XEXP (mul_op0, 0);
9575
9576 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9577 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9578 + rtx_cost (sub_op, mode, code, 0, speed_p));
9579
9580 return true;
9581 }
9582
9583 if (speed_p)
9584 *cost += extra_cost->fp[mode != SFmode].addsub;
9585 return false;
9586 }
9587
9588 if (mode == SImode)
9589 {
9590 rtx shift_by_reg = NULL;
9591 rtx shift_op;
9592 rtx non_shift_op;
9593
9594 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9595 if (shift_op == NULL)
9596 {
9597 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9598 non_shift_op = XEXP (x, 0);
9599 }
9600 else
9601 non_shift_op = XEXP (x, 1);
9602
9603 if (shift_op != NULL)
9604 {
9605 if (shift_by_reg != NULL)
9606 {
9607 if (speed_p)
9608 *cost += extra_cost->alu.arith_shift_reg;
9609 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9610 }
9611 else if (speed_p)
9612 *cost += extra_cost->alu.arith_shift;
9613
9614 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9615 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9616 return true;
9617 }
9618
9619 if (arm_arch_thumb2
9620 && GET_CODE (XEXP (x, 1)) == MULT)
9621 {
9622 /* MLS. */
9623 if (speed_p)
9624 *cost += extra_cost->mult[0].add;
9625 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9626 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9627 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9628 return true;
9629 }
9630
9631 if (CONST_INT_P (XEXP (x, 0)))
9632 {
9633 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9634 INTVAL (XEXP (x, 0)), NULL_RTX,
9635 NULL_RTX, 1, 0);
9636 *cost = COSTS_N_INSNS (insns);
9637 if (speed_p)
9638 *cost += insns * extra_cost->alu.arith;
9639 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9640 return true;
9641 }
9642 else if (speed_p)
9643 *cost += extra_cost->alu.arith;
9644
9645 return false;
9646 }
9647
9648 if (GET_MODE_CLASS (mode) == MODE_INT
9649 && GET_MODE_SIZE (mode) < 4)
9650 {
9651 rtx shift_op, shift_reg;
9652 shift_reg = NULL;
9653
9654 /* We check both sides of the MINUS for shifter operands since,
9655 unlike PLUS, it's not commutative. */
9656
9657 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9658 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9659
9660 /* Slightly disparage, as we might need to widen the result. */
9661 *cost += 1;
9662 if (speed_p)
9663 *cost += extra_cost->alu.arith;
9664
9665 if (CONST_INT_P (XEXP (x, 0)))
9666 {
9667 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9668 return true;
9669 }
9670
9671 return false;
9672 }
9673
9674 if (mode == DImode)
9675 {
9676 *cost += COSTS_N_INSNS (1);
9677
9678 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9679 {
9680 rtx op1 = XEXP (x, 1);
9681
9682 if (speed_p)
9683 *cost += 2 * extra_cost->alu.arith;
9684
9685 if (GET_CODE (op1) == ZERO_EXTEND)
9686 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9687 0, speed_p);
9688 else
9689 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9690 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9691 0, speed_p);
9692 return true;
9693 }
9694 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9695 {
9696 if (speed_p)
9697 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9698 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9699 0, speed_p)
9700 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9701 return true;
9702 }
9703 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9704 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9705 {
9706 if (speed_p)
9707 *cost += (extra_cost->alu.arith
9708 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9709 ? extra_cost->alu.arith
9710 : extra_cost->alu.arith_shift));
9711 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9712 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9713 GET_CODE (XEXP (x, 1)), 0, speed_p));
9714 return true;
9715 }
9716
9717 if (speed_p)
9718 *cost += 2 * extra_cost->alu.arith;
9719 return false;
9720 }
9721
9722 /* Vector mode? */
9723
9724 *cost = LIBCALL_COST (2);
9725 return false;
9726
9727 case PLUS:
9728 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9729 && (mode == SFmode || !TARGET_VFP_SINGLE))
9730 {
9731 if (GET_CODE (XEXP (x, 0)) == MULT)
9732 {
9733 rtx mul_op0, mul_op1, add_op;
9734
9735 if (speed_p)
9736 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9737
9738 mul_op0 = XEXP (XEXP (x, 0), 0);
9739 mul_op1 = XEXP (XEXP (x, 0), 1);
9740 add_op = XEXP (x, 1);
9741
9742 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9743 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9744 + rtx_cost (add_op, mode, code, 0, speed_p));
9745
9746 return true;
9747 }
9748
9749 if (speed_p)
9750 *cost += extra_cost->fp[mode != SFmode].addsub;
9751 return false;
9752 }
9753 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9754 {
9755 *cost = LIBCALL_COST (2);
9756 return false;
9757 }
9758
9759 /* Narrow modes can be synthesized in SImode, but the range
9760 of useful sub-operations is limited. Check for shift operations
9761 on one of the operands. Only left shifts can be used in the
9762 narrow modes. */
9763 if (GET_MODE_CLASS (mode) == MODE_INT
9764 && GET_MODE_SIZE (mode) < 4)
9765 {
9766 rtx shift_op, shift_reg;
9767 shift_reg = NULL;
9768
9769 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9770
9771 if (CONST_INT_P (XEXP (x, 1)))
9772 {
9773 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9774 INTVAL (XEXP (x, 1)), NULL_RTX,
9775 NULL_RTX, 1, 0);
9776 *cost = COSTS_N_INSNS (insns);
9777 if (speed_p)
9778 *cost += insns * extra_cost->alu.arith;
9779 /* Slightly penalize a narrow operation as the result may
9780 need widening. */
9781 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9782 return true;
9783 }
9784
9785 /* Slightly penalize a narrow operation as the result may
9786 need widening. */
9787 *cost += 1;
9788 if (speed_p)
9789 *cost += extra_cost->alu.arith;
9790
9791 return false;
9792 }
9793
9794 if (mode == SImode)
9795 {
9796 rtx shift_op, shift_reg;
9797
9798 if (TARGET_INT_SIMD
9799 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9800 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9801 {
9802 /* UXTA[BH] or SXTA[BH]. */
9803 if (speed_p)
9804 *cost += extra_cost->alu.extend_arith;
9805 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9806 0, speed_p)
9807 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9808 return true;
9809 }
9810
9811 shift_reg = NULL;
9812 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9813 if (shift_op != NULL)
9814 {
9815 if (shift_reg)
9816 {
9817 if (speed_p)
9818 *cost += extra_cost->alu.arith_shift_reg;
9819 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9820 }
9821 else if (speed_p)
9822 *cost += extra_cost->alu.arith_shift;
9823
9824 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9825 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9826 return true;
9827 }
9828 if (GET_CODE (XEXP (x, 0)) == MULT)
9829 {
9830 rtx mul_op = XEXP (x, 0);
9831
9832 if (TARGET_DSP_MULTIPLY
9833 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9834 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9835 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9836 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9837 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9838 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9839 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9840 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9841 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9842 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9843 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9844 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9845 == 16))))))
9846 {
9847 /* SMLA[BT][BT]. */
9848 if (speed_p)
9849 *cost += extra_cost->mult[0].extend_add;
9850 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9851 SIGN_EXTEND, 0, speed_p)
9852 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9853 SIGN_EXTEND, 0, speed_p)
9854 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9855 return true;
9856 }
9857
9858 if (speed_p)
9859 *cost += extra_cost->mult[0].add;
9860 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9861 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9862 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9863 return true;
9864 }
9865 if (CONST_INT_P (XEXP (x, 1)))
9866 {
9867 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9868 INTVAL (XEXP (x, 1)), NULL_RTX,
9869 NULL_RTX, 1, 0);
9870 *cost = COSTS_N_INSNS (insns);
9871 if (speed_p)
9872 *cost += insns * extra_cost->alu.arith;
9873 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9874 return true;
9875 }
9876 else if (speed_p)
9877 *cost += extra_cost->alu.arith;
9878
9879 return false;
9880 }
9881
9882 if (mode == DImode)
9883 {
9884 if (arm_arch3m
9885 && GET_CODE (XEXP (x, 0)) == MULT
9886 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9887 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9888 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9889 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9890 {
9891 if (speed_p)
9892 *cost += extra_cost->mult[1].extend_add;
9893 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9894 ZERO_EXTEND, 0, speed_p)
9895 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9896 ZERO_EXTEND, 0, speed_p)
9897 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9898 return true;
9899 }
9900
9901 *cost += COSTS_N_INSNS (1);
9902
9903 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9904 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9905 {
9906 if (speed_p)
9907 *cost += (extra_cost->alu.arith
9908 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9909 ? extra_cost->alu.arith
9910 : extra_cost->alu.arith_shift));
9911
9912 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9913 0, speed_p)
9914 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9915 return true;
9916 }
9917
9918 if (speed_p)
9919 *cost += 2 * extra_cost->alu.arith;
9920 return false;
9921 }
9922
9923 /* Vector mode? */
9924 *cost = LIBCALL_COST (2);
9925 return false;
9926 case IOR:
9927 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9928 {
9929 if (speed_p)
9930 *cost += extra_cost->alu.rev;
9931
9932 return true;
9933 }
9934 /* Fall through. */
9935 case AND: case XOR:
9936 if (mode == SImode)
9937 {
9938 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9939 rtx op0 = XEXP (x, 0);
9940 rtx shift_op, shift_reg;
9941
9942 if (subcode == NOT
9943 && (code == AND
9944 || (code == IOR && TARGET_THUMB2)))
9945 op0 = XEXP (op0, 0);
9946
9947 shift_reg = NULL;
9948 shift_op = shifter_op_p (op0, &shift_reg);
9949 if (shift_op != NULL)
9950 {
9951 if (shift_reg)
9952 {
9953 if (speed_p)
9954 *cost += extra_cost->alu.log_shift_reg;
9955 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9956 }
9957 else if (speed_p)
9958 *cost += extra_cost->alu.log_shift;
9959
9960 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9961 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9962 return true;
9963 }
9964
9965 if (CONST_INT_P (XEXP (x, 1)))
9966 {
9967 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9968 INTVAL (XEXP (x, 1)), NULL_RTX,
9969 NULL_RTX, 1, 0);
9970
9971 *cost = COSTS_N_INSNS (insns);
9972 if (speed_p)
9973 *cost += insns * extra_cost->alu.logical;
9974 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9975 return true;
9976 }
9977
9978 if (speed_p)
9979 *cost += extra_cost->alu.logical;
9980 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9982 return true;
9983 }
9984
9985 if (mode == DImode)
9986 {
9987 rtx op0 = XEXP (x, 0);
9988 enum rtx_code subcode = GET_CODE (op0);
9989
9990 *cost += COSTS_N_INSNS (1);
9991
9992 if (subcode == NOT
9993 && (code == AND
9994 || (code == IOR && TARGET_THUMB2)))
9995 op0 = XEXP (op0, 0);
9996
9997 if (GET_CODE (op0) == ZERO_EXTEND)
9998 {
9999 if (speed_p)
10000 *cost += 2 * extra_cost->alu.logical;
10001
10002 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10003 0, speed_p)
10004 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10005 return true;
10006 }
10007 else if (GET_CODE (op0) == SIGN_EXTEND)
10008 {
10009 if (speed_p)
10010 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10011
10012 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10013 0, speed_p)
10014 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10015 return true;
10016 }
10017
10018 if (speed_p)
10019 *cost += 2 * extra_cost->alu.logical;
10020
10021 return true;
10022 }
10023 /* Vector mode? */
10024
10025 *cost = LIBCALL_COST (2);
10026 return false;
10027
10028 case MULT:
10029 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10030 && (mode == SFmode || !TARGET_VFP_SINGLE))
10031 {
10032 rtx op0 = XEXP (x, 0);
10033
10034 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10035 op0 = XEXP (op0, 0);
10036
10037 if (speed_p)
10038 *cost += extra_cost->fp[mode != SFmode].mult;
10039
10040 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10041 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10042 return true;
10043 }
10044 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10045 {
10046 *cost = LIBCALL_COST (2);
10047 return false;
10048 }
10049
10050 if (mode == SImode)
10051 {
10052 if (TARGET_DSP_MULTIPLY
10053 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10054 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10055 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10056 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10057 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10058 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10059 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10060 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10061 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10062 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10063 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10064 && (INTVAL (XEXP (XEXP (x, 1), 1))
10065 == 16))))))
10066 {
10067 /* SMUL[TB][TB]. */
10068 if (speed_p)
10069 *cost += extra_cost->mult[0].extend;
10070 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10071 SIGN_EXTEND, 0, speed_p);
10072 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10073 SIGN_EXTEND, 1, speed_p);
10074 return true;
10075 }
10076 if (speed_p)
10077 *cost += extra_cost->mult[0].simple;
10078 return false;
10079 }
10080
10081 if (mode == DImode)
10082 {
10083 if (arm_arch3m
10084 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10085 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10086 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10087 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10088 {
10089 if (speed_p)
10090 *cost += extra_cost->mult[1].extend;
10091 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10092 ZERO_EXTEND, 0, speed_p)
10093 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10094 ZERO_EXTEND, 0, speed_p));
10095 return true;
10096 }
10097
10098 *cost = LIBCALL_COST (2);
10099 return false;
10100 }
10101
10102 /* Vector mode? */
10103 *cost = LIBCALL_COST (2);
10104 return false;
10105
10106 case NEG:
10107 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10108 && (mode == SFmode || !TARGET_VFP_SINGLE))
10109 {
10110 if (GET_CODE (XEXP (x, 0)) == MULT)
10111 {
10112 /* VNMUL. */
10113 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10114 return true;
10115 }
10116
10117 if (speed_p)
10118 *cost += extra_cost->fp[mode != SFmode].neg;
10119
10120 return false;
10121 }
10122 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10123 {
10124 *cost = LIBCALL_COST (1);
10125 return false;
10126 }
10127
10128 if (mode == SImode)
10129 {
10130 if (GET_CODE (XEXP (x, 0)) == ABS)
10131 {
10132 *cost += COSTS_N_INSNS (1);
10133 /* Assume the non-flag-changing variant. */
10134 if (speed_p)
10135 *cost += (extra_cost->alu.log_shift
10136 + extra_cost->alu.arith_shift);
10137 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10138 return true;
10139 }
10140
10141 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10142 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10143 {
10144 *cost += COSTS_N_INSNS (1);
10145 /* No extra cost for MOV imm and MVN imm. */
10146 /* If the comparison op is using the flags, there's no further
10147 cost; otherwise we need to add the cost of the comparison. */
10148 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10149 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10150 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10151 {
10152 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10153 *cost += (COSTS_N_INSNS (1)
10154 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10155 0, speed_p)
10156 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10157 1, speed_p));
10158 if (speed_p)
10159 *cost += extra_cost->alu.arith;
10160 }
10161 return true;
10162 }
10163
10164 if (speed_p)
10165 *cost += extra_cost->alu.arith;
10166 return false;
10167 }
10168
10169 if (GET_MODE_CLASS (mode) == MODE_INT
10170 && GET_MODE_SIZE (mode) < 4)
10171 {
10172 /* Slightly disparage, as we might need an extend operation. */
10173 *cost += 1;
10174 if (speed_p)
10175 *cost += extra_cost->alu.arith;
10176 return false;
10177 }
10178
10179 if (mode == DImode)
10180 {
10181 *cost += COSTS_N_INSNS (1);
10182 if (speed_p)
10183 *cost += 2 * extra_cost->alu.arith;
10184 return false;
10185 }
10186
10187 /* Vector mode? */
10188 *cost = LIBCALL_COST (1);
10189 return false;
10190
10191 case NOT:
10192 if (mode == SImode)
10193 {
10194 rtx shift_op;
10195 rtx shift_reg = NULL;
10196
10197 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10198
10199 if (shift_op)
10200 {
10201 if (shift_reg != NULL)
10202 {
10203 if (speed_p)
10204 *cost += extra_cost->alu.log_shift_reg;
10205 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10206 }
10207 else if (speed_p)
10208 *cost += extra_cost->alu.log_shift;
10209 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10210 return true;
10211 }
10212
10213 if (speed_p)
10214 *cost += extra_cost->alu.logical;
10215 return false;
10216 }
10217 if (mode == DImode)
10218 {
10219 *cost += COSTS_N_INSNS (1);
10220 return false;
10221 }
10222
10223 /* Vector mode? */
10224
10225 *cost += LIBCALL_COST (1);
10226 return false;
10227
10228 case IF_THEN_ELSE:
10229 {
10230 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10231 {
10232 *cost += COSTS_N_INSNS (3);
10233 return true;
10234 }
10235 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10236 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10237
10238 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10239 /* Assume that if one arm of the if_then_else is a register,
10240 that it will be tied with the result and eliminate the
10241 conditional insn. */
10242 if (REG_P (XEXP (x, 1)))
10243 *cost += op2cost;
10244 else if (REG_P (XEXP (x, 2)))
10245 *cost += op1cost;
10246 else
10247 {
10248 if (speed_p)
10249 {
10250 if (extra_cost->alu.non_exec_costs_exec)
10251 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10252 else
10253 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10254 }
10255 else
10256 *cost += op1cost + op2cost;
10257 }
10258 }
10259 return true;
10260
10261 case COMPARE:
10262 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10263 *cost = 0;
10264 else
10265 {
10266 machine_mode op0mode;
10267 /* We'll mostly assume that the cost of a compare is the cost of the
10268 LHS. However, there are some notable exceptions. */
10269
10270 /* Floating point compares are never done as side-effects. */
10271 op0mode = GET_MODE (XEXP (x, 0));
10272 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10273 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10274 {
10275 if (speed_p)
10276 *cost += extra_cost->fp[op0mode != SFmode].compare;
10277
10278 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10279 {
10280 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10281 return true;
10282 }
10283
10284 return false;
10285 }
10286 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10287 {
10288 *cost = LIBCALL_COST (2);
10289 return false;
10290 }
10291
10292 /* DImode compares normally take two insns. */
10293 if (op0mode == DImode)
10294 {
10295 *cost += COSTS_N_INSNS (1);
10296 if (speed_p)
10297 *cost += 2 * extra_cost->alu.arith;
10298 return false;
10299 }
10300
10301 if (op0mode == SImode)
10302 {
10303 rtx shift_op;
10304 rtx shift_reg;
10305
10306 if (XEXP (x, 1) == const0_rtx
10307 && !(REG_P (XEXP (x, 0))
10308 || (GET_CODE (XEXP (x, 0)) == SUBREG
10309 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10310 {
10311 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10312
10313 /* Multiply operations that set the flags are often
10314 significantly more expensive. */
10315 if (speed_p
10316 && GET_CODE (XEXP (x, 0)) == MULT
10317 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10318 *cost += extra_cost->mult[0].flag_setting;
10319
10320 if (speed_p
10321 && GET_CODE (XEXP (x, 0)) == PLUS
10322 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10323 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10324 0), 1), mode))
10325 *cost += extra_cost->mult[0].flag_setting;
10326 return true;
10327 }
10328
10329 shift_reg = NULL;
10330 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10331 if (shift_op != NULL)
10332 {
10333 if (shift_reg != NULL)
10334 {
10335 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10336 1, speed_p);
10337 if (speed_p)
10338 *cost += extra_cost->alu.arith_shift_reg;
10339 }
10340 else if (speed_p)
10341 *cost += extra_cost->alu.arith_shift;
10342 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10343 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10344 return true;
10345 }
10346
10347 if (speed_p)
10348 *cost += extra_cost->alu.arith;
10349 if (CONST_INT_P (XEXP (x, 1))
10350 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10351 {
10352 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10353 return true;
10354 }
10355 return false;
10356 }
10357
10358 /* Vector mode? */
10359
10360 *cost = LIBCALL_COST (2);
10361 return false;
10362 }
10363 return true;
10364
10365 case EQ:
10366 case NE:
10367 case LT:
10368 case LE:
10369 case GT:
10370 case GE:
10371 case LTU:
10372 case LEU:
10373 case GEU:
10374 case GTU:
10375 case ORDERED:
10376 case UNORDERED:
10377 case UNEQ:
10378 case UNLE:
10379 case UNLT:
10380 case UNGE:
10381 case UNGT:
10382 case LTGT:
10383 if (outer_code == SET)
10384 {
10385 /* Is it a store-flag operation? */
10386 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10387 && XEXP (x, 1) == const0_rtx)
10388 {
10389 /* Thumb also needs an IT insn. */
10390 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10391 return true;
10392 }
10393 if (XEXP (x, 1) == const0_rtx)
10394 {
10395 switch (code)
10396 {
10397 case LT:
10398 /* LSR Rd, Rn, #31. */
10399 if (speed_p)
10400 *cost += extra_cost->alu.shift;
10401 break;
10402
10403 case EQ:
10404 /* RSBS T1, Rn, #0
10405 ADC Rd, Rn, T1. */
10406
10407 case NE:
10408 /* SUBS T1, Rn, #1
10409 SBC Rd, Rn, T1. */
10410 *cost += COSTS_N_INSNS (1);
10411 break;
10412
10413 case LE:
10414 /* RSBS T1, Rn, Rn, LSR #31
10415 ADC Rd, Rn, T1. */
10416 *cost += COSTS_N_INSNS (1);
10417 if (speed_p)
10418 *cost += extra_cost->alu.arith_shift;
10419 break;
10420
10421 case GT:
10422 /* RSB Rd, Rn, Rn, ASR #1
10423 LSR Rd, Rd, #31. */
10424 *cost += COSTS_N_INSNS (1);
10425 if (speed_p)
10426 *cost += (extra_cost->alu.arith_shift
10427 + extra_cost->alu.shift);
10428 break;
10429
10430 case GE:
10431 /* ASR Rd, Rn, #31
10432 ADD Rd, Rn, #1. */
10433 *cost += COSTS_N_INSNS (1);
10434 if (speed_p)
10435 *cost += extra_cost->alu.shift;
10436 break;
10437
10438 default:
10439 /* Remaining cases are either meaningless or would take
10440 three insns anyway. */
10441 *cost = COSTS_N_INSNS (3);
10442 break;
10443 }
10444 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10445 return true;
10446 }
10447 else
10448 {
10449 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10450 if (CONST_INT_P (XEXP (x, 1))
10451 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10452 {
10453 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10454 return true;
10455 }
10456
10457 return false;
10458 }
10459 }
10460 /* Not directly inside a set. If it involves the condition code
10461 register it must be the condition for a branch, cond_exec or
10462 I_T_E operation. Since the comparison is performed elsewhere
10463 this is just the control part which has no additional
10464 cost. */
10465 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10466 && XEXP (x, 1) == const0_rtx)
10467 {
10468 *cost = 0;
10469 return true;
10470 }
10471 return false;
10472
10473 case ABS:
10474 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10475 && (mode == SFmode || !TARGET_VFP_SINGLE))
10476 {
10477 if (speed_p)
10478 *cost += extra_cost->fp[mode != SFmode].neg;
10479
10480 return false;
10481 }
10482 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10483 {
10484 *cost = LIBCALL_COST (1);
10485 return false;
10486 }
10487
10488 if (mode == SImode)
10489 {
10490 if (speed_p)
10491 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10492 return false;
10493 }
10494 /* Vector mode? */
10495 *cost = LIBCALL_COST (1);
10496 return false;
10497
10498 case SIGN_EXTEND:
10499 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10500 && MEM_P (XEXP (x, 0)))
10501 {
10502 if (mode == DImode)
10503 *cost += COSTS_N_INSNS (1);
10504
10505 if (!speed_p)
10506 return true;
10507
10508 if (GET_MODE (XEXP (x, 0)) == SImode)
10509 *cost += extra_cost->ldst.load;
10510 else
10511 *cost += extra_cost->ldst.load_sign_extend;
10512
10513 if (mode == DImode)
10514 *cost += extra_cost->alu.shift;
10515
10516 return true;
10517 }
10518
10519 /* Widening from less than 32-bits requires an extend operation. */
10520 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10521 {
10522 /* We have SXTB/SXTH. */
10523 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10524 if (speed_p)
10525 *cost += extra_cost->alu.extend;
10526 }
10527 else if (GET_MODE (XEXP (x, 0)) != SImode)
10528 {
10529 /* Needs two shifts. */
10530 *cost += COSTS_N_INSNS (1);
10531 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10532 if (speed_p)
10533 *cost += 2 * extra_cost->alu.shift;
10534 }
10535
10536 /* Widening beyond 32-bits requires one more insn. */
10537 if (mode == DImode)
10538 {
10539 *cost += COSTS_N_INSNS (1);
10540 if (speed_p)
10541 *cost += extra_cost->alu.shift;
10542 }
10543
10544 return true;
10545
10546 case ZERO_EXTEND:
10547 if ((arm_arch4
10548 || GET_MODE (XEXP (x, 0)) == SImode
10549 || GET_MODE (XEXP (x, 0)) == QImode)
10550 && MEM_P (XEXP (x, 0)))
10551 {
10552 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10553
10554 if (mode == DImode)
10555 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10556
10557 return true;
10558 }
10559
10560 /* Widening from less than 32-bits requires an extend operation. */
10561 if (GET_MODE (XEXP (x, 0)) == QImode)
10562 {
10563 /* UXTB can be a shorter instruction in Thumb2, but it might
10564 be slower than the AND Rd, Rn, #255 alternative. When
10565 optimizing for speed it should never be slower to use
10566 AND, and we don't really model 16-bit vs 32-bit insns
10567 here. */
10568 if (speed_p)
10569 *cost += extra_cost->alu.logical;
10570 }
10571 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10572 {
10573 /* We have UXTB/UXTH. */
10574 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10575 if (speed_p)
10576 *cost += extra_cost->alu.extend;
10577 }
10578 else if (GET_MODE (XEXP (x, 0)) != SImode)
10579 {
10580 /* Needs two shifts. It's marginally preferable to use
10581 shifts rather than two BIC instructions as the second
10582 shift may merge with a subsequent insn as a shifter
10583 op. */
10584 *cost = COSTS_N_INSNS (2);
10585 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10586 if (speed_p)
10587 *cost += 2 * extra_cost->alu.shift;
10588 }
10589
10590 /* Widening beyond 32-bits requires one more insn. */
10591 if (mode == DImode)
10592 {
10593 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10594 }
10595
10596 return true;
10597
10598 case CONST_INT:
10599 *cost = 0;
10600 /* CONST_INT has no mode, so we cannot tell for sure how many
10601 insns are really going to be needed. The best we can do is
10602 look at the value passed. If it fits in SImode, then assume
10603 that's the mode it will be used for. Otherwise assume it
10604 will be used in DImode. */
10605 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10606 mode = SImode;
10607 else
10608 mode = DImode;
10609
10610 /* Avoid blowing up in arm_gen_constant (). */
10611 if (!(outer_code == PLUS
10612 || outer_code == AND
10613 || outer_code == IOR
10614 || outer_code == XOR
10615 || outer_code == MINUS))
10616 outer_code = SET;
10617
10618 const_int_cost:
10619 if (mode == SImode)
10620 {
10621 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10622 INTVAL (x), NULL, NULL,
10623 0, 0));
10624 /* Extra costs? */
10625 }
10626 else
10627 {
10628 *cost += COSTS_N_INSNS (arm_gen_constant
10629 (outer_code, SImode, NULL,
10630 trunc_int_for_mode (INTVAL (x), SImode),
10631 NULL, NULL, 0, 0)
10632 + arm_gen_constant (outer_code, SImode, NULL,
10633 INTVAL (x) >> 32, NULL,
10634 NULL, 0, 0));
10635 /* Extra costs? */
10636 }
10637
10638 return true;
10639
10640 case CONST:
10641 case LABEL_REF:
10642 case SYMBOL_REF:
10643 if (speed_p)
10644 {
10645 if (arm_arch_thumb2 && !flag_pic)
10646 *cost += COSTS_N_INSNS (1);
10647 else
10648 *cost += extra_cost->ldst.load;
10649 }
10650 else
10651 *cost += COSTS_N_INSNS (1);
10652
10653 if (flag_pic)
10654 {
10655 *cost += COSTS_N_INSNS (1);
10656 if (speed_p)
10657 *cost += extra_cost->alu.arith;
10658 }
10659
10660 return true;
10661
10662 case CONST_FIXED:
10663 *cost = COSTS_N_INSNS (4);
10664 /* Fixme. */
10665 return true;
10666
10667 case CONST_DOUBLE:
10668 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10669 && (mode == SFmode || !TARGET_VFP_SINGLE))
10670 {
10671 if (vfp3_const_double_rtx (x))
10672 {
10673 if (speed_p)
10674 *cost += extra_cost->fp[mode == DFmode].fpconst;
10675 return true;
10676 }
10677
10678 if (speed_p)
10679 {
10680 if (mode == DFmode)
10681 *cost += extra_cost->ldst.loadd;
10682 else
10683 *cost += extra_cost->ldst.loadf;
10684 }
10685 else
10686 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10687
10688 return true;
10689 }
10690 *cost = COSTS_N_INSNS (4);
10691 return true;
10692
10693 case CONST_VECTOR:
10694 /* Fixme. */
10695 if (TARGET_NEON
10696 && TARGET_HARD_FLOAT
10697 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10698 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10699 *cost = COSTS_N_INSNS (1);
10700 else
10701 *cost = COSTS_N_INSNS (4);
10702 return true;
10703
10704 case HIGH:
10705 case LO_SUM:
10706 /* When optimizing for size, we prefer constant pool entries to
10707 MOVW/MOVT pairs, so bump the cost of these slightly. */
10708 if (!speed_p)
10709 *cost += 1;
10710 return true;
10711
10712 case CLZ:
10713 if (speed_p)
10714 *cost += extra_cost->alu.clz;
10715 return false;
10716
10717 case SMIN:
10718 if (XEXP (x, 1) == const0_rtx)
10719 {
10720 if (speed_p)
10721 *cost += extra_cost->alu.log_shift;
10722 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10723 return true;
10724 }
10725 /* Fall through. */
10726 case SMAX:
10727 case UMIN:
10728 case UMAX:
10729 *cost += COSTS_N_INSNS (1);
10730 return false;
10731
10732 case TRUNCATE:
10733 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10734 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10735 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10736 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10737 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10738 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10739 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10740 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10741 == ZERO_EXTEND))))
10742 {
10743 if (speed_p)
10744 *cost += extra_cost->mult[1].extend;
10745 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10746 ZERO_EXTEND, 0, speed_p)
10747 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10748 ZERO_EXTEND, 0, speed_p));
10749 return true;
10750 }
10751 *cost = LIBCALL_COST (1);
10752 return false;
10753
10754 case UNSPEC_VOLATILE:
10755 case UNSPEC:
10756 return arm_unspec_cost (x, outer_code, speed_p, cost);
10757
10758 case PC:
10759 /* Reading the PC is like reading any other register. Writing it
10760 is more expensive, but we take that into account elsewhere. */
10761 *cost = 0;
10762 return true;
10763
10764 case ZERO_EXTRACT:
10765 /* TODO: Simple zero_extract of bottom bits using AND. */
10766 /* Fall through. */
10767 case SIGN_EXTRACT:
10768 if (arm_arch6
10769 && mode == SImode
10770 && CONST_INT_P (XEXP (x, 1))
10771 && CONST_INT_P (XEXP (x, 2)))
10772 {
10773 if (speed_p)
10774 *cost += extra_cost->alu.bfx;
10775 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10776 return true;
10777 }
10778 /* Without UBFX/SBFX, need to resort to shift operations. */
10779 *cost += COSTS_N_INSNS (1);
10780 if (speed_p)
10781 *cost += 2 * extra_cost->alu.shift;
10782 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10783 return true;
10784
10785 case FLOAT_EXTEND:
10786 if (TARGET_HARD_FLOAT)
10787 {
10788 if (speed_p)
10789 *cost += extra_cost->fp[mode == DFmode].widen;
10790 if (!TARGET_VFP5
10791 && GET_MODE (XEXP (x, 0)) == HFmode)
10792 {
10793 /* Pre v8, widening HF->DF is a two-step process, first
10794 widening to SFmode. */
10795 *cost += COSTS_N_INSNS (1);
10796 if (speed_p)
10797 *cost += extra_cost->fp[0].widen;
10798 }
10799 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10800 return true;
10801 }
10802
10803 *cost = LIBCALL_COST (1);
10804 return false;
10805
10806 case FLOAT_TRUNCATE:
10807 if (TARGET_HARD_FLOAT)
10808 {
10809 if (speed_p)
10810 *cost += extra_cost->fp[mode == DFmode].narrow;
10811 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10812 return true;
10813 /* Vector modes? */
10814 }
10815 *cost = LIBCALL_COST (1);
10816 return false;
10817
10818 case FMA:
10819 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10820 {
10821 rtx op0 = XEXP (x, 0);
10822 rtx op1 = XEXP (x, 1);
10823 rtx op2 = XEXP (x, 2);
10824
10825
10826 /* vfms or vfnma. */
10827 if (GET_CODE (op0) == NEG)
10828 op0 = XEXP (op0, 0);
10829
10830 /* vfnms or vfnma. */
10831 if (GET_CODE (op2) == NEG)
10832 op2 = XEXP (op2, 0);
10833
10834 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10835 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10836 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10837
10838 if (speed_p)
10839 *cost += extra_cost->fp[mode == DFmode].fma;
10840
10841 return true;
10842 }
10843
10844 *cost = LIBCALL_COST (3);
10845 return false;
10846
10847 case FIX:
10848 case UNSIGNED_FIX:
10849 if (TARGET_HARD_FLOAT)
10850 {
10851 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10852 a vcvt fixed-point conversion. */
10853 if (code == FIX && mode == SImode
10854 && GET_CODE (XEXP (x, 0)) == FIX
10855 && GET_MODE (XEXP (x, 0)) == SFmode
10856 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10857 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10858 > 0)
10859 {
10860 if (speed_p)
10861 *cost += extra_cost->fp[0].toint;
10862
10863 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10864 code, 0, speed_p);
10865 return true;
10866 }
10867
10868 if (GET_MODE_CLASS (mode) == MODE_INT)
10869 {
10870 mode = GET_MODE (XEXP (x, 0));
10871 if (speed_p)
10872 *cost += extra_cost->fp[mode == DFmode].toint;
10873 /* Strip off the 'cost' of rounding towards zero. */
10874 if (GET_CODE (XEXP (x, 0)) == FIX)
10875 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10876 0, speed_p);
10877 else
10878 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10879 /* ??? Increase the cost to deal with transferring from
10880 FP -> CORE registers? */
10881 return true;
10882 }
10883 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10884 && TARGET_VFP5)
10885 {
10886 if (speed_p)
10887 *cost += extra_cost->fp[mode == DFmode].roundint;
10888 return false;
10889 }
10890 /* Vector costs? */
10891 }
10892 *cost = LIBCALL_COST (1);
10893 return false;
10894
10895 case FLOAT:
10896 case UNSIGNED_FLOAT:
10897 if (TARGET_HARD_FLOAT)
10898 {
10899 /* ??? Increase the cost to deal with transferring from CORE
10900 -> FP registers? */
10901 if (speed_p)
10902 *cost += extra_cost->fp[mode == DFmode].fromint;
10903 return false;
10904 }
10905 *cost = LIBCALL_COST (1);
10906 return false;
10907
10908 case CALL:
10909 return true;
10910
10911 case ASM_OPERANDS:
10912 {
10913 /* Just a guess. Guess number of instructions in the asm
10914 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10915 though (see PR60663). */
10916 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10917 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10918
10919 *cost = COSTS_N_INSNS (asm_length + num_operands);
10920 return true;
10921 }
10922 default:
10923 if (mode != VOIDmode)
10924 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10925 else
10926 *cost = COSTS_N_INSNS (4); /* Who knows? */
10927 return false;
10928 }
10929 }
10930
10931 #undef HANDLE_NARROW_SHIFT_ARITH
10932
10933 /* RTX costs entry point. */
10934
10935 static bool
10936 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10937 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10938 {
10939 bool result;
10940 int code = GET_CODE (x);
10941 gcc_assert (current_tune->insn_extra_cost);
10942
10943 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10944 (enum rtx_code) outer_code,
10945 current_tune->insn_extra_cost,
10946 total, speed);
10947
10948 if (dump_file && (dump_flags & TDF_DETAILS))
10949 {
10950 print_rtl_single (dump_file, x);
10951 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10952 *total, result ? "final" : "partial");
10953 }
10954 return result;
10955 }
10956
10957 /* All address computations that can be done are free, but rtx cost returns
10958 the same for practically all of them. So we weight the different types
10959 of address here in order of preference (most preferred first):
10960 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10961 static inline int
10962 arm_arm_address_cost (rtx x)
10963 {
10964 enum rtx_code c = GET_CODE (x);
10965
10966 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10967 return 0;
10968 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10969 return 10;
10970
10971 if (c == PLUS)
10972 {
10973 if (CONST_INT_P (XEXP (x, 1)))
10974 return 2;
10975
10976 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10977 return 3;
10978
10979 return 4;
10980 }
10981
10982 return 6;
10983 }
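/* Illustrative weights from arm_arm_address_cost (hypothetical operands):
   a POST_INC address such as [r3], #4 scores 0, [r3, #8] scores 2,
   [r3, r2, lsl #2] scores 3, [r3, r2] scores 4, a plain register scores 6
   and a SYMBOL_REF address scores 10.  */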
10984
10985 static inline int
10986 arm_thumb_address_cost (rtx x)
10987 {
10988 enum rtx_code c = GET_CODE (x);
10989
10990 if (c == REG)
10991 return 1;
10992 if (c == PLUS
10993 && REG_P (XEXP (x, 0))
10994 && CONST_INT_P (XEXP (x, 1)))
10995 return 1;
10996
10997 return 2;
10998 }
10999
11000 static int
11001 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11002 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11003 {
11004 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11005 }
11006
11007 /* Adjust cost hook for XScale. */
11008 static bool
11009 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11010 int * cost)
11011 {
11012 /* Some true dependencies can have a higher cost depending
11013 on precisely how certain input operands are used. */
11014 if (dep_type == 0
11015 && recog_memoized (insn) >= 0
11016 && recog_memoized (dep) >= 0)
11017 {
11018 int shift_opnum = get_attr_shift (insn);
11019 enum attr_type attr_type = get_attr_type (dep);
11020
11021 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11022 operand for INSN. If we have a shifted input operand and the
11023 instruction we depend on is another ALU instruction, then we may
11024 have to account for an additional stall. */
11025 if (shift_opnum != 0
11026 && (attr_type == TYPE_ALU_SHIFT_IMM
11027 || attr_type == TYPE_ALUS_SHIFT_IMM
11028 || attr_type == TYPE_LOGIC_SHIFT_IMM
11029 || attr_type == TYPE_LOGICS_SHIFT_IMM
11030 || attr_type == TYPE_ALU_SHIFT_REG
11031 || attr_type == TYPE_ALUS_SHIFT_REG
11032 || attr_type == TYPE_LOGIC_SHIFT_REG
11033 || attr_type == TYPE_LOGICS_SHIFT_REG
11034 || attr_type == TYPE_MOV_SHIFT
11035 || attr_type == TYPE_MVN_SHIFT
11036 || attr_type == TYPE_MOV_SHIFT_REG
11037 || attr_type == TYPE_MVN_SHIFT_REG))
11038 {
11039 rtx shifted_operand;
11040 int opno;
11041
11042 /* Get the shifted operand. */
11043 extract_insn (insn);
11044 shifted_operand = recog_data.operand[shift_opnum];
11045
11046 /* Iterate over all the operands in DEP. If we write an operand
11047 that overlaps with SHIFTED_OPERAND, then we have to increase the
11048 cost of this dependency. */
11049 extract_insn (dep);
11050 preprocess_constraints (dep);
11051 for (opno = 0; opno < recog_data.n_operands; opno++)
11052 {
11053 /* We can ignore strict inputs. */
11054 if (recog_data.operand_type[opno] == OP_IN)
11055 continue;
11056
11057 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11058 shifted_operand))
11059 {
11060 *cost = 2;
11061 return false;
11062 }
11063 }
11064 }
11065 }
11066 return true;
11067 }
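/* Illustrative example (hypothetical insns): if INSN is
   "add r0, r1, r2, lsl #3", so that one of its operands is shifted, and
   DEP is an ALU-type instruction that writes r2, the hook above reports
   a dependence cost of 2 rather than leaving the default latency.  */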
11068
11069 /* Adjust cost hook for Cortex A9. */
11070 static bool
11071 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11072 int * cost)
11073 {
11074 switch (dep_type)
11075 {
11076 case REG_DEP_ANTI:
11077 *cost = 0;
11078 return false;
11079
11080 case REG_DEP_TRUE:
11081 case REG_DEP_OUTPUT:
11082 if (recog_memoized (insn) >= 0
11083 && recog_memoized (dep) >= 0)
11084 {
11085 if (GET_CODE (PATTERN (insn)) == SET)
11086 {
11087 if (GET_MODE_CLASS
11088 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11089 || GET_MODE_CLASS
11090 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11091 {
11092 enum attr_type attr_type_insn = get_attr_type (insn);
11093 enum attr_type attr_type_dep = get_attr_type (dep);
11094
11095 /* By default all dependencies of the form
11096 s0 = s0 <op> s1
11097 s0 = s0 <op> s2
11098 have an extra latency of 1 cycle because
11099 of the input and output dependency in this
11100 case. However this gets modeled as a true
11101 dependency and hence all these checks. */
11102 if (REG_P (SET_DEST (PATTERN (insn)))
11103 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11104 {
11105 /* FMACS is a special case where the dependent
11106 instruction can be issued 3 cycles before
11107 the normal latency in case of an output
11108 dependency. */
11109 if ((attr_type_insn == TYPE_FMACS
11110 || attr_type_insn == TYPE_FMACD)
11111 && (attr_type_dep == TYPE_FMACS
11112 || attr_type_dep == TYPE_FMACD))
11113 {
11114 if (dep_type == REG_DEP_OUTPUT)
11115 *cost = insn_default_latency (dep) - 3;
11116 else
11117 *cost = insn_default_latency (dep);
11118 return false;
11119 }
11120 else
11121 {
11122 if (dep_type == REG_DEP_OUTPUT)
11123 *cost = insn_default_latency (dep) + 1;
11124 else
11125 *cost = insn_default_latency (dep);
11126 }
11127 return false;
11128 }
11129 }
11130 }
11131 }
11132 break;
11133
11134 default:
11135 gcc_unreachable ();
11136 }
11137
11138 return true;
11139 }
11140
11141 /* Adjust cost hook for FA726TE. */
11142 static bool
11143 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11144 int * cost)
11145 {
11146 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11147 followed by a predicated one) has a penalty of 3. */
11148 if (dep_type == REG_DEP_TRUE
11149 && recog_memoized (insn) >= 0
11150 && recog_memoized (dep) >= 0
11151 && get_attr_conds (dep) == CONDS_SET)
11152 {
11153 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11154 if (get_attr_conds (insn) == CONDS_USE
11155 && get_attr_type (insn) != TYPE_BRANCH)
11156 {
11157 *cost = 3;
11158 return false;
11159 }
11160
11161 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11162 || get_attr_conds (insn) == CONDS_USE)
11163 {
11164 *cost = 0;
11165 return false;
11166 }
11167 }
11168
11169 return true;
11170 }
11171
11172 /* Implement TARGET_REGISTER_MOVE_COST.
11173
11174 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11175 such a move is typically more expensive than a single memory access. We set
11176 the cost to less than two memory accesses so that floating
11177 point to integer conversion does not go through memory. */
11178
11179 int
11180 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11181 reg_class_t from, reg_class_t to)
11182 {
11183 if (TARGET_32BIT)
11184 {
11185 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11186 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11187 return 15;
11188 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11189 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11190 return 4;
11191 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11192 return 20;
11193 else
11194 return 2;
11195 }
11196 else
11197 {
11198 if (from == HI_REGS || to == HI_REGS)
11199 return 4;
11200 else
11201 return 2;
11202 }
11203 }
11204
11205 /* Implement TARGET_MEMORY_MOVE_COST. */
11206
11207 int
11208 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11209 bool in ATTRIBUTE_UNUSED)
11210 {
11211 if (TARGET_32BIT)
11212 return 10;
11213 else
11214 {
11215 if (GET_MODE_SIZE (mode) < 4)
11216 return 8;
11217 else
11218 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11219 }
11220 }
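/* Illustrative values from the non-32-bit (Thumb-1) formula above: an
   SImode value (4 bytes) costs 8 when held in LO_REGS and 16 otherwise,
   while a DImode value (8 bytes) costs 16 and 32 respectively.  All
   32-bit targets simply return 10.  */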
11221
11222 /* Vectorizer cost model implementation. */
11223
11224 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11225 static int
11226 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11227 tree vectype,
11228 int misalign ATTRIBUTE_UNUSED)
11229 {
11230 unsigned elements;
11231
11232 switch (type_of_cost)
11233 {
11234 case scalar_stmt:
11235 return current_tune->vec_costs->scalar_stmt_cost;
11236
11237 case scalar_load:
11238 return current_tune->vec_costs->scalar_load_cost;
11239
11240 case scalar_store:
11241 return current_tune->vec_costs->scalar_store_cost;
11242
11243 case vector_stmt:
11244 return current_tune->vec_costs->vec_stmt_cost;
11245
11246 case vector_load:
11247 return current_tune->vec_costs->vec_align_load_cost;
11248
11249 case vector_store:
11250 return current_tune->vec_costs->vec_store_cost;
11251
11252 case vec_to_scalar:
11253 return current_tune->vec_costs->vec_to_scalar_cost;
11254
11255 case scalar_to_vec:
11256 return current_tune->vec_costs->scalar_to_vec_cost;
11257
11258 case unaligned_load:
11259 case vector_gather_load:
11260 return current_tune->vec_costs->vec_unalign_load_cost;
11261
11262 case unaligned_store:
11263 case vector_scatter_store:
11264 return current_tune->vec_costs->vec_unalign_store_cost;
11265
11266 case cond_branch_taken:
11267 return current_tune->vec_costs->cond_taken_branch_cost;
11268
11269 case cond_branch_not_taken:
11270 return current_tune->vec_costs->cond_not_taken_branch_cost;
11271
11272 case vec_perm:
11273 case vec_promote_demote:
11274 return current_tune->vec_costs->vec_stmt_cost;
11275
11276 case vec_construct:
11277 elements = TYPE_VECTOR_SUBPARTS (vectype);
11278 return elements / 2 + 1;
11279
11280 default:
11281 gcc_unreachable ();
11282 }
11283 }
11284
11285 /* Implement targetm.vectorize.add_stmt_cost. */
11286
11287 static unsigned
11288 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11289 struct _stmt_vec_info *stmt_info, int misalign,
11290 enum vect_cost_model_location where)
11291 {
11292 unsigned *cost = (unsigned *) data;
11293 unsigned retval = 0;
11294
11295 if (flag_vect_cost_model)
11296 {
11297 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11298 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11299
11300 /* Statements in an inner loop relative to the loop being
11301 vectorized are weighted more heavily. The value here is
11302 arbitrary and could potentially be improved with analysis. */
11303 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11304 count *= 50; /* FIXME. */
11305
11306 retval = (unsigned) (count * stmt_cost);
11307 cost[where] += retval;
11308 }
11309
11310 return retval;
11311 }
11312
11313 /* Return true if and only if this insn can dual-issue only as older. */
11314 static bool
11315 cortexa7_older_only (rtx_insn *insn)
11316 {
11317 if (recog_memoized (insn) < 0)
11318 return false;
11319
11320 switch (get_attr_type (insn))
11321 {
11322 case TYPE_ALU_DSP_REG:
11323 case TYPE_ALU_SREG:
11324 case TYPE_ALUS_SREG:
11325 case TYPE_LOGIC_REG:
11326 case TYPE_LOGICS_REG:
11327 case TYPE_ADC_REG:
11328 case TYPE_ADCS_REG:
11329 case TYPE_ADR:
11330 case TYPE_BFM:
11331 case TYPE_REV:
11332 case TYPE_MVN_REG:
11333 case TYPE_SHIFT_IMM:
11334 case TYPE_SHIFT_REG:
11335 case TYPE_LOAD_BYTE:
11336 case TYPE_LOAD_4:
11337 case TYPE_STORE_4:
11338 case TYPE_FFARITHS:
11339 case TYPE_FADDS:
11340 case TYPE_FFARITHD:
11341 case TYPE_FADDD:
11342 case TYPE_FMOV:
11343 case TYPE_F_CVT:
11344 case TYPE_FCMPS:
11345 case TYPE_FCMPD:
11346 case TYPE_FCONSTS:
11347 case TYPE_FCONSTD:
11348 case TYPE_FMULS:
11349 case TYPE_FMACS:
11350 case TYPE_FMULD:
11351 case TYPE_FMACD:
11352 case TYPE_FDIVS:
11353 case TYPE_FDIVD:
11354 case TYPE_F_MRC:
11355 case TYPE_F_MRRC:
11356 case TYPE_F_FLAG:
11357 case TYPE_F_LOADS:
11358 case TYPE_F_STORES:
11359 return true;
11360 default:
11361 return false;
11362 }
11363 }
11364
11365 /* Return true if and only if this insn can dual-issue as younger. */
11366 static bool
11367 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11368 {
11369 if (recog_memoized (insn) < 0)
11370 {
11371 if (verbose > 5)
11372 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11373 return false;
11374 }
11375
11376 switch (get_attr_type (insn))
11377 {
11378 case TYPE_ALU_IMM:
11379 case TYPE_ALUS_IMM:
11380 case TYPE_LOGIC_IMM:
11381 case TYPE_LOGICS_IMM:
11382 case TYPE_EXTEND:
11383 case TYPE_MVN_IMM:
11384 case TYPE_MOV_IMM:
11385 case TYPE_MOV_REG:
11386 case TYPE_MOV_SHIFT:
11387 case TYPE_MOV_SHIFT_REG:
11388 case TYPE_BRANCH:
11389 case TYPE_CALL:
11390 return true;
11391 default:
11392 return false;
11393 }
11394 }
11395
11396
11397 /* Look for an instruction that can dual issue only as an older
11398 instruction, and move it in front of any instructions that can
11399 dual-issue as younger, while preserving the relative order of all
11400 other instructions in the ready list. This is a heuristic to help
11401 dual-issue in later cycles, by postponing issue of more flexible
11402 instructions. This heuristic may affect dual issue opportunities
11403 in the current cycle. */
11404 static void
11405 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11406 int *n_readyp, int clock)
11407 {
11408 int i;
11409 int first_older_only = -1, first_younger = -1;
11410
11411 if (verbose > 5)
11412 fprintf (file,
11413 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11414 clock,
11415 *n_readyp);
11416
11417 /* Traverse the ready list from the head (the instruction to issue
11418 first), looking for the first instruction that can issue as
11419 younger and the first instruction that can dual-issue only as
11420 older. */
11421 for (i = *n_readyp - 1; i >= 0; i--)
11422 {
11423 rtx_insn *insn = ready[i];
11424 if (cortexa7_older_only (insn))
11425 {
11426 first_older_only = i;
11427 if (verbose > 5)
11428 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11429 break;
11430 }
11431 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11432 first_younger = i;
11433 }
11434
11435 /* Nothing to reorder because either no younger insn was found or an
11436 insn that can dual-issue only as older appears before any insn that
11437 can dual-issue as younger. */
11438 if (first_younger == -1)
11439 {
11440 if (verbose > 5)
11441 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11442 return;
11443 }
11444
11445 /* Nothing to reorder because no older-only insn in the ready list. */
11446 if (first_older_only == -1)
11447 {
11448 if (verbose > 5)
11449 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11450 return;
11451 }
11452
11453 /* Move first_older_only insn before first_younger. */
11454 if (verbose > 5)
11455 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11456 INSN_UID (ready[first_older_only]),
11457 INSN_UID (ready[first_younger]));
11458 rtx_insn *first_older_only_insn = ready[first_older_only];
11459 for (i = first_older_only; i < first_younger; i++)
11460 {
11461 ready[i] = ready[i+1];
11462 }
11463
11464 ready[i] = first_older_only_insn;
11465 return;
11466 }
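/* Illustrative sketch of the reordering above (hypothetical ready list,
   issue order running from the back of the array): if a load (older-only)
   sits behind a mov-immediate (younger-capable), the load is moved in
   front of the mov while every other insn keeps its relative order.  */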
11467
11468 /* Implement TARGET_SCHED_REORDER. */
11469 static int
11470 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11471 int clock)
11472 {
11473 switch (arm_tune)
11474 {
11475 case TARGET_CPU_cortexa7:
11476 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11477 break;
11478 default:
11479 /* Do nothing for other cores. */
11480 break;
11481 }
11482
11483 return arm_issue_rate ();
11484 }
11485
11486 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11487 It corrects the value of COST based on the relationship between
11488 INSN and DEP through the dependence LINK. It returns the new
11489 value. There is a per-core adjust_cost hook to adjust scheduler costs
11490 and the per-core hook can choose to completely override the generic
11491 adjust_cost function. Only put bits of code into arm_adjust_cost that
11492 are common across all cores. */
11493 static int
11494 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11495 unsigned int)
11496 {
11497 rtx i_pat, d_pat;
11498
11499 /* When generating Thumb-1 code, we want to place flag-setting operations
11500 close to a conditional branch which depends on them, so that we can
11501 omit the comparison. */
11502 if (TARGET_THUMB1
11503 && dep_type == 0
11504 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11505 && recog_memoized (dep) >= 0
11506 && get_attr_conds (dep) == CONDS_SET)
11507 return 0;
11508
11509 if (current_tune->sched_adjust_cost != NULL)
11510 {
11511 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11512 return cost;
11513 }
11514
11515 /* XXX Is this strictly true? */
11516 if (dep_type == REG_DEP_ANTI
11517 || dep_type == REG_DEP_OUTPUT)
11518 return 0;
11519
11520 /* Call insns don't incur a stall, even if they follow a load. */
11521 if (dep_type == 0
11522 && CALL_P (insn))
11523 return 1;
11524
11525 if ((i_pat = single_set (insn)) != NULL
11526 && MEM_P (SET_SRC (i_pat))
11527 && (d_pat = single_set (dep)) != NULL
11528 && MEM_P (SET_DEST (d_pat)))
11529 {
11530 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11531 /* This is a load after a store; there is no conflict if the load reads
11532 from a cached area. Assume that loads from the stack, and from the
11533 constant pool are cached, and that others will miss. This is a
11534 hack. */
11535
11536 if ((GET_CODE (src_mem) == SYMBOL_REF
11537 && CONSTANT_POOL_ADDRESS_P (src_mem))
11538 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11539 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11540 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11541 return 1;
11542 }
11543
11544 return cost;
11545 }
11546
11547 int
11548 arm_max_conditional_execute (void)
11549 {
11550 return max_insns_skipped;
11551 }
11552
11553 static int
11554 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11555 {
11556 if (TARGET_32BIT)
11557 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11558 else
11559 return (optimize > 0) ? 2 : 0;
11560 }
11561
11562 static int
11563 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11564 {
11565 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11566 }
11567
11568 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11569 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11570 sequences of non-executed instructions in IT blocks probably take the same
11571 amount of time as executed instructions (and the IT instruction itself takes
11572 space in icache). This function was experimentally determined to give good
11573 results on a popular embedded benchmark. */
11574
11575 static int
11576 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11577 {
11578 return (TARGET_32BIT && speed_p) ? 1
11579 : arm_default_branch_cost (speed_p, predictable_p);
11580 }
11581
11582 static int
11583 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11584 {
11585 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11586 }
11587
11588 static bool fp_consts_inited = false;
11589
11590 static REAL_VALUE_TYPE value_fp0;
11591
11592 static void
11593 init_fp_table (void)
11594 {
11595 REAL_VALUE_TYPE r;
11596
11597 r = REAL_VALUE_ATOF ("0", DFmode);
11598 value_fp0 = r;
11599 fp_consts_inited = true;
11600 }
11601
11602 /* Return TRUE if rtx X is a valid immediate FP constant. */
11603 int
11604 arm_const_double_rtx (rtx x)
11605 {
11606 const REAL_VALUE_TYPE *r;
11607
11608 if (!fp_consts_inited)
11609 init_fp_table ();
11610
11611 r = CONST_DOUBLE_REAL_VALUE (x);
11612 if (REAL_VALUE_MINUS_ZERO (*r))
11613 return 0;
11614
11615 if (real_equal (r, &value_fp0))
11616 return 1;
11617
11618 return 0;
11619 }
11620
11621 /* VFPv3 has a fairly wide range of representable immediates, formed from
11622 "quarter-precision" floating-point values. These can be evaluated using this
11623 formula (with ^ for exponentiation):
11624
11625 -1^s * n * 2^-r
11626
11627 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11628 16 <= n <= 31 and 0 <= r <= 7.
11629
11630 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11631
11632 - A (most-significant) is the sign bit.
11633 - BCD are the exponent (encoded as r XOR 3).
11634 - EFGH are the mantissa (encoded as n - 16).
11635 */
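/* Worked example of the encoding above (illustrative only): +1.0 equals
   -1^0 * 16 * 2^-4, so s = 0, n = 16 and r = 4.  The fields are then
   A = 0, BCD = 4 XOR 3 = 7 (binary 111) and EFGH = 16 - 16 = 0, giving
   the 8-bit immediate 0111 0000, i.e. 0x70.  */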
11636
11637 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11638 fconst[sd] instruction, or -1 if X isn't suitable. */
11639 static int
11640 vfp3_const_double_index (rtx x)
11641 {
11642 REAL_VALUE_TYPE r, m;
11643 int sign, exponent;
11644 unsigned HOST_WIDE_INT mantissa, mant_hi;
11645 unsigned HOST_WIDE_INT mask;
11646 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11647 bool fail;
11648
11649 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11650 return -1;
11651
11652 r = *CONST_DOUBLE_REAL_VALUE (x);
11653
11654 /* We can't represent these things, so detect them first. */
11655 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11656 return -1;
11657
11658 /* Extract sign, exponent and mantissa. */
11659 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11660 r = real_value_abs (&r);
11661 exponent = REAL_EXP (&r);
11662 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11663 highest (sign) bit, with a fixed binary point at bit point_pos.
11664 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11665 bits for the mantissa, this may fail (low bits would be lost). */
11666 real_ldexp (&m, &r, point_pos - exponent);
11667 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11668 mantissa = w.elt (0);
11669 mant_hi = w.elt (1);
11670
11671 /* If there are bits set in the low part of the mantissa, we can't
11672 represent this value. */
11673 if (mantissa != 0)
11674 return -1;
11675
11676 /* Now make it so that mantissa contains the most-significant bits, and move
11677 the point_pos to indicate that the least-significant bits have been
11678 discarded. */
11679 point_pos -= HOST_BITS_PER_WIDE_INT;
11680 mantissa = mant_hi;
11681
11682 /* We can permit four significant bits of mantissa only, plus a high bit
11683 which is always 1. */
11684 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11685 if ((mantissa & mask) != 0)
11686 return -1;
11687
11688 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11689 mantissa >>= point_pos - 5;
11690
11691 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11692 floating-point immediate zero with Neon using an integer-zero load, but
11693 that case is handled elsewhere.) */
11694 if (mantissa == 0)
11695 return -1;
11696
11697 gcc_assert (mantissa >= 16 && mantissa <= 31);
11698
11699 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11700 normalized significands are in the range [1, 2). (Our mantissa is shifted
11701 left 4 places at this point relative to normalized IEEE754 values). GCC
11702 internally uses [0.5, 1) (see real.c), so the exponent returned from
11703 REAL_EXP must be altered. */
11704 exponent = 5 - exponent;
11705
11706 if (exponent < 0 || exponent > 7)
11707 return -1;
11708
11709 /* Sign, mantissa and exponent are now in the correct form to plug into the
11710 formula described in the comment above. */
11711 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11712 }
11713
11714 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11715 int
11716 vfp3_const_double_rtx (rtx x)
11717 {
11718 if (!TARGET_VFP3)
11719 return 0;
11720
11721 return vfp3_const_double_index (x) != -1;
11722 }
11723
11724 /* Recognize immediates which can be used in various Neon instructions. Legal
11725 immediates are described by the following table (for VMVN variants, the
11726 bitwise inverse of the constant shown is recognized. In either case, VMOV
11727 is output and the correct instruction to use for a given constant is chosen
11728 by the assembler). The constant shown is replicated across all elements of
11729 the destination vector.
11730
11731 insn elems variant constant (binary)
11732 ---- ----- ------- -----------------
11733 vmov i32 0 00000000 00000000 00000000 abcdefgh
11734 vmov i32 1 00000000 00000000 abcdefgh 00000000
11735 vmov i32 2 00000000 abcdefgh 00000000 00000000
11736 vmov i32 3 abcdefgh 00000000 00000000 00000000
11737 vmov i16 4 00000000 abcdefgh
11738 vmov i16 5 abcdefgh 00000000
11739 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11740 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11741 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11742 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11743 vmvn i16 10 00000000 abcdefgh
11744 vmvn i16 11 abcdefgh 00000000
11745 vmov i32 12 00000000 00000000 abcdefgh 11111111
11746 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11747 vmov i32 14 00000000 abcdefgh 11111111 11111111
11748 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11749 vmov i8 16 abcdefgh
11750 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11751 eeeeeeee ffffffff gggggggg hhhhhhhh
11752 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11753 vmov f32 19 00000000 00000000 00000000 00000000
11754
11755 For case 18, B = !b. Representable values are exactly those accepted by
11756 vfp3_const_double_index, but are output as floating-point numbers rather
11757 than indices.
11758
11759 For case 19, we will change it to vmov.i32 when assembling.
11760
11761 Variants 0-5 (inclusive) may also be used as immediates for the second
11762 operand of VORR/VBIC instructions.
11763
11764 The INVERSE argument causes the bitwise inverse of the given operand to be
11765 recognized instead (used for recognizing legal immediates for the VAND/VORN
11766 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11767 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11768 output, rather than the real insns vbic/vorr).
11769
11770 INVERSE makes no difference to the recognition of float vectors.
11771
11772 The return value is the variant of immediate as shown in the above table, or
11773 -1 if the given value doesn't match any of the listed patterns.
11774 */
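/* Illustrative example (hypothetical constant): a V4SImode vector whose
   four elements are all 0x0000ab00 expands to the little-endian byte
   pattern 00 ab 00 00 repeated, which matches variant 1 above, so the
   routine below returns 1 with *ELEMENTWIDTH = 32 and *MODCONST =
   0x0000ab00.  */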
11775 static int
11776 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11777 rtx *modconst, int *elementwidth)
11778 {
11779 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11780 matches = 1; \
11781 for (i = 0; i < idx; i += (STRIDE)) \
11782 if (!(TEST)) \
11783 matches = 0; \
11784 if (matches) \
11785 { \
11786 immtype = (CLASS); \
11787 elsize = (ELSIZE); \
11788 break; \
11789 }
11790
11791 unsigned int i, elsize = 0, idx = 0, n_elts;
11792 unsigned int innersize;
11793 unsigned char bytes[16];
11794 int immtype = -1, matches;
11795 unsigned int invmask = inverse ? 0xff : 0;
11796 bool vector = GET_CODE (op) == CONST_VECTOR;
11797
11798 if (vector)
11799 n_elts = CONST_VECTOR_NUNITS (op);
11800 else
11801 {
11802 n_elts = 1;
11803 if (mode == VOIDmode)
11804 mode = DImode;
11805 }
11806
11807 innersize = GET_MODE_UNIT_SIZE (mode);
11808
11809 /* Vectors of float constants. */
11810 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11811 {
11812 rtx el0 = CONST_VECTOR_ELT (op, 0);
11813
11814 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11815 return -1;
11816
11817 /* FP16 vectors cannot be represented. */
11818 if (GET_MODE_INNER (mode) == HFmode)
11819 return -1;
11820
11821 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11822 are distinct in this context. */
11823 if (!const_vec_duplicate_p (op))
11824 return -1;
11825
11826 if (modconst)
11827 *modconst = CONST_VECTOR_ELT (op, 0);
11828
11829 if (elementwidth)
11830 *elementwidth = 0;
11831
11832 if (el0 == CONST0_RTX (GET_MODE (el0)))
11833 return 19;
11834 else
11835 return 18;
11836 }
11837
11838 /* The tricks done in the code below apply for little-endian vector layout.
11839 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11840 FIXME: Implement logic for big-endian vectors. */
11841 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11842 return -1;
11843
11844 /* Splat vector constant out into a byte vector. */
11845 for (i = 0; i < n_elts; i++)
11846 {
11847 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11848 unsigned HOST_WIDE_INT elpart;
11849
11850 gcc_assert (CONST_INT_P (el));
11851 elpart = INTVAL (el);
11852
11853 for (unsigned int byte = 0; byte < innersize; byte++)
11854 {
11855 bytes[idx++] = (elpart & 0xff) ^ invmask;
11856 elpart >>= BITS_PER_UNIT;
11857 }
11858 }
11859
11860 /* Sanity check. */
11861 gcc_assert (idx == GET_MODE_SIZE (mode));
11862
11863 do
11864 {
11865 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11866 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11867
11868 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11869 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11870
11871 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11872 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11873
11874 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11875 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11876
11877 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11878
11879 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11880
11881 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11882 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11883
11884 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11885 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11886
11887 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11888 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11889
11890 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11891 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11892
11893 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11894
11895 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11896
11897 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11898 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11899
11900 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11901 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11902
11903 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11904 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11905
11906 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11907 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11908
11909 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11910
11911 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11912 && bytes[i] == bytes[(i + 8) % idx]);
11913 }
11914 while (0);
11915
11916 if (immtype == -1)
11917 return -1;
11918
11919 if (elementwidth)
11920 *elementwidth = elsize;
11921
11922 if (modconst)
11923 {
11924 unsigned HOST_WIDE_INT imm = 0;
11925
11926 /* Un-invert bytes of recognized vector, if necessary. */
11927 if (invmask != 0)
11928 for (i = 0; i < idx; i++)
11929 bytes[i] ^= invmask;
11930
11931 if (immtype == 17)
11932 {
11933 /* FIXME: Broken on 32-bit H_W_I hosts. */
11934 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11935
11936 for (i = 0; i < 8; i++)
11937 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11938 << (i * BITS_PER_UNIT);
11939
11940 *modconst = GEN_INT (imm);
11941 }
11942 else
11943 {
11944 unsigned HOST_WIDE_INT imm = 0;
11945
11946 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11947 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11948
11949 *modconst = GEN_INT (imm);
11950 }
11951 }
11952
11953 return immtype;
11954 #undef CHECK
11955 }
11956
11957 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11958 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11959 float elements), and a modified constant (whatever should be output for a
11960 VMOV) in *MODCONST. */
11961
11962 int
11963 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11964 rtx *modconst, int *elementwidth)
11965 {
11966 rtx tmpconst;
11967 int tmpwidth;
11968 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11969
11970 if (retval == -1)
11971 return 0;
11972
11973 if (modconst)
11974 *modconst = tmpconst;
11975
11976 if (elementwidth)
11977 *elementwidth = tmpwidth;
11978
11979 return 1;
11980 }
11981
11982 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11983 the immediate is valid, write a constant suitable for using as an operand
11984 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11985 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11986
11987 int
11988 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11989 rtx *modconst, int *elementwidth)
11990 {
11991 rtx tmpconst;
11992 int tmpwidth;
11993 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11994
11995 if (retval < 0 || retval > 5)
11996 return 0;
11997
11998 if (modconst)
11999 *modconst = tmpconst;
12000
12001 if (elementwidth)
12002 *elementwidth = tmpwidth;
12003
12004 return 1;
12005 }
12006
12007 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12008 the immediate is valid, write a constant suitable for using as an operand
12009 to VSHR/VSHL to *MODCONST and the corresponding element width to
12010 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left shift,
12011 since left and right shifts have different limits. */
12012
12013 int
12014 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12015 rtx *modconst, int *elementwidth,
12016 bool isleftshift)
12017 {
12018 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12019 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12020 unsigned HOST_WIDE_INT last_elt = 0;
12021 unsigned HOST_WIDE_INT maxshift;
12022
12023 /* Split vector constant out into a byte vector. */
12024 for (i = 0; i < n_elts; i++)
12025 {
12026 rtx el = CONST_VECTOR_ELT (op, i);
12027 unsigned HOST_WIDE_INT elpart;
12028
12029 if (CONST_INT_P (el))
12030 elpart = INTVAL (el);
12031 else if (CONST_DOUBLE_P (el))
12032 return 0;
12033 else
12034 gcc_unreachable ();
12035
12036 if (i != 0 && elpart != last_elt)
12037 return 0;
12038
12039 last_elt = elpart;
12040 }
12041
12042 /* The element size in bits limits the shift amount. */
12043 maxshift = innersize * 8;
12044
12045 if (isleftshift)
12046 {
12047 /* Left shift immediate value can be from 0 to <size>-1. */
12048 if (last_elt >= maxshift)
12049 return 0;
12050 }
12051 else
12052 {
12053 /* Right shift immediate value can be from 1 to <size>. */
12054 if (last_elt == 0 || last_elt > maxshift)
12055 return 0;
12056 }
12057
12058 if (elementwidth)
12059 *elementwidth = innersize * 8;
12060
12061 if (modconst)
12062 *modconst = CONST_VECTOR_ELT (op, 0);
12063
12064 return 1;
12065 }
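/* Illustrative ranges from the checks above: with 8-bit elements a VSHL
   immediate must lie in 0..7, while a VSHR immediate must lie in 1..8.  */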
12066
12067 /* Return a string suitable for output of Neon immediate logic operation
12068 MNEM. */
12069
12070 char *
12071 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12072 int inverse, int quad)
12073 {
12074 int width, is_valid;
12075 static char templ[40];
12076
12077 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12078
12079 gcc_assert (is_valid != 0);
12080
12081 if (quad)
12082 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12083 else
12084 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12085
12086 return templ;
12087 }
12088
12089 /* Return a string suitable for output of Neon immediate shift operation
12090 (VSHR or VSHL) MNEM. */
12091
12092 char *
12093 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12094 machine_mode mode, int quad,
12095 bool isleftshift)
12096 {
12097 int width, is_valid;
12098 static char templ[40];
12099
12100 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12101 gcc_assert (is_valid != 0);
12102
12103 if (quad)
12104 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12105 else
12106 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12107
12108 return templ;
12109 }
12110
12111 /* Output a sequence of pairwise operations to implement a reduction.
12112 NOTE: We do "too much work" here, because pairwise operations work on two
12113 registers-worth of operands in one go. Unfortunately we can't exploit those
12114 extra calculations to do the full operation in fewer steps, as far as I can tell.
12115 Although all vector elements of the result but the first are ignored, we
12116 actually calculate the same result in each of the elements. An alternative
12117 such as initially loading a vector with zero to use as each of the second
12118 operands would use up an additional register and take an extra instruction,
12119 for no particular gain. */
12120
12121 void
12122 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12123 rtx (*reduc) (rtx, rtx, rtx))
12124 {
12125 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12126 rtx tmpsum = op1;
12127
12128 for (i = parts / 2; i >= 1; i /= 2)
12129 {
12130 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12131 emit_insn (reduc (dest, tmpsum, tmpsum));
12132 tmpsum = dest;
12133 }
12134 }
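/* Illustrative trace of the loop above: for a vector mode with four
   elements, PARTS is 4, so two pairwise operations are emitted, the
   first (i == 2) into a fresh scratch register and the second (i == 1)
   directly into OP0.  */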
12135
12136 /* If VALS is a vector constant that can be loaded into a register
12137 using VDUP, generate instructions to do so and return an RTX to
12138 assign to the register. Otherwise return NULL_RTX. */
12139
12140 static rtx
12141 neon_vdup_constant (rtx vals)
12142 {
12143 machine_mode mode = GET_MODE (vals);
12144 machine_mode inner_mode = GET_MODE_INNER (mode);
12145 rtx x;
12146
12147 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12148 return NULL_RTX;
12149
12150 if (!const_vec_duplicate_p (vals, &x))
12151 /* The elements are not all the same. We could handle repeating
12152 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12153 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12154 vdup.i16). */
12155 return NULL_RTX;
12156
12157 /* We can load this constant by using VDUP and a constant in a
12158 single ARM register. This will be cheaper than a vector
12159 load. */
12160
12161 x = copy_to_mode_reg (inner_mode, x);
12162 return gen_rtx_VEC_DUPLICATE (mode, x);
12163 }
12164
12165 /* Generate code to load VALS, which is a PARALLEL containing only
12166 constants (for vec_init) or CONST_VECTOR, efficiently into a
12167 register. Returns an RTX to copy into the register, or NULL_RTX
12168 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12169
12170 rtx
12171 neon_make_constant (rtx vals)
12172 {
12173 machine_mode mode = GET_MODE (vals);
12174 rtx target;
12175 rtx const_vec = NULL_RTX;
12176 int n_elts = GET_MODE_NUNITS (mode);
12177 int n_const = 0;
12178 int i;
12179
12180 if (GET_CODE (vals) == CONST_VECTOR)
12181 const_vec = vals;
12182 else if (GET_CODE (vals) == PARALLEL)
12183 {
12184 /* A CONST_VECTOR must contain only CONST_INTs and
12185 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12186 Only store valid constants in a CONST_VECTOR. */
12187 for (i = 0; i < n_elts; ++i)
12188 {
12189 rtx x = XVECEXP (vals, 0, i);
12190 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12191 n_const++;
12192 }
12193 if (n_const == n_elts)
12194 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12195 }
12196 else
12197 gcc_unreachable ();
12198
12199 if (const_vec != NULL
12200 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12201 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12202 return const_vec;
12203 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12204 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12205 pipeline cycle; creating the constant takes one or two ARM
12206 pipeline cycles. */
12207 return target;
12208 else if (const_vec != NULL_RTX)
12209 /* Load from constant pool. On Cortex-A8 this takes two cycles
12210 (for either double or quad vectors). We can not take advantage
12211 of single-cycle VLD1 because we need a PC-relative addressing
12212 mode. */
12213 return const_vec;
12214 else
12215 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12216 We can not construct an initializer. */
12217 return NULL_RTX;
12218 }
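/* Illustrative outcomes (hypothetical V4SImode constants): { 1, 1, 1, 1 }
   is a valid VMOV immediate and is returned unchanged; a vector that
   replicates an arbitrary word which is not itself a valid immediate is
   handled by the VDUP path; { 1, 2, 3, 4 } matches neither, so the
   CONST_VECTOR is returned and ends up in the constant pool.  */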
12219
12220 /* Initialize vector TARGET to VALS. */
12221
12222 void
12223 neon_expand_vector_init (rtx target, rtx vals)
12224 {
12225 machine_mode mode = GET_MODE (target);
12226 machine_mode inner_mode = GET_MODE_INNER (mode);
12227 int n_elts = GET_MODE_NUNITS (mode);
12228 int n_var = 0, one_var = -1;
12229 bool all_same = true;
12230 rtx x, mem;
12231 int i;
12232
12233 for (i = 0; i < n_elts; ++i)
12234 {
12235 x = XVECEXP (vals, 0, i);
12236 if (!CONSTANT_P (x))
12237 ++n_var, one_var = i;
12238
12239 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12240 all_same = false;
12241 }
12242
12243 if (n_var == 0)
12244 {
12245 rtx constant = neon_make_constant (vals);
12246 if (constant != NULL_RTX)
12247 {
12248 emit_move_insn (target, constant);
12249 return;
12250 }
12251 }
12252
12253 /* Splat a single non-constant element if we can. */
12254 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12255 {
12256 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12257 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12258 return;
12259 }
12260
12261 /* One field is non-constant. Load constant then overwrite varying
12262 field. This is more efficient than using the stack. */
12263 if (n_var == 1)
12264 {
12265 rtx copy = copy_rtx (vals);
12266 rtx index = GEN_INT (one_var);
12267
12268 /* Load constant part of vector, substitute neighboring value for
12269 varying element. */
12270 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12271 neon_expand_vector_init (target, copy);
12272
12273 /* Insert variable. */
12274 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12275 switch (mode)
12276 {
12277 case E_V8QImode:
12278 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12279 break;
12280 case E_V16QImode:
12281 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12282 break;
12283 case E_V4HImode:
12284 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12285 break;
12286 case E_V8HImode:
12287 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12288 break;
12289 case E_V2SImode:
12290 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12291 break;
12292 case E_V4SImode:
12293 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12294 break;
12295 case E_V2SFmode:
12296 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12297 break;
12298 case E_V4SFmode:
12299 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12300 break;
12301 case E_V2DImode:
12302 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12303 break;
12304 default:
12305 gcc_unreachable ();
12306 }
12307 return;
12308 }
12309
12310 /* Construct the vector in memory one field at a time
12311 and load the whole vector. */
12312 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12313 for (i = 0; i < n_elts; i++)
12314 emit_move_insn (adjust_address_nv (mem, inner_mode,
12315 i * GET_MODE_SIZE (inner_mode)),
12316 XVECEXP (vals, 0, i));
12317 emit_move_insn (target, mem);
12318 }
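/* Illustrative example of the single-variable path above (hypothetical
   initializer): for { x, 1, 2, 3 } the varying lane 0 is first replaced
   by its neighbour, the constant vector { 1, 1, 2, 3 } is loaded, and x
   is then inserted into lane 0 with the appropriate vset_lane insn.  */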
12319
12320 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Report
12321 an error naming DESC if it doesn't. EXP indicates the source location,
12322 which includes the inlining history for intrinsics. */
12323
12324 static void
12325 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12326 const_tree exp, const char *desc)
12327 {
12328 HOST_WIDE_INT lane;
12329
12330 gcc_assert (CONST_INT_P (operand));
12331
12332 lane = INTVAL (operand);
12333
12334 if (lane < low || lane >= high)
12335 {
12336 if (exp)
12337 error ("%K%s %wd out of range %wd - %wd",
12338 exp, desc, lane, low, high - 1);
12339 else
12340 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12341 }
12342 }
12343
12344 /* Bounds-check lanes. */
12345
12346 void
12347 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12348 const_tree exp)
12349 {
12350 bounds_check (operand, low, high, exp, "lane");
12351 }
12352
12353 /* Bounds-check constants. */
12354
12355 void
12356 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12357 {
12358 bounds_check (operand, low, high, NULL_TREE, "constant");
12359 }
12360
12361 HOST_WIDE_INT
12362 neon_element_bits (machine_mode mode)
12363 {
12364 return GET_MODE_UNIT_BITSIZE (mode);
12365 }
12366
12367 \f
12368 /* Predicates for `match_operand' and `match_operator'. */
12369
12370 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12371 WB is true if full writeback address modes are allowed and is false
12372 if limited writeback address modes (POST_INC and PRE_DEC) are
12373 allowed. */
12374
12375 int
12376 arm_coproc_mem_operand (rtx op, bool wb)
12377 {
12378 rtx ind;
12379
12380 /* Reject eliminable registers. */
12381 if (! (reload_in_progress || reload_completed || lra_in_progress)
12382 && ( reg_mentioned_p (frame_pointer_rtx, op)
12383 || reg_mentioned_p (arg_pointer_rtx, op)
12384 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12385 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12386 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12387 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12388 return FALSE;
12389
12390 /* Constants are converted into offsets from labels. */
12391 if (!MEM_P (op))
12392 return FALSE;
12393
12394 ind = XEXP (op, 0);
12395
12396 if (reload_completed
12397 && (GET_CODE (ind) == LABEL_REF
12398 || (GET_CODE (ind) == CONST
12399 && GET_CODE (XEXP (ind, 0)) == PLUS
12400 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12401 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12402 return TRUE;
12403
12404 /* Match: (mem (reg)). */
12405 if (REG_P (ind))
12406 return arm_address_register_rtx_p (ind, 0);
12407
12408 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12409 acceptable in any case (subject to verification by
12410 arm_address_register_rtx_p). We need WB to be true to accept
12411 PRE_INC and POST_DEC. */
12412 if (GET_CODE (ind) == POST_INC
12413 || GET_CODE (ind) == PRE_DEC
12414 || (wb
12415 && (GET_CODE (ind) == PRE_INC
12416 || GET_CODE (ind) == POST_DEC)))
12417 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12418
12419 if (wb
12420 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12421 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12422 && GET_CODE (XEXP (ind, 1)) == PLUS
12423 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12424 ind = XEXP (ind, 1);
12425
12426 /* Match:
12427 (plus (reg)
12428 (const)). */
12429 if (GET_CODE (ind) == PLUS
12430 && REG_P (XEXP (ind, 0))
12431 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12432 && CONST_INT_P (XEXP (ind, 1))
12433 && INTVAL (XEXP (ind, 1)) > -1024
12434 && INTVAL (XEXP (ind, 1)) < 1024
12435 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12436 return TRUE;
12437
12438 return FALSE;
12439 }
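/* Illustrative examples for the (plus (reg) (const)) case above
   (hypothetical offsets): #508 and #-1020 are accepted because they are
   word-aligned and inside (-1024, 1024); #1024 is out of range and #2 is
   rejected because it is not a multiple of four.  */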
12440
12441 /* Return TRUE if OP is a memory operand which we can load or store a vector
12442 to/from. TYPE is one of the following values:
12443 0 - Vector load/store (vldr)
12444 1 - Core registers (ldm)
12445 2 - Element/structure loads (vld1)
12446 */
12447 int
12448 neon_vector_mem_operand (rtx op, int type, bool strict)
12449 {
12450 rtx ind;
12451
12452 /* Reject eliminable registers. */
12453 if (strict && ! (reload_in_progress || reload_completed)
12454 && (reg_mentioned_p (frame_pointer_rtx, op)
12455 || reg_mentioned_p (arg_pointer_rtx, op)
12456 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12457 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12458 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12459 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12460 return FALSE;
12461
12462 /* Constants are converted into offsets from labels. */
12463 if (!MEM_P (op))
12464 return FALSE;
12465
12466 ind = XEXP (op, 0);
12467
12468 if (reload_completed
12469 && (GET_CODE (ind) == LABEL_REF
12470 || (GET_CODE (ind) == CONST
12471 && GET_CODE (XEXP (ind, 0)) == PLUS
12472 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12473 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12474 return TRUE;
12475
12476 /* Match: (mem (reg)). */
12477 if (REG_P (ind))
12478 return arm_address_register_rtx_p (ind, 0);
12479
12480 /* Allow post-increment with Neon registers. */
12481 if ((type != 1 && GET_CODE (ind) == POST_INC)
12482 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12483 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12484
12485 /* Allow post-increment by register for VLDn. */
12486 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12487 && GET_CODE (XEXP (ind, 1)) == PLUS
12488 && REG_P (XEXP (XEXP (ind, 1), 1)))
12489 return true;
12490
12491 /* Match:
12492 (plus (reg)
12493 (const)). */
12494 if (type == 0
12495 && GET_CODE (ind) == PLUS
12496 && REG_P (XEXP (ind, 0))
12497 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12498 && CONST_INT_P (XEXP (ind, 1))
12499 && INTVAL (XEXP (ind, 1)) > -1024
12500 /* For quad modes, we restrict the constant offset to be slightly less
12501 than what the instruction format permits. We have no such constraint
12502 on double mode offsets. (This must match arm_legitimate_index_p.) */
12503 && (INTVAL (XEXP (ind, 1))
12504 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12505 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12506 return TRUE;
12507
12508 return FALSE;
12509 }
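
/* For illustration (not an exhaustive list), the checks above accept
   addresses of roughly the following shapes; the RTL forms are what the
   function actually tests, and the assembly shown is the kind of
   instruction they correspond to:

     TYPE 0:  (mem (reg rN))                          vldr    d0, [rN]
              (mem (plus (reg rN) (const_int 8)))     vldr    d0, [rN, #8]
     TYPE 2:  (mem (post_inc (reg rN)))               vld1.32 {d0}, [rN]!
              (mem (post_modify (reg rN)
                     (plus (reg rN) (reg rM))))       vld1.32 {d0}, [rN], rM

   For TYPE 0 the constant offset must be a multiple of 4, greater than
   -1024 and less than 1024 (1016 for quad registers).  */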
12510
12511 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12512 type. */
12513 int
12514 neon_struct_mem_operand (rtx op)
12515 {
12516 rtx ind;
12517
12518 /* Reject eliminable registers. */
12519 if (! (reload_in_progress || reload_completed)
12520 && ( reg_mentioned_p (frame_pointer_rtx, op)
12521 || reg_mentioned_p (arg_pointer_rtx, op)
12522 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12523 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12524 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12525 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12526 return FALSE;
12527
12528 /* Constants are converted into offsets from labels. */
12529 if (!MEM_P (op))
12530 return FALSE;
12531
12532 ind = XEXP (op, 0);
12533
12534 if (reload_completed
12535 && (GET_CODE (ind) == LABEL_REF
12536 || (GET_CODE (ind) == CONST
12537 && GET_CODE (XEXP (ind, 0)) == PLUS
12538 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12539 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12540 return TRUE;
12541
12542 /* Match: (mem (reg)). */
12543 if (REG_P (ind))
12544 return arm_address_register_rtx_p (ind, 0);
12545
12546 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12547 if (GET_CODE (ind) == POST_INC
12548 || GET_CODE (ind) == PRE_DEC)
12549 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12550
12551 return FALSE;
12552 }
12553
12554 /* Return true if X is a register that will be eliminated later on. */
12555 int
12556 arm_eliminable_register (rtx x)
12557 {
12558 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12559 || REGNO (x) == ARG_POINTER_REGNUM
12560 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12561 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12562 }
12563
12564 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12565    coprocessor registers.  Otherwise return NO_REGS.  */
12566
12567 enum reg_class
12568 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12569 {
12570 if (mode == HFmode)
12571 {
12572 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12573 return GENERAL_REGS;
12574 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12575 return NO_REGS;
12576 return GENERAL_REGS;
12577 }
12578
12579 /* The neon move patterns handle all legitimate vector and struct
12580 addresses. */
12581 if (TARGET_NEON
12582 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12583 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12584 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12585 || VALID_NEON_STRUCT_MODE (mode)))
12586 return NO_REGS;
12587
12588 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12589 return NO_REGS;
12590
12591 return GENERAL_REGS;
12592 }
12593
12594 /* Values which must be returned in the most-significant end of the return
12595 register. */
12596
12597 static bool
12598 arm_return_in_msb (const_tree valtype)
12599 {
12600 return (TARGET_AAPCS_BASED
12601 && BYTES_BIG_ENDIAN
12602 && (AGGREGATE_TYPE_P (valtype)
12603 || TREE_CODE (valtype) == COMPLEX_TYPE
12604 || FIXED_POINT_TYPE_P (valtype)));
12605 }
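
/* For illustration: on a big-endian AAPCS target the hook above causes a
   small aggregate (say, a 3-byte structure returned in r0) to occupy the
   most significant end of the register, with the padding at the least
   significant end; scalar return values are unaffected.  */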
12606
12607 /* Return TRUE if X references a SYMBOL_REF. */
12608 int
12609 symbol_mentioned_p (rtx x)
12610 {
12611 const char * fmt;
12612 int i;
12613
12614 if (GET_CODE (x) == SYMBOL_REF)
12615 return 1;
12616
12617 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12618 are constant offsets, not symbols. */
12619 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12620 return 0;
12621
12622 fmt = GET_RTX_FORMAT (GET_CODE (x));
12623
12624 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12625 {
12626 if (fmt[i] == 'E')
12627 {
12628 int j;
12629
12630 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12631 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12632 return 1;
12633 }
12634 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12635 return 1;
12636 }
12637
12638 return 0;
12639 }
12640
12641 /* Return TRUE if X references a LABEL_REF. */
12642 int
12643 label_mentioned_p (rtx x)
12644 {
12645 const char * fmt;
12646 int i;
12647
12648 if (GET_CODE (x) == LABEL_REF)
12649 return 1;
12650
12651 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12652 instruction, but they are constant offsets, not symbols. */
12653 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12654 return 0;
12655
12656 fmt = GET_RTX_FORMAT (GET_CODE (x));
12657 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12658 {
12659 if (fmt[i] == 'E')
12660 {
12661 int j;
12662
12663 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12664 if (label_mentioned_p (XVECEXP (x, i, j)))
12665 return 1;
12666 }
12667 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12668 return 1;
12669 }
12670
12671 return 0;
12672 }
12673
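/* Return 1 if X is an UNSPEC_TLS expression, possibly wrapped in a CONST;
   return 0 otherwise.  Unlike symbol_mentioned_p and label_mentioned_p
   above, this does not walk arbitrary sub-rtxes of X.  */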
12674 int
12675 tls_mentioned_p (rtx x)
12676 {
12677 switch (GET_CODE (x))
12678 {
12679 case CONST:
12680 return tls_mentioned_p (XEXP (x, 0));
12681
12682 case UNSPEC:
12683 if (XINT (x, 1) == UNSPEC_TLS)
12684 return 1;
12685
12686 /* Fall through. */
12687 default:
12688 return 0;
12689 }
12690 }
12691
12692 /* Must not copy any rtx that uses a pc-relative address.
12693 Also, disallow copying of load-exclusive instructions that
12694 may appear after splitting of compare-and-swap-style operations
12695 so as to prevent those loops from being transformed away from their
12696 canonical forms (see PR 69904). */
12697
12698 static bool
12699 arm_cannot_copy_insn_p (rtx_insn *insn)
12700 {
12701 /* The tls call insn cannot be copied, as it is paired with a data
12702 word. */
12703 if (recog_memoized (insn) == CODE_FOR_tlscall)
12704 return true;
12705
12706 subrtx_iterator::array_type array;
12707 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12708 {
12709 const_rtx x = *iter;
12710 if (GET_CODE (x) == UNSPEC
12711 && (XINT (x, 1) == UNSPEC_PIC_BASE
12712 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12713 return true;
12714 }
12715
12716 rtx set = single_set (insn);
12717 if (set)
12718 {
12719 rtx src = SET_SRC (set);
12720 if (GET_CODE (src) == ZERO_EXTEND)
12721 src = XEXP (src, 0);
12722
12723 /* Catch the load-exclusive and load-acquire operations. */
12724 if (GET_CODE (src) == UNSPEC_VOLATILE
12725 && (XINT (src, 1) == VUNSPEC_LL
12726 || XINT (src, 1) == VUNSPEC_LAX))
12727 return true;
12728 }
12729 return false;
12730 }
12731
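/* Return the comparison code under which the first operand of the min/max
   expression X is selected; e.g. SMAX maps to GE, because (smax a b)
   yields A when A >= B.  */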
12732 enum rtx_code
12733 minmax_code (rtx x)
12734 {
12735 enum rtx_code code = GET_CODE (x);
12736
12737 switch (code)
12738 {
12739 case SMAX:
12740 return GE;
12741 case SMIN:
12742 return LE;
12743 case UMIN:
12744 return LEU;
12745 case UMAX:
12746 return GEU;
12747 default:
12748 gcc_unreachable ();
12749 }
12750 }
12751
12752 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12753
12754 bool
12755 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12756 int *mask, bool *signed_sat)
12757 {
12758 /* The high bound must be a power of two minus one. */
12759 int log = exact_log2 (INTVAL (hi_bound) + 1);
12760 if (log == -1)
12761 return false;
12762
12763 /* The low bound is either zero (for usat) or one less than the
12764 negation of the high bound (for ssat). */
12765 if (INTVAL (lo_bound) == 0)
12766 {
12767 if (mask)
12768 *mask = log;
12769 if (signed_sat)
12770 *signed_sat = false;
12771
12772 return true;
12773 }
12774
12775 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12776 {
12777 if (mask)
12778 *mask = log + 1;
12779 if (signed_sat)
12780 *signed_sat = true;
12781
12782 return true;
12783 }
12784
12785 return false;
12786 }
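
/* Two worked examples of the matching above:

     clamp to [0, 255]:     hi = 255 = 2^8 - 1, so log = 8; lo = 0
                            -> unsigned saturation, *mask = 8  (usat #8)
     clamp to [-128, 127]:  hi = 127 = 2^7 - 1, so log = 7; lo = -hi - 1
                            -> signed saturation, *mask = 8    (ssat #8)

   This agrees with the architectural behaviour: usat #N saturates to
   [0, 2^N - 1] and ssat #N to [-2^(N-1), 2^(N-1) - 1].  */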
12787
12788 /* Return 1 if memory locations are adjacent. */
12789 int
12790 adjacent_mem_locations (rtx a, rtx b)
12791 {
12792 /* We don't guarantee to preserve the order of these memory refs. */
12793 if (volatile_refs_p (a) || volatile_refs_p (b))
12794 return 0;
12795
12796 if ((REG_P (XEXP (a, 0))
12797 || (GET_CODE (XEXP (a, 0)) == PLUS
12798 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12799 && (REG_P (XEXP (b, 0))
12800 || (GET_CODE (XEXP (b, 0)) == PLUS
12801 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12802 {
12803 HOST_WIDE_INT val0 = 0, val1 = 0;
12804 rtx reg0, reg1;
12805 int val_diff;
12806
12807 if (GET_CODE (XEXP (a, 0)) == PLUS)
12808 {
12809 reg0 = XEXP (XEXP (a, 0), 0);
12810 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12811 }
12812 else
12813 reg0 = XEXP (a, 0);
12814
12815 if (GET_CODE (XEXP (b, 0)) == PLUS)
12816 {
12817 reg1 = XEXP (XEXP (b, 0), 0);
12818 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12819 }
12820 else
12821 reg1 = XEXP (b, 0);
12822
12823 /* Don't accept any offset that will require multiple
12824 instructions to handle, since this would cause the
12825 arith_adjacentmem pattern to output an overlong sequence. */
12826 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12827 return 0;
12828
12829 /* Don't allow an eliminable register: register elimination can make
12830 the offset too large. */
12831 if (arm_eliminable_register (reg0))
12832 return 0;
12833
12834 val_diff = val1 - val0;
12835
12836 if (arm_ld_sched)
12837 {
12838 /* If the target has load delay slots, then there's no benefit
12839 to using an ldm instruction unless the offset is zero and
12840 we are optimizing for size. */
12841 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12842 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12843 && (val_diff == 4 || val_diff == -4));
12844 }
12845
12846 return ((REGNO (reg0) == REGNO (reg1))
12847 && (val_diff == 4 || val_diff == -4));
12848 }
12849
12850 return 0;
12851 }
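
/* For example, (mem (plus (reg r4) (const_int 8))) and
   (mem (plus (reg r4) (const_int 12))) use the same base register and
   offsets that differ by exactly 4, so they can be merged; offsets 8 and
   16, or equal offsets from different base registers, cannot.  On cores
   with load scheduling (arm_ld_sched) the extra restrictions above apply
   as well.  */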
12852
12853 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12854 for load operations, false for store operations. CONSECUTIVE is true
12855 if the register numbers in the operation must be consecutive in the register
12856    bank.  RETURN_PC is true if the value is to be loaded into the PC.
12857 The pattern we are trying to match for load is:
12858 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12859 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12860 :
12861 :
12862 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12863 ]
12864 where
12865 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12866 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12867 3. If consecutive is TRUE, then for kth register being loaded,
12868 REGNO (R_dk) = REGNO (R_d0) + k.
12869 The pattern for store is similar. */
12870 bool
12871 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12872 bool consecutive, bool return_pc)
12873 {
12874 HOST_WIDE_INT count = XVECLEN (op, 0);
12875 rtx reg, mem, addr;
12876 unsigned regno;
12877 unsigned first_regno;
12878 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12879 rtx elt;
12880 bool addr_reg_in_reglist = false;
12881 bool update = false;
12882 int reg_increment;
12883 int offset_adj;
12884 int regs_per_val;
12885
12886 /* If not in SImode, then registers must be consecutive
12887      (e.g., VLDM instructions for DFmode).  */
12888 gcc_assert ((mode == SImode) || consecutive);
12889 /* Setting return_pc for stores is illegal. */
12890 gcc_assert (!return_pc || load);
12891
12892 /* Set up the increments and the regs per val based on the mode. */
12893 reg_increment = GET_MODE_SIZE (mode);
12894 regs_per_val = reg_increment / 4;
12895 offset_adj = return_pc ? 1 : 0;
12896
12897 if (count <= 1
12898 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12899 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12900 return false;
12901
12902 /* Check if this is a write-back. */
12903 elt = XVECEXP (op, 0, offset_adj);
12904 if (GET_CODE (SET_SRC (elt)) == PLUS)
12905 {
12906 i++;
12907 base = 1;
12908 update = true;
12909
12910 /* The offset adjustment must be the number of registers being
12911 popped times the size of a single register. */
12912 if (!REG_P (SET_DEST (elt))
12913 || !REG_P (XEXP (SET_SRC (elt), 0))
12914 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12915 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12916 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12917 ((count - 1 - offset_adj) * reg_increment))
12918 return false;
12919 }
12920
12921 i = i + offset_adj;
12922 base = base + offset_adj;
12923 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12924 success depends on the type: VLDM can do just one reg,
12925 LDM must do at least two. */
12926 if ((count <= i) && (mode == SImode))
12927 return false;
12928
12929 elt = XVECEXP (op, 0, i - 1);
12930 if (GET_CODE (elt) != SET)
12931 return false;
12932
12933 if (load)
12934 {
12935 reg = SET_DEST (elt);
12936 mem = SET_SRC (elt);
12937 }
12938 else
12939 {
12940 reg = SET_SRC (elt);
12941 mem = SET_DEST (elt);
12942 }
12943
12944 if (!REG_P (reg) || !MEM_P (mem))
12945 return false;
12946
12947 regno = REGNO (reg);
12948 first_regno = regno;
12949 addr = XEXP (mem, 0);
12950 if (GET_CODE (addr) == PLUS)
12951 {
12952 if (!CONST_INT_P (XEXP (addr, 1)))
12953 return false;
12954
12955 offset = INTVAL (XEXP (addr, 1));
12956 addr = XEXP (addr, 0);
12957 }
12958
12959 if (!REG_P (addr))
12960 return false;
12961
12962 /* Don't allow SP to be loaded unless it is also the base register. It
12963 guarantees that SP is reset correctly when an LDM instruction
12964 is interrupted. Otherwise, we might end up with a corrupt stack. */
12965 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12966 return false;
12967
12968 for (; i < count; i++)
12969 {
12970 elt = XVECEXP (op, 0, i);
12971 if (GET_CODE (elt) != SET)
12972 return false;
12973
12974 if (load)
12975 {
12976 reg = SET_DEST (elt);
12977 mem = SET_SRC (elt);
12978 }
12979 else
12980 {
12981 reg = SET_SRC (elt);
12982 mem = SET_DEST (elt);
12983 }
12984
12985 if (!REG_P (reg)
12986 || GET_MODE (reg) != mode
12987 || REGNO (reg) <= regno
12988 || (consecutive
12989 && (REGNO (reg) !=
12990 (unsigned int) (first_regno + regs_per_val * (i - base))))
12991 /* Don't allow SP to be loaded unless it is also the base register. It
12992 guarantees that SP is reset correctly when an LDM instruction
12993 is interrupted. Otherwise, we might end up with a corrupt stack. */
12994 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12995 || !MEM_P (mem)
12996 || GET_MODE (mem) != mode
12997 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12998 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12999 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13000 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13001 offset + (i - base) * reg_increment))
13002 && (!REG_P (XEXP (mem, 0))
13003 || offset + (i - base) * reg_increment != 0)))
13004 return false;
13005
13006 regno = REGNO (reg);
13007 if (regno == REGNO (addr))
13008 addr_reg_in_reglist = true;
13009 }
13010
13011 if (load)
13012 {
13013 if (update && addr_reg_in_reglist)
13014 return false;
13015
13016       /* For Thumb-1, the address register is always modified, either by write-back
13017 or by explicit load. If the pattern does not describe an update,
13018 then the address register must be in the list of loaded registers. */
13019 if (TARGET_THUMB1)
13020 return update || addr_reg_in_reglist;
13021 }
13022
13023 return true;
13024 }
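
/* As a concrete illustration (register numbers shown symbolically), a
   pattern of the following shape satisfies the checks above for
   LOAD = true, MODE = SImode:

     (parallel
       [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
        (set (reg:SI r4) (mem:SI (reg:SI sp)))
        (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))])

   which is the form used for a write-back load multiple such as
   "ldmia sp!, {r4, r5}" (i.e. "pop {r4, r5}").  */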
13025
13026 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13027 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13028 instruction. ADD_OFFSET is nonzero if the base address register needs
13029 to be modified with an add instruction before we can use it. */
13030
13031 static bool
13032 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13033 int nops, HOST_WIDE_INT add_offset)
13034 {
13035 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13036 if the offset isn't small enough. The reason 2 ldrs are faster
13037 is because these ARMs are able to do more than one cache access
13038 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13039 whilst the ARM8 has a double bandwidth cache. This means that
13040 these cores can do both an instruction fetch and a data fetch in
13041 a single cycle, so the trick of calculating the address into a
13042 scratch register (one of the result regs) and then doing a load
13043 multiple actually becomes slower (and no smaller in code size).
13044 That is the transformation
13045
13046 ldr rd1, [rbase + offset]
13047 ldr rd2, [rbase + offset + 4]
13048
13049 to
13050
13051 add rd1, rbase, offset
13052 ldmia rd1, {rd1, rd2}
13053
13054 produces worse code -- '3 cycles + any stalls on rd2' instead of
13055 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13056 access per cycle, the first sequence could never complete in less
13057 than 6 cycles, whereas the ldm sequence would only take 5 and
13058 would make better use of sequential accesses if not hitting the
13059 cache.
13060
13061 We cheat here and test 'arm_ld_sched' which we currently know to
13062 only be true for the ARM8, ARM9 and StrongARM. If this ever
13063 changes, then the test below needs to be reworked. */
13064 if (nops == 2 && arm_ld_sched && add_offset != 0)
13065 return false;
13066
13067 /* XScale has load-store double instructions, but they have stricter
13068 alignment requirements than load-store multiple, so we cannot
13069 use them.
13070
13071 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13072 the pipeline until completion.
13073
13074 NREGS CYCLES
13075 1 3
13076 2 4
13077 3 5
13078 4 6
13079
13080 An ldr instruction takes 1-3 cycles, but does not block the
13081 pipeline.
13082
13083 NREGS CYCLES
13084 1 1-3
13085 2 2-6
13086 3 3-9
13087 4 4-12
13088
13089 Best case ldr will always win. However, the more ldr instructions
13090 we issue, the less likely we are to be able to schedule them well.
13091 Using ldr instructions also increases code size.
13092
13093 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13094 for counts of 3 or 4 regs. */
13095 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13096 return false;
13097 return true;
13098 }
13099
13100 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13101 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13102 an array ORDER which describes the sequence to use when accessing the
13103 offsets that produces an ascending order. In this sequence, each
13104 offset must be larger by exactly 4 than the previous one. ORDER[0]
13105 must have been filled in with the lowest offset by the caller.
13106 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13107 we use to verify that ORDER produces an ascending order of registers.
13108 Return true if it was possible to construct such an order, false if
13109 not. */
13110
13111 static bool
13112 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13113 int *unsorted_regs)
13114 {
13115 int i;
13116 for (i = 1; i < nops; i++)
13117 {
13118 int j;
13119
13120 order[i] = order[i - 1];
13121 for (j = 0; j < nops; j++)
13122 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13123 {
13124 /* We must find exactly one offset that is higher than the
13125 previous one by 4. */
13126 if (order[i] != order[i - 1])
13127 return false;
13128 order[i] = j;
13129 }
13130 if (order[i] == order[i - 1])
13131 return false;
13132 /* The register numbers must be ascending. */
13133 if (unsorted_regs != NULL
13134 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13135 return false;
13136 }
13137 return true;
13138 }
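
/* A worked example of the above: with UNSORTED_OFFSETS = {8, 0, 4, 12}
   the caller sets ORDER[0] = 1 (the index of the lowest offset, 0).  The
   loop then finds offset 4 at index 2, offset 8 at index 0 and offset 12
   at index 3, giving ORDER = {1, 2, 0, 3}.  A duplicated offset, or a gap
   of more than 4 between consecutive offsets, makes the function return
   false.  */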
13139
13140 /* Used to determine in a peephole whether a sequence of load
13141 instructions can be changed into a load-multiple instruction.
13142 NOPS is the number of separate load instructions we are examining. The
13143 first NOPS entries in OPERANDS are the destination registers, the
13144 next NOPS entries are memory operands. If this function is
13145 successful, *BASE is set to the common base register of the memory
13146 accesses; *LOAD_OFFSET is set to the first memory location's offset
13147 from that base register.
13148 REGS is an array filled in with the destination register numbers.
13149    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13150    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13151 the sequence of registers in REGS matches the loads from ascending memory
13152 locations, and the function verifies that the register numbers are
13153 themselves ascending. If CHECK_REGS is false, the register numbers
13154 are stored in the order they are found in the operands. */
13155 static int
13156 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13157 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13158 {
13159 int unsorted_regs[MAX_LDM_STM_OPS];
13160 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13161 int order[MAX_LDM_STM_OPS];
13162 rtx base_reg_rtx = NULL;
13163 int base_reg = -1;
13164 int i, ldm_case;
13165
13166 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13167 easily extended if required. */
13168 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13169
13170 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13171
13172 /* Loop over the operands and check that the memory references are
13173 suitable (i.e. immediate offsets from the same base register). At
13174 the same time, extract the target register, and the memory
13175 offsets. */
13176 for (i = 0; i < nops; i++)
13177 {
13178 rtx reg;
13179 rtx offset;
13180
13181 /* Convert a subreg of a mem into the mem itself. */
13182 if (GET_CODE (operands[nops + i]) == SUBREG)
13183 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13184
13185 gcc_assert (MEM_P (operands[nops + i]));
13186
13187 /* Don't reorder volatile memory references; it doesn't seem worth
13188 looking for the case where the order is ok anyway. */
13189 if (MEM_VOLATILE_P (operands[nops + i]))
13190 return 0;
13191
13192 offset = const0_rtx;
13193
13194 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13195 || (GET_CODE (reg) == SUBREG
13196 && REG_P (reg = SUBREG_REG (reg))))
13197 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13198 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13199 || (GET_CODE (reg) == SUBREG
13200 && REG_P (reg = SUBREG_REG (reg))))
13201 && (CONST_INT_P (offset
13202 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13203 {
13204 if (i == 0)
13205 {
13206 base_reg = REGNO (reg);
13207 base_reg_rtx = reg;
13208 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13209 return 0;
13210 }
13211 else if (base_reg != (int) REGNO (reg))
13212 /* Not addressed from the same base register. */
13213 return 0;
13214
13215 unsorted_regs[i] = (REG_P (operands[i])
13216 ? REGNO (operands[i])
13217 : REGNO (SUBREG_REG (operands[i])));
13218
13219 /* If it isn't an integer register, or if it overwrites the
13220 base register but isn't the last insn in the list, then
13221 we can't do this. */
13222 if (unsorted_regs[i] < 0
13223 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13224 || unsorted_regs[i] > 14
13225 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13226 return 0;
13227
13228 /* Don't allow SP to be loaded unless it is also the base
13229 register. It guarantees that SP is reset correctly when
13230 an LDM instruction is interrupted. Otherwise, we might
13231 end up with a corrupt stack. */
13232 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13233 return 0;
13234
13235 unsorted_offsets[i] = INTVAL (offset);
13236 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13237 order[0] = i;
13238 }
13239 else
13240 /* Not a suitable memory address. */
13241 return 0;
13242 }
13243
13244 /* All the useful information has now been extracted from the
13245 operands into unsorted_regs and unsorted_offsets; additionally,
13246 order[0] has been set to the lowest offset in the list. Sort
13247 the offsets into order, verifying that they are adjacent, and
13248 check that the register numbers are ascending. */
13249 if (!compute_offset_order (nops, unsorted_offsets, order,
13250 check_regs ? unsorted_regs : NULL))
13251 return 0;
13252
13253 if (saved_order)
13254 memcpy (saved_order, order, sizeof order);
13255
13256 if (base)
13257 {
13258 *base = base_reg;
13259
13260 for (i = 0; i < nops; i++)
13261 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13262
13263 *load_offset = unsorted_offsets[order[0]];
13264 }
13265
13266 if (TARGET_THUMB1
13267 && !peep2_reg_dead_p (nops, base_reg_rtx))
13268 return 0;
13269
13270 if (unsorted_offsets[order[0]] == 0)
13271 ldm_case = 1; /* ldmia */
13272 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13273 ldm_case = 2; /* ldmib */
13274 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13275 ldm_case = 3; /* ldmda */
13276 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13277 ldm_case = 4; /* ldmdb */
13278 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13279 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13280 ldm_case = 5;
13281 else
13282 return 0;
13283
13284 if (!multiple_operation_profitable_p (false, nops,
13285 ldm_case == 5
13286 ? unsorted_offsets[order[0]] : 0))
13287 return 0;
13288
13289 return ldm_case;
13290 }
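
/* For example (assuming ascending destination registers), four loads from
   [r3], [r3, #4], [r3, #8] and [r3, #12] give ldm_case 1 (ldmia); loads
   from [r3, #-16] .. [r3, #-4] give ldm_case 4 (ldmdb); a run starting at
   [r3, #256] gives ldm_case 5, where the base offset must first be added
   into a register by the caller.  */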
13291
13292 /* Used to determine in a peephole whether a sequence of store instructions can
13293 be changed into a store-multiple instruction.
13294 NOPS is the number of separate store instructions we are examining.
13295 NOPS_TOTAL is the total number of instructions recognized by the peephole
13296 pattern.
13297 The first NOPS entries in OPERANDS are the source registers, the next
13298 NOPS entries are memory operands. If this function is successful, *BASE is
13299 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13300 to the first memory location's offset from that base register. REGS is an
13301 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13302 likewise filled with the corresponding rtx's.
13303    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13304 numbers to an ascending order of stores.
13305 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13306 from ascending memory locations, and the function verifies that the register
13307 numbers are themselves ascending. If CHECK_REGS is false, the register
13308 numbers are stored in the order they are found in the operands. */
13309 static int
13310 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13311 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13312 HOST_WIDE_INT *load_offset, bool check_regs)
13313 {
13314 int unsorted_regs[MAX_LDM_STM_OPS];
13315 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13316 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13317 int order[MAX_LDM_STM_OPS];
13318 int base_reg = -1;
13319 rtx base_reg_rtx = NULL;
13320 int i, stm_case;
13321
13322 /* Write back of base register is currently only supported for Thumb 1. */
13323 int base_writeback = TARGET_THUMB1;
13324
13325 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13326 easily extended if required. */
13327 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13328
13329 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13330
13331 /* Loop over the operands and check that the memory references are
13332 suitable (i.e. immediate offsets from the same base register). At
13333 the same time, extract the target register, and the memory
13334 offsets. */
13335 for (i = 0; i < nops; i++)
13336 {
13337 rtx reg;
13338 rtx offset;
13339
13340 /* Convert a subreg of a mem into the mem itself. */
13341 if (GET_CODE (operands[nops + i]) == SUBREG)
13342 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13343
13344 gcc_assert (MEM_P (operands[nops + i]));
13345
13346 /* Don't reorder volatile memory references; it doesn't seem worth
13347 looking for the case where the order is ok anyway. */
13348 if (MEM_VOLATILE_P (operands[nops + i]))
13349 return 0;
13350
13351 offset = const0_rtx;
13352
13353 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13354 || (GET_CODE (reg) == SUBREG
13355 && REG_P (reg = SUBREG_REG (reg))))
13356 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13357 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13358 || (GET_CODE (reg) == SUBREG
13359 && REG_P (reg = SUBREG_REG (reg))))
13360 && (CONST_INT_P (offset
13361 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13362 {
13363 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13364 ? operands[i] : SUBREG_REG (operands[i]));
13365 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13366
13367 if (i == 0)
13368 {
13369 base_reg = REGNO (reg);
13370 base_reg_rtx = reg;
13371 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13372 return 0;
13373 }
13374 else if (base_reg != (int) REGNO (reg))
13375 /* Not addressed from the same base register. */
13376 return 0;
13377
13378 /* If it isn't an integer register, then we can't do this. */
13379 if (unsorted_regs[i] < 0
13380 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13381 /* The effects are unpredictable if the base register is
13382 both updated and stored. */
13383 || (base_writeback && unsorted_regs[i] == base_reg)
13384 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13385 || unsorted_regs[i] > 14)
13386 return 0;
13387
13388 unsorted_offsets[i] = INTVAL (offset);
13389 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13390 order[0] = i;
13391 }
13392 else
13393 /* Not a suitable memory address. */
13394 return 0;
13395 }
13396
13397 /* All the useful information has now been extracted from the
13398 operands into unsorted_regs and unsorted_offsets; additionally,
13399 order[0] has been set to the lowest offset in the list. Sort
13400 the offsets into order, verifying that they are adjacent, and
13401 check that the register numbers are ascending. */
13402 if (!compute_offset_order (nops, unsorted_offsets, order,
13403 check_regs ? unsorted_regs : NULL))
13404 return 0;
13405
13406 if (saved_order)
13407 memcpy (saved_order, order, sizeof order);
13408
13409 if (base)
13410 {
13411 *base = base_reg;
13412
13413 for (i = 0; i < nops; i++)
13414 {
13415 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13416 if (reg_rtxs)
13417 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13418 }
13419
13420 *load_offset = unsorted_offsets[order[0]];
13421 }
13422
13423 if (TARGET_THUMB1
13424 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13425 return 0;
13426
13427 if (unsorted_offsets[order[0]] == 0)
13428 stm_case = 1; /* stmia */
13429 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13430 stm_case = 2; /* stmib */
13431 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13432 stm_case = 3; /* stmda */
13433 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13434 stm_case = 4; /* stmdb */
13435 else
13436 return 0;
13437
13438 if (!multiple_operation_profitable_p (false, nops, 0))
13439 return 0;
13440
13441 return stm_case;
13442 }
13443 \f
13444 /* Routines for use in generating RTL. */
13445
13446 /* Generate a load-multiple instruction. COUNT is the number of loads in
13447 the instruction; REGS and MEMS are arrays containing the operands.
13448 BASEREG is the base register to be used in addressing the memory operands.
13449 WBACK_OFFSET is nonzero if the instruction should update the base
13450 register. */
13451
13452 static rtx
13453 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13454 HOST_WIDE_INT wback_offset)
13455 {
13456 int i = 0, j;
13457 rtx result;
13458
13459 if (!multiple_operation_profitable_p (false, count, 0))
13460 {
13461 rtx seq;
13462
13463 start_sequence ();
13464
13465 for (i = 0; i < count; i++)
13466 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13467
13468 if (wback_offset != 0)
13469 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13470
13471 seq = get_insns ();
13472 end_sequence ();
13473
13474 return seq;
13475 }
13476
13477 result = gen_rtx_PARALLEL (VOIDmode,
13478 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13479 if (wback_offset != 0)
13480 {
13481 XVECEXP (result, 0, 0)
13482 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13483 i = 1;
13484 count++;
13485 }
13486
13487 for (j = 0; i < count; i++, j++)
13488 XVECEXP (result, 0, i)
13489 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13490
13491 return result;
13492 }
13493
13494 /* Generate a store-multiple instruction. COUNT is the number of stores in
13495 the instruction; REGS and MEMS are arrays containing the operands.
13496 BASEREG is the base register to be used in addressing the memory operands.
13497 WBACK_OFFSET is nonzero if the instruction should update the base
13498 register. */
13499
13500 static rtx
13501 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13502 HOST_WIDE_INT wback_offset)
13503 {
13504 int i = 0, j;
13505 rtx result;
13506
13507 if (GET_CODE (basereg) == PLUS)
13508 basereg = XEXP (basereg, 0);
13509
13510 if (!multiple_operation_profitable_p (false, count, 0))
13511 {
13512 rtx seq;
13513
13514 start_sequence ();
13515
13516 for (i = 0; i < count; i++)
13517 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13518
13519 if (wback_offset != 0)
13520 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13521
13522 seq = get_insns ();
13523 end_sequence ();
13524
13525 return seq;
13526 }
13527
13528 result = gen_rtx_PARALLEL (VOIDmode,
13529 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13530 if (wback_offset != 0)
13531 {
13532 XVECEXP (result, 0, 0)
13533 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13534 i = 1;
13535 count++;
13536 }
13537
13538 for (j = 0; i < count; i++, j++)
13539 XVECEXP (result, 0, i)
13540 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13541
13542 return result;
13543 }
13544
13545 /* Generate either a load-multiple or a store-multiple instruction. This
13546 function can be used in situations where we can start with a single MEM
13547 rtx and adjust its address upwards.
13548 COUNT is the number of operations in the instruction, not counting a
13549 possible update of the base register. REGS is an array containing the
13550 register operands.
13551 BASEREG is the base register to be used in addressing the memory operands,
13552 which are constructed from BASEMEM.
13553 WRITE_BACK specifies whether the generated instruction should include an
13554 update of the base register.
13555 OFFSETP is used to pass an offset to and from this function; this offset
13556 is not used when constructing the address (instead BASEMEM should have an
13557    appropriate offset in its address); it is used only for setting
13558    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13559
13560 static rtx
13561 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13562 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13563 {
13564 rtx mems[MAX_LDM_STM_OPS];
13565 HOST_WIDE_INT offset = *offsetp;
13566 int i;
13567
13568 gcc_assert (count <= MAX_LDM_STM_OPS);
13569
13570 if (GET_CODE (basereg) == PLUS)
13571 basereg = XEXP (basereg, 0);
13572
13573 for (i = 0; i < count; i++)
13574 {
13575 rtx addr = plus_constant (Pmode, basereg, i * 4);
13576 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13577 offset += 4;
13578 }
13579
13580 if (write_back)
13581 *offsetp = offset;
13582
13583 if (is_load)
13584 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13585 write_back ? 4 * count : 0);
13586 else
13587 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13588 write_back ? 4 * count : 0);
13589 }
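
/* For example, with COUNT = 3 and BASEMEM addressed by (reg rN), the
   memory operands constructed above are [rN], [rN, #4] and [rN, #8]; if
   WRITE_BACK is set the resulting instruction also advances rN by 12 and
   *OFFSETP is increased by the same amount.  */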
13590
13591 rtx
13592 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13593 rtx basemem, HOST_WIDE_INT *offsetp)
13594 {
13595 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13596 offsetp);
13597 }
13598
13599 rtx
13600 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13601 rtx basemem, HOST_WIDE_INT *offsetp)
13602 {
13603 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13604 offsetp);
13605 }
13606
13607 /* Called from a peephole2 expander to turn a sequence of loads into an
13608 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13609 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13610    is true if we can reorder the registers because their subsequent uses are
13611    commutative.
13612    Returns true iff we could generate a new instruction.  */
13613
13614 bool
13615 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13616 {
13617 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13618 rtx mems[MAX_LDM_STM_OPS];
13619 int i, j, base_reg;
13620 rtx base_reg_rtx;
13621 HOST_WIDE_INT offset;
13622 int write_back = FALSE;
13623 int ldm_case;
13624 rtx addr;
13625
13626 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13627 &base_reg, &offset, !sort_regs);
13628
13629 if (ldm_case == 0)
13630 return false;
13631
13632 if (sort_regs)
13633 for (i = 0; i < nops - 1; i++)
13634 for (j = i + 1; j < nops; j++)
13635 if (regs[i] > regs[j])
13636 {
13637 int t = regs[i];
13638 regs[i] = regs[j];
13639 regs[j] = t;
13640 }
13641 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13642
13643 if (TARGET_THUMB1)
13644 {
13645 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13646 gcc_assert (ldm_case == 1 || ldm_case == 5);
13647 write_back = TRUE;
13648 }
13649
13650 if (ldm_case == 5)
13651 {
13652 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13653 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13654 offset = 0;
13655 if (!TARGET_THUMB1)
13656 base_reg_rtx = newbase;
13657 }
13658
13659 for (i = 0; i < nops; i++)
13660 {
13661 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13662 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13663 SImode, addr, 0);
13664 }
13665 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13666 write_back ? offset + i * 4 : 0));
13667 return true;
13668 }
13669
13670 /* Called from a peephole2 expander to turn a sequence of stores into an
13671 STM instruction. OPERANDS are the operands found by the peephole matcher;
13672 NOPS indicates how many separate stores we are trying to combine.
13673 Returns true iff we could generate a new instruction. */
13674
13675 bool
13676 gen_stm_seq (rtx *operands, int nops)
13677 {
13678 int i;
13679 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13680 rtx mems[MAX_LDM_STM_OPS];
13681 int base_reg;
13682 rtx base_reg_rtx;
13683 HOST_WIDE_INT offset;
13684 int write_back = FALSE;
13685 int stm_case;
13686 rtx addr;
13687 bool base_reg_dies;
13688
13689 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13690 mem_order, &base_reg, &offset, true);
13691
13692 if (stm_case == 0)
13693 return false;
13694
13695 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13696
13697 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13698 if (TARGET_THUMB1)
13699 {
13700 gcc_assert (base_reg_dies);
13701 write_back = TRUE;
13702 }
13703
13704 if (stm_case == 5)
13705 {
13706 gcc_assert (base_reg_dies);
13707 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13708 offset = 0;
13709 }
13710
13711 addr = plus_constant (Pmode, base_reg_rtx, offset);
13712
13713 for (i = 0; i < nops; i++)
13714 {
13715 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13716 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13717 SImode, addr, 0);
13718 }
13719 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13720 write_back ? offset + i * 4 : 0));
13721 return true;
13722 }
13723
13724 /* Called from a peephole2 expander to turn a sequence of stores that are
13725 preceded by constant loads into an STM instruction. OPERANDS are the
13726 operands found by the peephole matcher; NOPS indicates how many
13727 separate stores we are trying to combine; there are 2 * NOPS
13728 instructions in the peephole.
13729 Returns true iff we could generate a new instruction. */
13730
13731 bool
13732 gen_const_stm_seq (rtx *operands, int nops)
13733 {
13734 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13735 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13736 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13737 rtx mems[MAX_LDM_STM_OPS];
13738 int base_reg;
13739 rtx base_reg_rtx;
13740 HOST_WIDE_INT offset;
13741 int write_back = FALSE;
13742 int stm_case;
13743 rtx addr;
13744 bool base_reg_dies;
13745 int i, j;
13746 HARD_REG_SET allocated;
13747
13748 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13749 mem_order, &base_reg, &offset, false);
13750
13751 if (stm_case == 0)
13752 return false;
13753
13754 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13755
13756 /* If the same register is used more than once, try to find a free
13757 register. */
13758 CLEAR_HARD_REG_SET (allocated);
13759 for (i = 0; i < nops; i++)
13760 {
13761 for (j = i + 1; j < nops; j++)
13762 if (regs[i] == regs[j])
13763 {
13764 rtx t = peep2_find_free_register (0, nops * 2,
13765 TARGET_THUMB1 ? "l" : "r",
13766 SImode, &allocated);
13767 if (t == NULL_RTX)
13768 return false;
13769 reg_rtxs[i] = t;
13770 regs[i] = REGNO (t);
13771 }
13772 }
13773
13774 /* Compute an ordering that maps the register numbers to an ascending
13775 sequence. */
13776 reg_order[0] = 0;
13777 for (i = 0; i < nops; i++)
13778 if (regs[i] < regs[reg_order[0]])
13779 reg_order[0] = i;
13780
13781 for (i = 1; i < nops; i++)
13782 {
13783 int this_order = reg_order[i - 1];
13784 for (j = 0; j < nops; j++)
13785 if (regs[j] > regs[reg_order[i - 1]]
13786 && (this_order == reg_order[i - 1]
13787 || regs[j] < regs[this_order]))
13788 this_order = j;
13789 reg_order[i] = this_order;
13790 }
13791
13792 /* Ensure that registers that must be live after the instruction end
13793 up with the correct value. */
13794 for (i = 0; i < nops; i++)
13795 {
13796 int this_order = reg_order[i];
13797 if ((this_order != mem_order[i]
13798 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13799 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13800 return false;
13801 }
13802
13803 /* Load the constants. */
13804 for (i = 0; i < nops; i++)
13805 {
13806 rtx op = operands[2 * nops + mem_order[i]];
13807 sorted_regs[i] = regs[reg_order[i]];
13808 emit_move_insn (reg_rtxs[reg_order[i]], op);
13809 }
13810
13811 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13812
13813 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13814 if (TARGET_THUMB1)
13815 {
13816 gcc_assert (base_reg_dies);
13817 write_back = TRUE;
13818 }
13819
13820 if (stm_case == 5)
13821 {
13822 gcc_assert (base_reg_dies);
13823 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13824 offset = 0;
13825 }
13826
13827 addr = plus_constant (Pmode, base_reg_rtx, offset);
13828
13829 for (i = 0; i < nops; i++)
13830 {
13831 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13832 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13833 SImode, addr, 0);
13834 }
13835 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13836 write_back ? offset + i * 4 : 0));
13837 return true;
13838 }
13839
13840 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13841 unaligned copies on processors which support unaligned semantics for those
13842 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13843 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13844 An interleave factor of 1 (the minimum) will perform no interleaving.
13845 Load/store multiple are used for aligned addresses where possible. */
13846
13847 static void
13848 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13849 HOST_WIDE_INT length,
13850 unsigned int interleave_factor)
13851 {
13852 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13853 int *regnos = XALLOCAVEC (int, interleave_factor);
13854 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13855 HOST_WIDE_INT i, j;
13856 HOST_WIDE_INT remaining = length, words;
13857 rtx halfword_tmp = NULL, byte_tmp = NULL;
13858 rtx dst, src;
13859 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13860 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13861 HOST_WIDE_INT srcoffset, dstoffset;
13862 HOST_WIDE_INT src_autoinc, dst_autoinc;
13863 rtx mem, addr;
13864
13865 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13866
13867 /* Use hard registers if we have aligned source or destination so we can use
13868 load/store multiple with contiguous registers. */
13869 if (dst_aligned || src_aligned)
13870 for (i = 0; i < interleave_factor; i++)
13871 regs[i] = gen_rtx_REG (SImode, i);
13872 else
13873 for (i = 0; i < interleave_factor; i++)
13874 regs[i] = gen_reg_rtx (SImode);
13875
13876 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13877 src = copy_addr_to_reg (XEXP (srcbase, 0));
13878
13879 srcoffset = dstoffset = 0;
13880
13881 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13882 For copying the last bytes we want to subtract this offset again. */
13883 src_autoinc = dst_autoinc = 0;
13884
13885 for (i = 0; i < interleave_factor; i++)
13886 regnos[i] = i;
13887
13888 /* Copy BLOCK_SIZE_BYTES chunks. */
13889
13890 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13891 {
13892 /* Load words. */
13893 if (src_aligned && interleave_factor > 1)
13894 {
13895 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13896 TRUE, srcbase, &srcoffset));
13897 src_autoinc += UNITS_PER_WORD * interleave_factor;
13898 }
13899 else
13900 {
13901 for (j = 0; j < interleave_factor; j++)
13902 {
13903 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13904 - src_autoinc));
13905 mem = adjust_automodify_address (srcbase, SImode, addr,
13906 srcoffset + j * UNITS_PER_WORD);
13907 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13908 }
13909 srcoffset += block_size_bytes;
13910 }
13911
13912 /* Store words. */
13913 if (dst_aligned && interleave_factor > 1)
13914 {
13915 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13916 TRUE, dstbase, &dstoffset));
13917 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13918 }
13919 else
13920 {
13921 for (j = 0; j < interleave_factor; j++)
13922 {
13923 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13924 - dst_autoinc));
13925 mem = adjust_automodify_address (dstbase, SImode, addr,
13926 dstoffset + j * UNITS_PER_WORD);
13927 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13928 }
13929 dstoffset += block_size_bytes;
13930 }
13931
13932 remaining -= block_size_bytes;
13933 }
13934
13935 /* Copy any whole words left (note these aren't interleaved with any
13936 subsequent halfword/byte load/stores in the interests of simplicity). */
13937
13938 words = remaining / UNITS_PER_WORD;
13939
13940 gcc_assert (words < interleave_factor);
13941
13942 if (src_aligned && words > 1)
13943 {
13944 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13945 &srcoffset));
13946 src_autoinc += UNITS_PER_WORD * words;
13947 }
13948 else
13949 {
13950 for (j = 0; j < words; j++)
13951 {
13952 addr = plus_constant (Pmode, src,
13953 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13954 mem = adjust_automodify_address (srcbase, SImode, addr,
13955 srcoffset + j * UNITS_PER_WORD);
13956 if (src_aligned)
13957 emit_move_insn (regs[j], mem);
13958 else
13959 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13960 }
13961 srcoffset += words * UNITS_PER_WORD;
13962 }
13963
13964 if (dst_aligned && words > 1)
13965 {
13966 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13967 &dstoffset));
13968 dst_autoinc += words * UNITS_PER_WORD;
13969 }
13970 else
13971 {
13972 for (j = 0; j < words; j++)
13973 {
13974 addr = plus_constant (Pmode, dst,
13975 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13976 mem = adjust_automodify_address (dstbase, SImode, addr,
13977 dstoffset + j * UNITS_PER_WORD);
13978 if (dst_aligned)
13979 emit_move_insn (mem, regs[j]);
13980 else
13981 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13982 }
13983 dstoffset += words * UNITS_PER_WORD;
13984 }
13985
13986 remaining -= words * UNITS_PER_WORD;
13987
13988 gcc_assert (remaining < 4);
13989
13990 /* Copy a halfword if necessary. */
13991
13992 if (remaining >= 2)
13993 {
13994 halfword_tmp = gen_reg_rtx (SImode);
13995
13996 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13997 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13998 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13999
14000 /* Either write out immediately, or delay until we've loaded the last
14001 byte, depending on interleave factor. */
14002 if (interleave_factor == 1)
14003 {
14004 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14005 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14006 emit_insn (gen_unaligned_storehi (mem,
14007 gen_lowpart (HImode, halfword_tmp)));
14008 halfword_tmp = NULL;
14009 dstoffset += 2;
14010 }
14011
14012 remaining -= 2;
14013 srcoffset += 2;
14014 }
14015
14016 gcc_assert (remaining < 2);
14017
14018 /* Copy last byte. */
14019
14020 if ((remaining & 1) != 0)
14021 {
14022 byte_tmp = gen_reg_rtx (SImode);
14023
14024 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14025 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14026 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14027
14028 if (interleave_factor == 1)
14029 {
14030 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14031 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14032 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14033 byte_tmp = NULL;
14034 dstoffset++;
14035 }
14036
14037 remaining--;
14038 srcoffset++;
14039 }
14040
14041 /* Store last halfword if we haven't done so already. */
14042
14043 if (halfword_tmp)
14044 {
14045 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14046 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14047 emit_insn (gen_unaligned_storehi (mem,
14048 gen_lowpart (HImode, halfword_tmp)));
14049 dstoffset += 2;
14050 }
14051
14052 /* Likewise for last byte. */
14053
14054 if (byte_tmp)
14055 {
14056 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14057 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14058 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14059 dstoffset++;
14060 }
14061
14062 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14063 }
14064
14065 /* From mips_adjust_block_mem:
14066
14067 Helper function for doing a loop-based block operation on memory
14068 reference MEM. Each iteration of the loop will operate on LENGTH
14069 bytes of MEM.
14070
14071 Create a new base register for use within the loop and point it to
14072 the start of MEM. Create a new memory reference that uses this
14073 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14074
14075 static void
14076 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14077 rtx *loop_mem)
14078 {
14079 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14080
14081 /* Although the new mem does not refer to a known location,
14082 it does keep up to LENGTH bytes of alignment. */
14083 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14084 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14085 }
14086
14087 /* From mips_block_move_loop:
14088
14089 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14090 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14091 the memory regions do not overlap. */
14092
14093 static void
14094 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14095 unsigned int interleave_factor,
14096 HOST_WIDE_INT bytes_per_iter)
14097 {
14098 rtx src_reg, dest_reg, final_src, test;
14099 HOST_WIDE_INT leftover;
14100
14101 leftover = length % bytes_per_iter;
14102 length -= leftover;
14103
14104 /* Create registers and memory references for use within the loop. */
14105 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14106 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14107
14108 /* Calculate the value that SRC_REG should have after the last iteration of
14109 the loop. */
14110 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14111 0, 0, OPTAB_WIDEN);
14112
14113 /* Emit the start of the loop. */
14114 rtx_code_label *label = gen_label_rtx ();
14115 emit_label (label);
14116
14117 /* Emit the loop body. */
14118 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14119 interleave_factor);
14120
14121 /* Move on to the next block. */
14122 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14123 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14124
14125 /* Emit the loop condition. */
14126 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14127 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14128
14129 /* Mop up any left-over bytes. */
14130 if (leftover)
14131 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14132 }
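
/* For example, a 100-byte copy with BYTES_PER_ITER = 16 runs the loop six
   times (96 bytes) and then hands the remaining 4 bytes to
   arm_block_move_unaligned_straight.  */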
14133
14134 /* Emit a block move when either the source or destination is unaligned (not
14135 aligned to a four-byte boundary). This may need further tuning depending on
14136 core type, optimize_size setting, etc. */
14137
14138 static int
14139 arm_movmemqi_unaligned (rtx *operands)
14140 {
14141 HOST_WIDE_INT length = INTVAL (operands[2]);
14142
14143 if (optimize_size)
14144 {
14145 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14146 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14147 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14148 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14149 or dst_aligned though: allow more interleaving in those cases since the
14150 resulting code can be smaller. */
14151 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14152 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14153
14154 if (length > 12)
14155 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14156 interleave_factor, bytes_per_iter);
14157 else
14158 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14159 interleave_factor);
14160 }
14161 else
14162 {
14163 /* Note that the loop created by arm_block_move_unaligned_loop may be
14164 subject to loop unrolling, which makes tuning this condition a little
14165 redundant. */
14166 if (length > 32)
14167 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14168 else
14169 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14170 }
14171
14172 return 1;
14173 }
14174
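/* Expand a small constant-length block copy (the movmemqi pattern) using
   load/store multiple where possible.  OPERANDS are as for the standard
   movmemqi pattern: operands[0] is the destination MEM, operands[1] the
   source MEM, operands[2] the length in bytes and operands[3] the
   alignment.  Only constant lengths of at most 64 bytes are handled.
   Return 1 if the copy was expanded, 0 to fall back to a library call.  */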
14175 int
14176 arm_gen_movmemqi (rtx *operands)
14177 {
14178 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14179 HOST_WIDE_INT srcoffset, dstoffset;
14180 rtx src, dst, srcbase, dstbase;
14181 rtx part_bytes_reg = NULL;
14182 rtx mem;
14183
14184 if (!CONST_INT_P (operands[2])
14185 || !CONST_INT_P (operands[3])
14186 || INTVAL (operands[2]) > 64)
14187 return 0;
14188
14189 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14190 return arm_movmemqi_unaligned (operands);
14191
14192 if (INTVAL (operands[3]) & 3)
14193 return 0;
14194
14195 dstbase = operands[0];
14196 srcbase = operands[1];
14197
14198 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14199 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14200
14201 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14202 out_words_to_go = INTVAL (operands[2]) / 4;
14203 last_bytes = INTVAL (operands[2]) & 3;
14204 dstoffset = srcoffset = 0;
14205
14206 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14207 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14208
14209 while (in_words_to_go >= 2)
14210 {
14211 if (in_words_to_go > 4)
14212 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14213 TRUE, srcbase, &srcoffset));
14214 else
14215 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14216 src, FALSE, srcbase,
14217 &srcoffset));
14218
14219 if (out_words_to_go)
14220 {
14221 if (out_words_to_go > 4)
14222 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14223 TRUE, dstbase, &dstoffset));
14224 else if (out_words_to_go != 1)
14225 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14226 out_words_to_go, dst,
14227 (last_bytes == 0
14228 ? FALSE : TRUE),
14229 dstbase, &dstoffset));
14230 else
14231 {
14232 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14233 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14234 if (last_bytes != 0)
14235 {
14236 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14237 dstoffset += 4;
14238 }
14239 }
14240 }
14241
14242 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14243 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14244 }
14245
14246 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14247 if (out_words_to_go)
14248 {
14249 rtx sreg;
14250
14251 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14252 sreg = copy_to_reg (mem);
14253
14254 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14255 emit_move_insn (mem, sreg);
14256 in_words_to_go--;
14257
14258 gcc_assert (!in_words_to_go); /* Sanity check */
14259 }
14260
14261 if (in_words_to_go)
14262 {
14263 gcc_assert (in_words_to_go > 0);
14264
14265 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14266 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14267 }
14268
14269 gcc_assert (!last_bytes || part_bytes_reg);
14270
14271 if (BYTES_BIG_ENDIAN && last_bytes)
14272 {
14273 rtx tmp = gen_reg_rtx (SImode);
14274
14275 /* The bytes we want are in the top end of the word. */
14276 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14277 GEN_INT (8 * (4 - last_bytes))));
14278 part_bytes_reg = tmp;
14279
14280 while (last_bytes)
14281 {
14282 mem = adjust_automodify_address (dstbase, QImode,
14283 plus_constant (Pmode, dst,
14284 last_bytes - 1),
14285 dstoffset + last_bytes - 1);
14286 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14287
14288 if (--last_bytes)
14289 {
14290 tmp = gen_reg_rtx (SImode);
14291 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14292 part_bytes_reg = tmp;
14293 }
14294 }
14295
14296 }
14297 else
14298 {
14299 if (last_bytes > 1)
14300 {
14301 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14302 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14303 last_bytes -= 2;
14304 if (last_bytes)
14305 {
14306 rtx tmp = gen_reg_rtx (SImode);
14307 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14308 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14309 part_bytes_reg = tmp;
14310 dstoffset += 2;
14311 }
14312 }
14313
14314 if (last_bytes)
14315 {
14316 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14317 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14318 }
14319 }
14320
14321 return 1;
14322 }
14323
14324 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14325 by mode size. */
14326 inline static rtx
14327 next_consecutive_mem (rtx mem)
14328 {
14329 machine_mode mode = GET_MODE (mem);
14330 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14331 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14332
14333 return adjust_automodify_address (mem, mode, addr, offset);
14334 }
14335
14336 /* Copy using LDRD/STRD instructions whenever possible.
14337 Returns true upon success. */
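/* As a purely illustrative sketch of the expansion below: when both
   buffers are word-aligned, a 15-byte copy becomes one DImode load/store
   (typically LDRD/STRD) for the first eight bytes, then a word move, a
   halfword move and a final byte move.  */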
14338 bool
14339 gen_movmem_ldrd_strd (rtx *operands)
14340 {
14341 unsigned HOST_WIDE_INT len;
14342 HOST_WIDE_INT align;
14343 rtx src, dst, base;
14344 rtx reg0;
14345 bool src_aligned, dst_aligned;
14346 bool src_volatile, dst_volatile;
14347
14348 gcc_assert (CONST_INT_P (operands[2]));
14349 gcc_assert (CONST_INT_P (operands[3]));
14350
14351 len = UINTVAL (operands[2]);
14352 if (len > 64)
14353 return false;
14354
14355 /* Maximum alignment we can assume for both src and dst buffers. */
14356 align = INTVAL (operands[3]);
14357
14358 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14359 return false;
14360
14361 /* Place src and dst addresses in registers
14362 and update the corresponding mem rtx. */
14363 dst = operands[0];
14364 dst_volatile = MEM_VOLATILE_P (dst);
14365 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14366 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14367 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14368
14369 src = operands[1];
14370 src_volatile = MEM_VOLATILE_P (src);
14371 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14372 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14373 src = adjust_automodify_address (src, VOIDmode, base, 0);
14374
14375 if (!unaligned_access && !(src_aligned && dst_aligned))
14376 return false;
14377
14378 if (src_volatile || dst_volatile)
14379 return false;
14380
14381 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14382 if (!(dst_aligned || src_aligned))
14383 return arm_gen_movmemqi (operands);
14384
14385   /* If either the src or dst is unaligned we'll be accessing it as pairs
14386 of unaligned SImode accesses. Otherwise we can generate DImode
14387 ldrd/strd instructions. */
14388 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14389 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14390
14391 while (len >= 8)
14392 {
14393 len -= 8;
14394 reg0 = gen_reg_rtx (DImode);
14395 rtx low_reg = NULL_RTX;
14396 rtx hi_reg = NULL_RTX;
14397
14398 if (!src_aligned || !dst_aligned)
14399 {
14400 low_reg = gen_lowpart (SImode, reg0);
14401 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14402 }
14403 if (src_aligned)
14404 emit_move_insn (reg0, src);
14405 else
14406 {
14407 emit_insn (gen_unaligned_loadsi (low_reg, src));
14408 src = next_consecutive_mem (src);
14409 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14410 }
14411
14412 if (dst_aligned)
14413 emit_move_insn (dst, reg0);
14414 else
14415 {
14416 emit_insn (gen_unaligned_storesi (dst, low_reg));
14417 dst = next_consecutive_mem (dst);
14418 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14419 }
14420
14421 src = next_consecutive_mem (src);
14422 dst = next_consecutive_mem (dst);
14423 }
14424
14425 gcc_assert (len < 8);
14426 if (len >= 4)
14427 {
14428 /* More than a word but less than a double-word to copy. Copy a word. */
14429 reg0 = gen_reg_rtx (SImode);
14430 src = adjust_address (src, SImode, 0);
14431 dst = adjust_address (dst, SImode, 0);
14432 if (src_aligned)
14433 emit_move_insn (reg0, src);
14434 else
14435 emit_insn (gen_unaligned_loadsi (reg0, src));
14436
14437 if (dst_aligned)
14438 emit_move_insn (dst, reg0);
14439 else
14440 emit_insn (gen_unaligned_storesi (dst, reg0));
14441
14442 src = next_consecutive_mem (src);
14443 dst = next_consecutive_mem (dst);
14444 len -= 4;
14445 }
14446
14447 if (len == 0)
14448 return true;
14449
14450 /* Copy the remaining bytes. */
14451 if (len >= 2)
14452 {
14453 dst = adjust_address (dst, HImode, 0);
14454 src = adjust_address (src, HImode, 0);
14455 reg0 = gen_reg_rtx (SImode);
14456 if (src_aligned)
14457 emit_insn (gen_zero_extendhisi2 (reg0, src));
14458 else
14459 emit_insn (gen_unaligned_loadhiu (reg0, src));
14460
14461 if (dst_aligned)
14462 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14463 else
14464 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14465
14466 src = next_consecutive_mem (src);
14467 dst = next_consecutive_mem (dst);
14468 if (len == 2)
14469 return true;
14470 }
14471
14472 dst = adjust_address (dst, QImode, 0);
14473 src = adjust_address (src, QImode, 0);
14474 reg0 = gen_reg_rtx (QImode);
14475 emit_move_insn (reg0, src);
14476 emit_move_insn (dst, reg0);
14477 return true;
14478 }
14479
14480 /* Select a dominance comparison mode if possible for a test of the general
14481 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14482 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14483 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14484 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14485 In all cases OP will be either EQ or NE, but we don't need to know which
14486 here. If we are unable to support a dominance comparison we return
14487 CC mode. This will then fail to match for the RTL expressions that
14488 generate this call. */
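/* For instance (purely illustrative): a test of the form
     (ne (ior (eq x y) (geu a b)) (const_int 0))
   with COND_OR == DOM_CC_X_OR_Y selects CC_DGEUmode, because EQ
   dominates GEU (a true EQ result implies a true GEU result).  */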
14489 machine_mode
14490 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14491 {
14492 enum rtx_code cond1, cond2;
14493 int swapped = 0;
14494
14495 /* Currently we will probably get the wrong result if the individual
14496 comparisons are not simple. This also ensures that it is safe to
14497 reverse a comparison if necessary. */
14498 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14499 != CCmode)
14500 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14501 != CCmode))
14502 return CCmode;
14503
14504 /* The if_then_else variant of this tests the second condition if the
14505 first passes, but is true if the first fails. Reverse the first
14506 condition to get a true "inclusive-or" expression. */
14507 if (cond_or == DOM_CC_NX_OR_Y)
14508 cond1 = reverse_condition (cond1);
14509
14510 /* If the comparisons are not equal, and one doesn't dominate the other,
14511 then we can't do this. */
14512 if (cond1 != cond2
14513 && !comparison_dominates_p (cond1, cond2)
14514 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14515 return CCmode;
14516
14517 if (swapped)
14518 std::swap (cond1, cond2);
14519
14520 switch (cond1)
14521 {
14522 case EQ:
14523 if (cond_or == DOM_CC_X_AND_Y)
14524 return CC_DEQmode;
14525
14526 switch (cond2)
14527 {
14528 case EQ: return CC_DEQmode;
14529 case LE: return CC_DLEmode;
14530 case LEU: return CC_DLEUmode;
14531 case GE: return CC_DGEmode;
14532 case GEU: return CC_DGEUmode;
14533 default: gcc_unreachable ();
14534 }
14535
14536 case LT:
14537 if (cond_or == DOM_CC_X_AND_Y)
14538 return CC_DLTmode;
14539
14540 switch (cond2)
14541 {
14542 case LT:
14543 return CC_DLTmode;
14544 case LE:
14545 return CC_DLEmode;
14546 case NE:
14547 return CC_DNEmode;
14548 default:
14549 gcc_unreachable ();
14550 }
14551
14552 case GT:
14553 if (cond_or == DOM_CC_X_AND_Y)
14554 return CC_DGTmode;
14555
14556 switch (cond2)
14557 {
14558 case GT:
14559 return CC_DGTmode;
14560 case GE:
14561 return CC_DGEmode;
14562 case NE:
14563 return CC_DNEmode;
14564 default:
14565 gcc_unreachable ();
14566 }
14567
14568 case LTU:
14569 if (cond_or == DOM_CC_X_AND_Y)
14570 return CC_DLTUmode;
14571
14572 switch (cond2)
14573 {
14574 case LTU:
14575 return CC_DLTUmode;
14576 case LEU:
14577 return CC_DLEUmode;
14578 case NE:
14579 return CC_DNEmode;
14580 default:
14581 gcc_unreachable ();
14582 }
14583
14584 case GTU:
14585 if (cond_or == DOM_CC_X_AND_Y)
14586 return CC_DGTUmode;
14587
14588 switch (cond2)
14589 {
14590 case GTU:
14591 return CC_DGTUmode;
14592 case GEU:
14593 return CC_DGEUmode;
14594 case NE:
14595 return CC_DNEmode;
14596 default:
14597 gcc_unreachable ();
14598 }
14599
14600 /* The remaining cases only occur when both comparisons are the
14601 same. */
14602 case NE:
14603 gcc_assert (cond1 == cond2);
14604 return CC_DNEmode;
14605
14606 case LE:
14607 gcc_assert (cond1 == cond2);
14608 return CC_DLEmode;
14609
14610 case GE:
14611 gcc_assert (cond1 == cond2);
14612 return CC_DGEmode;
14613
14614 case LEU:
14615 gcc_assert (cond1 == cond2);
14616 return CC_DLEUmode;
14617
14618 case GEU:
14619 gcc_assert (cond1 == cond2);
14620 return CC_DGEUmode;
14621
14622 default:
14623 gcc_unreachable ();
14624 }
14625 }
14626
14627 machine_mode
14628 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14629 {
14630 /* All floating point compares return CCFP if it is an equality
14631 comparison, and CCFPE otherwise. */
14632 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14633 {
14634 switch (op)
14635 {
14636 case EQ:
14637 case NE:
14638 case UNORDERED:
14639 case ORDERED:
14640 case UNLT:
14641 case UNLE:
14642 case UNGT:
14643 case UNGE:
14644 case UNEQ:
14645 case LTGT:
14646 return CCFPmode;
14647
14648 case LT:
14649 case LE:
14650 case GT:
14651 case GE:
14652 return CCFPEmode;
14653
14654 default:
14655 gcc_unreachable ();
14656 }
14657 }
14658
14659 /* A compare with a shifted operand. Because of canonicalization, the
14660 comparison will have to be swapped when we emit the assembler. */
14661 if (GET_MODE (y) == SImode
14662 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14663 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14664 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14665 || GET_CODE (x) == ROTATERT))
14666 return CC_SWPmode;
14667
14668 /* This operation is performed swapped, but since we only rely on the Z
14669 flag we don't need an additional mode. */
14670 if (GET_MODE (y) == SImode
14671 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14672 && GET_CODE (x) == NEG
14673 && (op == EQ || op == NE))
14674 return CC_Zmode;
14675
14676 /* This is a special case that is used by combine to allow a
14677 comparison of a shifted byte load to be split into a zero-extend
14678 followed by a comparison of the shifted integer (only valid for
14679 equalities and unsigned inequalities). */
14680 if (GET_MODE (x) == SImode
14681 && GET_CODE (x) == ASHIFT
14682 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14683 && GET_CODE (XEXP (x, 0)) == SUBREG
14684 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14685 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14686 && (op == EQ || op == NE
14687 || op == GEU || op == GTU || op == LTU || op == LEU)
14688 && CONST_INT_P (y))
14689 return CC_Zmode;
14690
14691   /* A construct for a conditional compare: if the false arm contains
14692      0, then both conditions must be true; otherwise either condition
14693 must be true. Not all conditions are possible, so CCmode is
14694 returned if it can't be done. */
14695 if (GET_CODE (x) == IF_THEN_ELSE
14696 && (XEXP (x, 2) == const0_rtx
14697 || XEXP (x, 2) == const1_rtx)
14698 && COMPARISON_P (XEXP (x, 0))
14699 && COMPARISON_P (XEXP (x, 1)))
14700 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14701 INTVAL (XEXP (x, 2)));
14702
14703 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14704 if (GET_CODE (x) == AND
14705 && (op == EQ || op == NE)
14706 && COMPARISON_P (XEXP (x, 0))
14707 && COMPARISON_P (XEXP (x, 1)))
14708 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14709 DOM_CC_X_AND_Y);
14710
14711 if (GET_CODE (x) == IOR
14712 && (op == EQ || op == NE)
14713 && COMPARISON_P (XEXP (x, 0))
14714 && COMPARISON_P (XEXP (x, 1)))
14715 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14716 DOM_CC_X_OR_Y);
14717
14718 /* An operation (on Thumb) where we want to test for a single bit.
14719 This is done by shifting that bit up into the top bit of a
14720 scratch register; we can then branch on the sign bit. */
14721 if (TARGET_THUMB1
14722 && GET_MODE (x) == SImode
14723 && (op == EQ || op == NE)
14724 && GET_CODE (x) == ZERO_EXTRACT
14725 && XEXP (x, 1) == const1_rtx)
14726 return CC_Nmode;
14727
14728   /* For an operation that sets the condition codes as a side-effect, the
14729 V flag is not set correctly, so we can only use comparisons where
14730 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14731 instead.) */
14732 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14733 if (GET_MODE (x) == SImode
14734 && y == const0_rtx
14735 && (op == EQ || op == NE || op == LT || op == GE)
14736 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14737 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14738 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14739 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14740 || GET_CODE (x) == LSHIFTRT
14741 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14742 || GET_CODE (x) == ROTATERT
14743 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14744 return CC_NOOVmode;
14745
14746 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14747 return CC_Zmode;
14748
14749 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14750 && GET_CODE (x) == PLUS
14751 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14752 return CC_Cmode;
14753
14754 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14755 {
14756 switch (op)
14757 {
14758 case EQ:
14759 case NE:
14760 /* A DImode comparison against zero can be implemented by
14761 or'ing the two halves together. */
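	  /* (E.g. "orrs Rt, Rxlo, Rxhi" sets Z exactly when the 64-bit
	     value is zero, so only the Z flag is needed.)  */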
14762 if (y == const0_rtx)
14763 return CC_Zmode;
14764
14765 /* We can do an equality test in three Thumb instructions. */
14766 if (!TARGET_32BIT)
14767 return CC_Zmode;
14768
14769 /* FALLTHROUGH */
14770
14771 case LTU:
14772 case LEU:
14773 case GTU:
14774 case GEU:
14775 /* DImode unsigned comparisons can be implemented by cmp +
14776 cmpeq without a scratch register. Not worth doing in
14777 Thumb-2. */
14778 if (TARGET_32BIT)
14779 return CC_CZmode;
14780
14781 /* FALLTHROUGH */
14782
14783 case LT:
14784 case LE:
14785 case GT:
14786 case GE:
14787 /* DImode signed and unsigned comparisons can be implemented
14788 by cmp + sbcs with a scratch register, but that does not
14789 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14790 gcc_assert (op != EQ && op != NE);
14791 return CC_NCVmode;
14792
14793 default:
14794 gcc_unreachable ();
14795 }
14796 }
14797
14798 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14799 return GET_MODE (x);
14800
14801 return CCmode;
14802 }
14803
14804 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14805    return the rtx for the CC register in the proper mode.  SCRATCH is an
14806    SImode register that may be clobbered when comparing DImode values.  */
14807 rtx
14808 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14809 {
14810 machine_mode mode;
14811 rtx cc_reg;
14812 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14813
14814 /* We might have X as a constant, Y as a register because of the predicates
14815 used for cmpdi. If so, force X to a register here. */
14816 if (dimode_comparison && !REG_P (x))
14817 x = force_reg (DImode, x);
14818
14819 mode = SELECT_CC_MODE (code, x, y);
14820 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14821
14822 if (dimode_comparison
14823 && mode != CC_CZmode)
14824 {
14825 rtx clobber, set;
14826
14827 /* To compare two non-zero values for equality, XOR them and
14828 then compare against zero. Not used for ARM mode; there
14829 CC_CZmode is cheaper. */
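      /* (In other words, the DImode test X == Y is rewritten here as
	 (X ^ Y) == 0, which the Z-flag-only comparison can handle.)  */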
14830 if (mode == CC_Zmode && y != const0_rtx)
14831 {
14832 gcc_assert (!reload_completed);
14833 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14834 y = const0_rtx;
14835 }
14836
14837 /* A scratch register is required. */
14838 if (reload_completed)
14839 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14840 else
14841 scratch = gen_rtx_SCRATCH (SImode);
14842
14843 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14844 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14846 }
14847 else
14848 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14849
14850 return cc_reg;
14851 }
14852
14853 /* Generate a sequence of insns that will generate the correct return
14854 address mask depending on the physical architecture that the program
14855 is running on. */
14856 rtx
14857 arm_gen_return_addr_mask (void)
14858 {
14859 rtx reg = gen_reg_rtx (Pmode);
14860
14861 emit_insn (gen_return_addr_mask (reg));
14862 return reg;
14863 }
14864
14865 void
14866 arm_reload_in_hi (rtx *operands)
14867 {
14868 rtx ref = operands[1];
14869 rtx base, scratch;
14870 HOST_WIDE_INT offset = 0;
14871
14872 if (GET_CODE (ref) == SUBREG)
14873 {
14874 offset = SUBREG_BYTE (ref);
14875 ref = SUBREG_REG (ref);
14876 }
14877
14878 if (REG_P (ref))
14879 {
14880 /* We have a pseudo which has been spilt onto the stack; there
14881 are two cases here: the first where there is a simple
14882 stack-slot replacement and a second where the stack-slot is
14883 out of range, or is used as a subreg. */
14884 if (reg_equiv_mem (REGNO (ref)))
14885 {
14886 ref = reg_equiv_mem (REGNO (ref));
14887 base = find_replacement (&XEXP (ref, 0));
14888 }
14889 else
14890 /* The slot is out of range, or was dressed up in a SUBREG. */
14891 base = reg_equiv_address (REGNO (ref));
14892
14893 /* PR 62554: If there is no equivalent memory location then just move
14894 the value as an SImode register move. This happens when the target
14895 architecture variant does not have an HImode register move. */
14896 if (base == NULL)
14897 {
14898 gcc_assert (REG_P (operands[0]));
14899 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14900 gen_rtx_SUBREG (SImode, ref, 0)));
14901 return;
14902 }
14903 }
14904 else
14905 base = find_replacement (&XEXP (ref, 0));
14906
14907 /* Handle the case where the address is too complex to be offset by 1. */
14908 if (GET_CODE (base) == MINUS
14909 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14910 {
14911 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14912
14913 emit_set_insn (base_plus, base);
14914 base = base_plus;
14915 }
14916 else if (GET_CODE (base) == PLUS)
14917 {
14918 /* The addend must be CONST_INT, or we would have dealt with it above. */
14919 HOST_WIDE_INT hi, lo;
14920
14921 offset += INTVAL (XEXP (base, 1));
14922 base = XEXP (base, 0);
14923
14924 /* Rework the address into a legal sequence of insns. */
14925 /* Valid range for lo is -4095 -> 4095 */
14926 lo = (offset >= 0
14927 ? (offset & 0xfff)
14928 : -((-offset) & 0xfff));
14929
14930 /* Corner case, if lo is the max offset then we would be out of range
14931 once we have added the additional 1 below, so bump the msb into the
14932 pre-loading insn(s). */
14933 if (lo == 4095)
14934 lo &= 0x7ff;
14935
14936 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14937 ^ (HOST_WIDE_INT) 0x80000000)
14938 - (HOST_WIDE_INT) 0x80000000);
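      /* Illustrative example: for offset == 4101 the split gives lo == 5
	 and hi == 4096, so adding HI to the base leaves byte offsets
	 (LO and LO + 1) well inside the +/-4095 LDRB range.  */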
14939
14940 gcc_assert (hi + lo == offset);
14941
14942 if (hi != 0)
14943 {
14944 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14945
14946 /* Get the base address; addsi3 knows how to handle constants
14947 that require more than one insn. */
14948 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14949 base = base_plus;
14950 offset = lo;
14951 }
14952 }
14953
14954 /* Operands[2] may overlap operands[0] (though it won't overlap
14955 operands[1]), that's why we asked for a DImode reg -- so we can
14956 use the bit that does not overlap. */
14957 if (REGNO (operands[2]) == REGNO (operands[0]))
14958 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14959 else
14960 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14961
14962 emit_insn (gen_zero_extendqisi2 (scratch,
14963 gen_rtx_MEM (QImode,
14964 plus_constant (Pmode, base,
14965 offset))));
14966 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14967 gen_rtx_MEM (QImode,
14968 plus_constant (Pmode, base,
14969 offset + 1))));
14970 if (!BYTES_BIG_ENDIAN)
14971 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14972 gen_rtx_IOR (SImode,
14973 gen_rtx_ASHIFT
14974 (SImode,
14975 gen_rtx_SUBREG (SImode, operands[0], 0),
14976 GEN_INT (8)),
14977 scratch));
14978 else
14979 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14980 gen_rtx_IOR (SImode,
14981 gen_rtx_ASHIFT (SImode, scratch,
14982 GEN_INT (8)),
14983 gen_rtx_SUBREG (SImode, operands[0], 0)));
14984 }
14985
14986 /* Handle storing a half-word to memory during reload by synthesizing as two
14987 byte stores. Take care not to clobber the input values until after we
14988 have moved them somewhere safe. This code assumes that if the DImode
14989 scratch in operands[2] overlaps either the input value or output address
14990 in some way, then that value must die in this insn (we absolutely need
14991 two scratch registers for some corner cases). */
14992 void
14993 arm_reload_out_hi (rtx *operands)
14994 {
14995 rtx ref = operands[0];
14996 rtx outval = operands[1];
14997 rtx base, scratch;
14998 HOST_WIDE_INT offset = 0;
14999
15000 if (GET_CODE (ref) == SUBREG)
15001 {
15002 offset = SUBREG_BYTE (ref);
15003 ref = SUBREG_REG (ref);
15004 }
15005
15006 if (REG_P (ref))
15007 {
15008 /* We have a pseudo which has been spilt onto the stack; there
15009 are two cases here: the first where there is a simple
15010 stack-slot replacement and a second where the stack-slot is
15011 out of range, or is used as a subreg. */
15012 if (reg_equiv_mem (REGNO (ref)))
15013 {
15014 ref = reg_equiv_mem (REGNO (ref));
15015 base = find_replacement (&XEXP (ref, 0));
15016 }
15017 else
15018 /* The slot is out of range, or was dressed up in a SUBREG. */
15019 base = reg_equiv_address (REGNO (ref));
15020
15021 /* PR 62254: If there is no equivalent memory location then just move
15022 the value as an SImode register move. This happens when the target
15023 architecture variant does not have an HImode register move. */
15024 if (base == NULL)
15025 {
15026 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15027
15028 if (REG_P (outval))
15029 {
15030 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15031 gen_rtx_SUBREG (SImode, outval, 0)));
15032 }
15033 else /* SUBREG_P (outval) */
15034 {
15035 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15036 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15037 SUBREG_REG (outval)));
15038 else
15039 /* FIXME: Handle other cases ? */
15040 gcc_unreachable ();
15041 }
15042 return;
15043 }
15044 }
15045 else
15046 base = find_replacement (&XEXP (ref, 0));
15047
15048 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15049
15050 /* Handle the case where the address is too complex to be offset by 1. */
15051 if (GET_CODE (base) == MINUS
15052 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15053 {
15054 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15055
15056 /* Be careful not to destroy OUTVAL. */
15057 if (reg_overlap_mentioned_p (base_plus, outval))
15058 {
15059 /* Updating base_plus might destroy outval, see if we can
15060 swap the scratch and base_plus. */
15061 if (!reg_overlap_mentioned_p (scratch, outval))
15062 std::swap (scratch, base_plus);
15063 else
15064 {
15065 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15066
15067 /* Be conservative and copy OUTVAL into the scratch now,
15068 this should only be necessary if outval is a subreg
15069 of something larger than a word. */
15070 /* XXX Might this clobber base? I can't see how it can,
15071 since scratch is known to overlap with OUTVAL, and
15072 must be wider than a word. */
15073 emit_insn (gen_movhi (scratch_hi, outval));
15074 outval = scratch_hi;
15075 }
15076 }
15077
15078 emit_set_insn (base_plus, base);
15079 base = base_plus;
15080 }
15081 else if (GET_CODE (base) == PLUS)
15082 {
15083 /* The addend must be CONST_INT, or we would have dealt with it above. */
15084 HOST_WIDE_INT hi, lo;
15085
15086 offset += INTVAL (XEXP (base, 1));
15087 base = XEXP (base, 0);
15088
15089 /* Rework the address into a legal sequence of insns. */
15090 /* Valid range for lo is -4095 -> 4095 */
15091 lo = (offset >= 0
15092 ? (offset & 0xfff)
15093 : -((-offset) & 0xfff));
15094
15095 /* Corner case, if lo is the max offset then we would be out of range
15096 once we have added the additional 1 below, so bump the msb into the
15097 pre-loading insn(s). */
15098 if (lo == 4095)
15099 lo &= 0x7ff;
15100
15101 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15102 ^ (HOST_WIDE_INT) 0x80000000)
15103 - (HOST_WIDE_INT) 0x80000000);
15104
15105 gcc_assert (hi + lo == offset);
15106
15107 if (hi != 0)
15108 {
15109 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15110
15111 /* Be careful not to destroy OUTVAL. */
15112 if (reg_overlap_mentioned_p (base_plus, outval))
15113 {
15114 /* Updating base_plus might destroy outval, see if we
15115 can swap the scratch and base_plus. */
15116 if (!reg_overlap_mentioned_p (scratch, outval))
15117 std::swap (scratch, base_plus);
15118 else
15119 {
15120 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15121
15122 /* Be conservative and copy outval into scratch now,
15123 this should only be necessary if outval is a
15124 subreg of something larger than a word. */
15125 /* XXX Might this clobber base? I can't see how it
15126 can, since scratch is known to overlap with
15127 outval. */
15128 emit_insn (gen_movhi (scratch_hi, outval));
15129 outval = scratch_hi;
15130 }
15131 }
15132
15133 /* Get the base address; addsi3 knows how to handle constants
15134 that require more than one insn. */
15135 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15136 base = base_plus;
15137 offset = lo;
15138 }
15139 }
15140
15141 if (BYTES_BIG_ENDIAN)
15142 {
15143 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15144 plus_constant (Pmode, base,
15145 offset + 1)),
15146 gen_lowpart (QImode, outval)));
15147 emit_insn (gen_lshrsi3 (scratch,
15148 gen_rtx_SUBREG (SImode, outval, 0),
15149 GEN_INT (8)));
15150 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15151 offset)),
15152 gen_lowpart (QImode, scratch)));
15153 }
15154 else
15155 {
15156 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15157 offset)),
15158 gen_lowpart (QImode, outval)));
15159 emit_insn (gen_lshrsi3 (scratch,
15160 gen_rtx_SUBREG (SImode, outval, 0),
15161 GEN_INT (8)));
15162 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15163 plus_constant (Pmode, base,
15164 offset + 1)),
15165 gen_lowpart (QImode, scratch)));
15166 }
15167 }
15168
15169 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15170 (padded to the size of a word) should be passed in a register. */
15171
15172 static bool
15173 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15174 {
15175 if (TARGET_AAPCS_BASED)
15176 return must_pass_in_stack_var_size (mode, type);
15177 else
15178 return must_pass_in_stack_var_size_or_pad (mode, type);
15179 }
15180
15181
15182 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15183 byte of a stack argument has useful data. For legacy APCS ABIs we use
15184 the default. For AAPCS based ABIs small aggregate types are placed
15185 in the lowest memory address. */
15186
15187 static pad_direction
15188 arm_function_arg_padding (machine_mode mode, const_tree type)
15189 {
15190 if (!TARGET_AAPCS_BASED)
15191 return default_function_arg_padding (mode, type);
15192
15193 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15194 return PAD_DOWNWARD;
15195
15196 return PAD_UPWARD;
15197 }
15198
15199
15200 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15201 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15202 register has useful data, and return the opposite if the most
15203 significant byte does. */
15204
15205 bool
15206 arm_pad_reg_upward (machine_mode mode,
15207 tree type, int first ATTRIBUTE_UNUSED)
15208 {
15209 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15210 {
15211 /* For AAPCS, small aggregates, small fixed-point types,
15212 and small complex types are always padded upwards. */
15213 if (type)
15214 {
15215 if ((AGGREGATE_TYPE_P (type)
15216 || TREE_CODE (type) == COMPLEX_TYPE
15217 || FIXED_POINT_TYPE_P (type))
15218 && int_size_in_bytes (type) <= 4)
15219 return true;
15220 }
15221 else
15222 {
15223 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15224 && GET_MODE_SIZE (mode) <= 4)
15225 return true;
15226 }
15227 }
15228
15229 /* Otherwise, use default padding. */
15230 return !BYTES_BIG_ENDIAN;
15231 }
15232
15233 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15234 assuming that the address in the base register is word aligned. */
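/* (The limits used below follow from the encodings: ARM-state LDRD/STRD
   take an unscaled 8-bit immediate, hence offsets in [-255, 255], while
   Thumb-2 LDRD/STRD take an 8-bit immediate scaled by 4, hence multiples
   of 4 in [-1020, 1020].)  */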
15235 bool
15236 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15237 {
15238 HOST_WIDE_INT max_offset;
15239
15240 /* Offset must be a multiple of 4 in Thumb mode. */
15241 if (TARGET_THUMB2 && ((offset & 3) != 0))
15242 return false;
15243
15244 if (TARGET_THUMB2)
15245 max_offset = 1020;
15246 else if (TARGET_ARM)
15247 max_offset = 255;
15248 else
15249 return false;
15250
15251 return ((offset <= max_offset) && (offset >= -max_offset));
15252 }
15253
15254 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15255 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15256 Assumes that the address in the base register RN is word aligned. Pattern
15257 guarantees that both memory accesses use the same base register,
15258 the offsets are constants within the range, and the gap between the offsets is 4.
15259    If reload is complete then check that registers are legal.  WBACK indicates whether
15260 address is updated. LOAD indicates whether memory access is load or store. */
15261 bool
15262 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15263 bool wback, bool load)
15264 {
15265 unsigned int t, t2, n;
15266
15267 if (!reload_completed)
15268 return true;
15269
15270 if (!offset_ok_for_ldrd_strd (offset))
15271 return false;
15272
15273 t = REGNO (rt);
15274 t2 = REGNO (rt2);
15275 n = REGNO (rn);
15276
15277 if ((TARGET_THUMB2)
15278 && ((wback && (n == t || n == t2))
15279 || (t == SP_REGNUM)
15280 || (t == PC_REGNUM)
15281 || (t2 == SP_REGNUM)
15282 || (t2 == PC_REGNUM)
15283 || (!load && (n == PC_REGNUM))
15284 || (load && (t == t2))
15285 /* Triggers Cortex-M3 LDRD errata. */
15286 || (!wback && load && fix_cm3_ldrd && (n == t))))
15287 return false;
15288
15289 if ((TARGET_ARM)
15290 && ((wback && (n == t || n == t2))
15291 || (t2 == PC_REGNUM)
15292 || (t % 2 != 0) /* First destination register is not even. */
15293 || (t2 != t + 1)
15294 /* PC can be used as base register (for offset addressing only),
15295 	  but it is deprecated.  */
15296 || (n == PC_REGNUM)))
15297 return false;
15298
15299 return true;
15300 }
15301
15302 /* Return true if a 64-bit access with alignment ALIGN and with a
15303 constant offset OFFSET from the base pointer is permitted on this
15304 architecture. */
15305 static bool
15306 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15307 {
15308 return (unaligned_access
15309 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15310 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15311 }
15312
15313 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15314 operand MEM's address contains an immediate offset from the base
15315 register and has no side effects, in which case it sets BASE,
15316 OFFSET and ALIGN accordingly. */
15317 static bool
15318 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15319 {
15320 rtx addr;
15321
15322 gcc_assert (base != NULL && offset != NULL);
15323
15324 /* TODO: Handle more general memory operand patterns, such as
15325 PRE_DEC and PRE_INC. */
15326
15327 if (side_effects_p (mem))
15328 return false;
15329
15330 /* Can't deal with subregs. */
15331 if (GET_CODE (mem) == SUBREG)
15332 return false;
15333
15334 gcc_assert (MEM_P (mem));
15335
15336 *offset = const0_rtx;
15337 *align = MEM_ALIGN (mem);
15338
15339 addr = XEXP (mem, 0);
15340
15341 /* If addr isn't valid for DImode, then we can't handle it. */
15342 if (!arm_legitimate_address_p (DImode, addr,
15343 reload_in_progress || reload_completed))
15344 return false;
15345
15346 if (REG_P (addr))
15347 {
15348 *base = addr;
15349 return true;
15350 }
15351 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15352 {
15353 *base = XEXP (addr, 0);
15354 *offset = XEXP (addr, 1);
15355 return (REG_P (*base) && CONST_INT_P (*offset));
15356 }
15357
15358 return false;
15359 }
15360
15361 /* Called from a peephole2 to replace two word-size accesses with a
15362 single LDRD/STRD instruction. Returns true iff we can generate a
15363 new instruction sequence. That is, both accesses use the same base
15364 register and the gap between constant offsets is 4. This function
15365 may reorder its operands to match ldrd/strd RTL templates.
15366 OPERANDS are the operands found by the peephole matcher;
15367 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15368    corresponding memory operands.  LOAD indicates whether the access
15369    is load or store.  CONST_STORE indicates a store of constant
15370    integer values held in OPERANDS[4,5] and assumes that the pattern
15371    is four insns long, for the purpose of checking dead registers.
15372 COMMUTE indicates that register operands may be reordered. */
15373 bool
15374 gen_operands_ldrd_strd (rtx *operands, bool load,
15375 bool const_store, bool commute)
15376 {
15377 int nops = 2;
15378 HOST_WIDE_INT offsets[2], offset, align[2];
15379 rtx base = NULL_RTX;
15380 rtx cur_base, cur_offset, tmp;
15381 int i, gap;
15382 HARD_REG_SET regset;
15383
15384 gcc_assert (!const_store || !load);
15385 /* Check that the memory references are immediate offsets from the
15386 same base register. Extract the base register, the destination
15387 registers, and the corresponding memory offsets. */
15388 for (i = 0; i < nops; i++)
15389 {
15390 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15391 &align[i]))
15392 return false;
15393
15394 if (i == 0)
15395 base = cur_base;
15396 else if (REGNO (base) != REGNO (cur_base))
15397 return false;
15398
15399 offsets[i] = INTVAL (cur_offset);
15400 if (GET_CODE (operands[i]) == SUBREG)
15401 {
15402 tmp = SUBREG_REG (operands[i]);
15403 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15404 operands[i] = tmp;
15405 }
15406 }
15407
15408 /* Make sure there is no dependency between the individual loads. */
15409 if (load && REGNO (operands[0]) == REGNO (base))
15410 return false; /* RAW */
15411
15412 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15413 return false; /* WAW */
15414
15415 /* If the same input register is used in both stores
15416 when storing different constants, try to find a free register.
15417 For example, the code
15418 mov r0, 0
15419 str r0, [r2]
15420 mov r0, 1
15421 str r0, [r2, #4]
15422 can be transformed into
15423 mov r1, 0
15424 mov r0, 1
15425 strd r1, r0, [r2]
15426 in Thumb mode assuming that r1 is free.
15427 For ARM mode do the same but only if the starting register
15428 can be made to be even. */
15429 if (const_store
15430 && REGNO (operands[0]) == REGNO (operands[1])
15431 && INTVAL (operands[4]) != INTVAL (operands[5]))
15432 {
15433 if (TARGET_THUMB2)
15434 {
15435 CLEAR_HARD_REG_SET (regset);
15436 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15437 if (tmp == NULL_RTX)
15438 return false;
15439
15440 /* Use the new register in the first load to ensure that
15441 if the original input register is not dead after peephole,
15442 then it will have the correct constant value. */
15443 operands[0] = tmp;
15444 }
15445 else if (TARGET_ARM)
15446 {
15447 int regno = REGNO (operands[0]);
15448 if (!peep2_reg_dead_p (4, operands[0]))
15449 {
15450 /* When the input register is even and is not dead after the
15451 pattern, it has to hold the second constant but we cannot
15452 form a legal STRD in ARM mode with this register as the second
15453 register. */
15454 if (regno % 2 == 0)
15455 return false;
15456
15457 /* Is regno-1 free? */
15458 SET_HARD_REG_SET (regset);
15459 CLEAR_HARD_REG_BIT(regset, regno - 1);
15460 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15461 if (tmp == NULL_RTX)
15462 return false;
15463
15464 operands[0] = tmp;
15465 }
15466 else
15467 {
15468 /* Find a DImode register. */
15469 CLEAR_HARD_REG_SET (regset);
15470 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15471 if (tmp != NULL_RTX)
15472 {
15473 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15474 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15475 }
15476 else
15477 {
15478 /* Can we use the input register to form a DI register? */
15479 SET_HARD_REG_SET (regset);
15480 CLEAR_HARD_REG_BIT(regset,
15481 regno % 2 == 0 ? regno + 1 : regno - 1);
15482 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15483 if (tmp == NULL_RTX)
15484 return false;
15485 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15486 }
15487 }
15488
15489 gcc_assert (operands[0] != NULL_RTX);
15490 gcc_assert (operands[1] != NULL_RTX);
15491 gcc_assert (REGNO (operands[0]) % 2 == 0);
15492 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15493 }
15494 }
15495
15496 /* Make sure the instructions are ordered with lower memory access first. */
15497 if (offsets[0] > offsets[1])
15498 {
15499 gap = offsets[0] - offsets[1];
15500 offset = offsets[1];
15501
15502 /* Swap the instructions such that lower memory is accessed first. */
15503 std::swap (operands[0], operands[1]);
15504 std::swap (operands[2], operands[3]);
15505 std::swap (align[0], align[1]);
15506 if (const_store)
15507 std::swap (operands[4], operands[5]);
15508 }
15509 else
15510 {
15511 gap = offsets[1] - offsets[0];
15512 offset = offsets[0];
15513 }
15514
15515 /* Make sure accesses are to consecutive memory locations. */
15516 if (gap != 4)
15517 return false;
15518
15519 if (!align_ok_ldrd_strd (align[0], offset))
15520 return false;
15521
15522 /* Make sure we generate legal instructions. */
15523 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15524 false, load))
15525 return true;
15526
15527 /* In Thumb state, where registers are almost unconstrained, there
15528      is little hope of fixing it.  */
15529 if (TARGET_THUMB2)
15530 return false;
15531
15532 if (load && commute)
15533 {
15534 /* Try reordering registers. */
15535 std::swap (operands[0], operands[1]);
15536 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15537 false, load))
15538 return true;
15539 }
15540
15541 if (const_store)
15542 {
15543 /* If input registers are dead after this pattern, they can be
15544 reordered or replaced by other registers that are free in the
15545 current pattern. */
15546 if (!peep2_reg_dead_p (4, operands[0])
15547 || !peep2_reg_dead_p (4, operands[1]))
15548 return false;
15549
15550 /* Try to reorder the input registers. */
15551 /* For example, the code
15552 mov r0, 0
15553 mov r1, 1
15554 str r1, [r2]
15555 str r0, [r2, #4]
15556 can be transformed into
15557 mov r1, 0
15558 mov r0, 1
15559 	   strd r0, r1, [r2]
15560 */
15561 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15562 false, false))
15563 {
15564 std::swap (operands[0], operands[1]);
15565 return true;
15566 }
15567
15568 /* Try to find a free DI register. */
15569 CLEAR_HARD_REG_SET (regset);
15570 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15571 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15572 while (true)
15573 {
15574 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15575 if (tmp == NULL_RTX)
15576 return false;
15577
15578 /* DREG must be an even-numbered register in DImode.
15579 Split it into SI registers. */
15580 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15581 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15582 gcc_assert (operands[0] != NULL_RTX);
15583 gcc_assert (operands[1] != NULL_RTX);
15584 gcc_assert (REGNO (operands[0]) % 2 == 0);
15585 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15586
15587 return (operands_ok_ldrd_strd (operands[0], operands[1],
15588 base, offset,
15589 false, load));
15590 }
15591 }
15592
15593 return false;
15594 }
15595
15596
15597
15598 \f
15599 /* Print a symbolic form of X to the debug file, F. */
15600 static void
15601 arm_print_value (FILE *f, rtx x)
15602 {
15603 switch (GET_CODE (x))
15604 {
15605 case CONST_INT:
15606 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15607 return;
15608
15609 case CONST_DOUBLE:
15610 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15611 return;
15612
15613 case CONST_VECTOR:
15614 {
15615 int i;
15616
15617 fprintf (f, "<");
15618 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15619 {
15620 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15621 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15622 fputc (',', f);
15623 }
15624 fprintf (f, ">");
15625 }
15626 return;
15627
15628 case CONST_STRING:
15629 fprintf (f, "\"%s\"", XSTR (x, 0));
15630 return;
15631
15632 case SYMBOL_REF:
15633 fprintf (f, "`%s'", XSTR (x, 0));
15634 return;
15635
15636 case LABEL_REF:
15637 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15638 return;
15639
15640 case CONST:
15641 arm_print_value (f, XEXP (x, 0));
15642 return;
15643
15644 case PLUS:
15645 arm_print_value (f, XEXP (x, 0));
15646 fprintf (f, "+");
15647 arm_print_value (f, XEXP (x, 1));
15648 return;
15649
15650 case PC:
15651 fprintf (f, "pc");
15652 return;
15653
15654 default:
15655 fprintf (f, "????");
15656 return;
15657 }
15658 }
15659 \f
15660 /* Routines for manipulation of the constant pool. */
15661
15662 /* Arm instructions cannot load a large constant directly into a
15663 register; they have to come from a pc relative load. The constant
15664 must therefore be placed in the addressable range of the pc
15665 relative load. Depending on the precise pc relative load
15666 instruction the range is somewhere between 256 bytes and 4k. This
15667 means that we often have to dump a constant inside a function, and
15668 generate code to branch around it.
15669
15670 It is important to minimize this, since the branches will slow
15671 things down and make the code larger.
15672
15673 Normally we can hide the table after an existing unconditional
15674 branch so that there is no interruption of the flow, but in the
15675 worst case the code looks like this:
15676
15677 ldr rn, L1
15678 ...
15679 b L2
15680 align
15681 L1: .long value
15682 L2:
15683 ...
15684
15685 ldr rn, L3
15686 ...
15687 b L4
15688 align
15689 L3: .long value
15690 L4:
15691 ...
15692
15693 We fix this by performing a scan after scheduling, which notices
15694 which instructions need to have their operands fetched from the
15695 constant table and builds the table.
15696
15697 The algorithm starts by building a table of all the constants that
15698 need fixing up and all the natural barriers in the function (places
15699 where a constant table can be dropped without breaking the flow).
15700 For each fixup we note how far the pc-relative replacement will be
15701 able to reach and the offset of the instruction into the function.
15702
15703 Having built the table we then group the fixes together to form
15704 tables that are as large as possible (subject to addressing
15705 constraints) and emit each table of constants after the last
15706 barrier that is within range of all the instructions in the group.
15707 If a group does not contain a barrier, then we forcibly create one
15708 by inserting a jump instruction into the flow. Once the table has
15709 been inserted, the insns are then modified to reference the
15710 relevant entry in the pool.
15711
15712 Possible enhancements to the algorithm (not implemented) are:
15713
15714 1) For some processors and object formats, there may be benefit in
15715 aligning the pools to the start of cache lines; this alignment
15716 would need to be taken into account when calculating addressability
15717 of a pool. */
15718
15719 /* These typedefs are located at the start of this file, so that
15720 they can be used in the prototypes there. This comment is to
15721 remind readers of that fact so that the following structures
15722 can be understood more easily.
15723
15724 typedef struct minipool_node Mnode;
15725 typedef struct minipool_fixup Mfix; */
15726
15727 struct minipool_node
15728 {
15729 /* Doubly linked chain of entries. */
15730 Mnode * next;
15731 Mnode * prev;
15732   /* The maximum offset into the code at which this entry can be placed.  While
15733 pushing fixes for forward references, all entries are sorted in order
15734 of increasing max_address. */
15735 HOST_WIDE_INT max_address;
15736 /* Similarly for an entry inserted for a backwards ref. */
15737 HOST_WIDE_INT min_address;
15738 /* The number of fixes referencing this entry. This can become zero
15739 if we "unpush" an entry. In this case we ignore the entry when we
15740 come to emit the code. */
15741 int refcount;
15742 /* The offset from the start of the minipool. */
15743 HOST_WIDE_INT offset;
15744   /* The value in the table.  */
15745 rtx value;
15746 /* The mode of value. */
15747 machine_mode mode;
15748 /* The size of the value. With iWMMXt enabled
15749      sizes > 4 also imply an alignment of 8 bytes.  */
15750 int fix_size;
15751 };
15752
15753 struct minipool_fixup
15754 {
15755 Mfix * next;
15756 rtx_insn * insn;
15757 HOST_WIDE_INT address;
15758 rtx * loc;
15759 machine_mode mode;
15760 int fix_size;
15761 rtx value;
15762 Mnode * minipool;
15763 HOST_WIDE_INT forwards;
15764 HOST_WIDE_INT backwards;
15765 };
15766
15767 /* Fixes less than a word need padding out to a word boundary. */
15768 #define MINIPOOL_FIX_SIZE(mode) \
15769 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
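/* For example, an HImode constant still occupies a full 4-byte slot,
   while a DFmode constant occupies 8 bytes.  */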
15770
15771 static Mnode * minipool_vector_head;
15772 static Mnode * minipool_vector_tail;
15773 static rtx_code_label *minipool_vector_label;
15774 static int minipool_pad;
15775
15776 /* The linked list of all minipool fixes required for this function. */
15777 Mfix * minipool_fix_head;
15778 Mfix * minipool_fix_tail;
15779 /* The fix entry for the current minipool, once it has been placed. */
15780 Mfix * minipool_barrier;
15781
15782 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15783 #define JUMP_TABLES_IN_TEXT_SECTION 0
15784 #endif
15785
15786 static HOST_WIDE_INT
15787 get_jump_table_size (rtx_jump_table_data *insn)
15788 {
15789   /* ADDR_VECs only take room if read-only data goes into the text
15790 section. */
15791 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15792 {
15793 rtx body = PATTERN (insn);
15794 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15795 HOST_WIDE_INT size;
15796 HOST_WIDE_INT modesize;
15797
15798 modesize = GET_MODE_SIZE (GET_MODE (body));
15799 size = modesize * XVECLEN (body, elt);
15800 switch (modesize)
15801 {
15802 case 1:
15803 /* Round up size of TBB table to a halfword boundary. */
15804 size = (size + 1) & ~HOST_WIDE_INT_1;
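	    /* (E.g. a 5-entry TBB table is padded from 5 to 6 bytes.)  */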
15805 break;
15806 case 2:
15807 /* No padding necessary for TBH. */
15808 break;
15809 case 4:
15810 /* Add two bytes for alignment on Thumb. */
15811 if (TARGET_THUMB)
15812 size += 2;
15813 break;
15814 default:
15815 gcc_unreachable ();
15816 }
15817 return size;
15818 }
15819
15820 return 0;
15821 }
15822
15823 /* Return the maximum amount of padding that will be inserted before
15824 label LABEL. */
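/* (For instance, a label aligned to 8 bytes in Thumb code, where the
   minimum instruction size is 2, may be preceded by up to 6 bytes of
   padding.)  */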
15825
15826 static HOST_WIDE_INT
15827 get_label_padding (rtx label)
15828 {
15829 HOST_WIDE_INT align, min_insn_size;
15830
15831 align = 1 << label_to_alignment (label);
15832 min_insn_size = TARGET_THUMB ? 2 : 4;
15833 return align > min_insn_size ? align - min_insn_size : 0;
15834 }
15835
15836 /* Move a minipool fix MP from its current location to before MAX_MP.
15837 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15838 constraints may need updating. */
15839 static Mnode *
15840 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15841 HOST_WIDE_INT max_address)
15842 {
15843 /* The code below assumes these are different. */
15844 gcc_assert (mp != max_mp);
15845
15846 if (max_mp == NULL)
15847 {
15848 if (max_address < mp->max_address)
15849 mp->max_address = max_address;
15850 }
15851 else
15852 {
15853 if (max_address > max_mp->max_address - mp->fix_size)
15854 mp->max_address = max_mp->max_address - mp->fix_size;
15855 else
15856 mp->max_address = max_address;
15857
15858 /* Unlink MP from its current position. Since max_mp is non-null,
15859 mp->prev must be non-null. */
15860 mp->prev->next = mp->next;
15861 if (mp->next != NULL)
15862 mp->next->prev = mp->prev;
15863 else
15864 minipool_vector_tail = mp->prev;
15865
15866 /* Re-insert it before MAX_MP. */
15867 mp->next = max_mp;
15868 mp->prev = max_mp->prev;
15869 max_mp->prev = mp;
15870
15871 if (mp->prev != NULL)
15872 mp->prev->next = mp;
15873 else
15874 minipool_vector_head = mp;
15875 }
15876
15877 /* Save the new entry. */
15878 max_mp = mp;
15879
15880 /* Scan over the preceding entries and adjust their addresses as
15881 required. */
15882 while (mp->prev != NULL
15883 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15884 {
15885 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15886 mp = mp->prev;
15887 }
15888
15889 return max_mp;
15890 }
15891
15892 /* Add a constant to the minipool for a forward reference. Returns the
15893 node added or NULL if the constant will not fit in this pool. */
15894 static Mnode *
15895 add_minipool_forward_ref (Mfix *fix)
15896 {
15897 /* If set, max_mp is the first pool_entry that has a lower
15898 constraint than the one we are trying to add. */
15899 Mnode * max_mp = NULL;
15900 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15901 Mnode * mp;
15902
15903 /* If the minipool starts before the end of FIX->INSN then this FIX
15904      cannot be placed into the current pool.  Furthermore, adding the
15905 new constant pool entry may cause the pool to start FIX_SIZE bytes
15906 earlier. */
15907 if (minipool_vector_head &&
15908 (fix->address + get_attr_length (fix->insn)
15909 >= minipool_vector_head->max_address - fix->fix_size))
15910 return NULL;
15911
15912 /* Scan the pool to see if a constant with the same value has
15913 already been added. While we are doing this, also note the
15914 location where we must insert the constant if it doesn't already
15915 exist. */
15916 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15917 {
15918 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15919 && fix->mode == mp->mode
15920 && (!LABEL_P (fix->value)
15921 || (CODE_LABEL_NUMBER (fix->value)
15922 == CODE_LABEL_NUMBER (mp->value)))
15923 && rtx_equal_p (fix->value, mp->value))
15924 {
15925 /* More than one fix references this entry. */
15926 mp->refcount++;
15927 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15928 }
15929
15930 /* Note the insertion point if necessary. */
15931 if (max_mp == NULL
15932 && mp->max_address > max_address)
15933 max_mp = mp;
15934
15935       /* If we are inserting an 8-byte aligned quantity and
15936 we have not already found an insertion point, then
15937 make sure that all such 8-byte aligned quantities are
15938 placed at the start of the pool. */
15939 if (ARM_DOUBLEWORD_ALIGN
15940 && max_mp == NULL
15941 && fix->fix_size >= 8
15942 && mp->fix_size < 8)
15943 {
15944 max_mp = mp;
15945 max_address = mp->max_address;
15946 }
15947 }
15948
15949 /* The value is not currently in the minipool, so we need to create
15950 a new entry for it. If MAX_MP is NULL, the entry will be put on
15951 the end of the list since the placement is less constrained than
15952 any existing entry. Otherwise, we insert the new fix before
15953 MAX_MP and, if necessary, adjust the constraints on the other
15954 entries. */
15955 mp = XNEW (Mnode);
15956 mp->fix_size = fix->fix_size;
15957 mp->mode = fix->mode;
15958 mp->value = fix->value;
15959 mp->refcount = 1;
15960 /* Not yet required for a backwards ref. */
15961 mp->min_address = -65536;
15962
15963 if (max_mp == NULL)
15964 {
15965 mp->max_address = max_address;
15966 mp->next = NULL;
15967 mp->prev = minipool_vector_tail;
15968
15969 if (mp->prev == NULL)
15970 {
15971 minipool_vector_head = mp;
15972 minipool_vector_label = gen_label_rtx ();
15973 }
15974 else
15975 mp->prev->next = mp;
15976
15977 minipool_vector_tail = mp;
15978 }
15979 else
15980 {
15981 if (max_address > max_mp->max_address - mp->fix_size)
15982 mp->max_address = max_mp->max_address - mp->fix_size;
15983 else
15984 mp->max_address = max_address;
15985
15986 mp->next = max_mp;
15987 mp->prev = max_mp->prev;
15988 max_mp->prev = mp;
15989 if (mp->prev != NULL)
15990 mp->prev->next = mp;
15991 else
15992 minipool_vector_head = mp;
15993 }
15994
15995 /* Save the new entry. */
15996 max_mp = mp;
15997
15998 /* Scan over the preceding entries and adjust their addresses as
15999 required. */
16000 while (mp->prev != NULL
16001 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16002 {
16003 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16004 mp = mp->prev;
16005 }
16006
16007 return max_mp;
16008 }
16009
16010 static Mnode *
16011 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16012 HOST_WIDE_INT min_address)
16013 {
16014 HOST_WIDE_INT offset;
16015
16016 /* The code below assumes these are different. */
16017 gcc_assert (mp != min_mp);
16018
16019 if (min_mp == NULL)
16020 {
16021 if (min_address > mp->min_address)
16022 mp->min_address = min_address;
16023 }
16024 else
16025 {
16026 /* We will adjust this below if it is too loose. */
16027 mp->min_address = min_address;
16028
16029 /* Unlink MP from its current position. Since min_mp is non-null,
16030 mp->next must be non-null. */
16031 mp->next->prev = mp->prev;
16032 if (mp->prev != NULL)
16033 mp->prev->next = mp->next;
16034 else
16035 minipool_vector_head = mp->next;
16036
16037 /* Reinsert it after MIN_MP. */
16038 mp->prev = min_mp;
16039 mp->next = min_mp->next;
16040 min_mp->next = mp;
16041 if (mp->next != NULL)
16042 mp->next->prev = mp;
16043 else
16044 minipool_vector_tail = mp;
16045 }
16046
16047 min_mp = mp;
16048
16049 offset = 0;
16050 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16051 {
16052 mp->offset = offset;
16053 if (mp->refcount > 0)
16054 offset += mp->fix_size;
16055
16056 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16057 mp->next->min_address = mp->min_address + mp->fix_size;
16058 }
16059
16060 return min_mp;
16061 }
16062
16063 /* Add a constant to the minipool for a backward reference. Returns the
16064 node added or NULL if the constant will not fit in this pool.
16065
16066 Note that the code for inserting a backwards reference can be
16067 somewhat confusing because the calculated offsets for each fix do
16068 not take into account the size of the pool (which is still under
16069 construction). */
16070 static Mnode *
16071 add_minipool_backward_ref (Mfix *fix)
16072 {
16073 /* If set, min_mp is the last pool_entry that has a lower constraint
16074 than the one we are trying to add. */
16075 Mnode *min_mp = NULL;
16076 /* This can be negative, since it is only a constraint. */
16077 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16078 Mnode *mp;
16079
16080 /* If we can't reach the current pool from this insn, or if we can't
16081 insert this entry at the end of the pool without pushing other
16082 fixes out of range, then we don't try. This ensures that we
16083 can't fail later on. */
16084 if (min_address >= minipool_barrier->address
16085 || (minipool_vector_tail->min_address + fix->fix_size
16086 >= minipool_barrier->address))
16087 return NULL;
16088
16089 /* Scan the pool to see if a constant with the same value has
16090 already been added. While we are doing this, also note the
16091 location where we must insert the constant if it doesn't already
16092 exist. */
16093 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16094 {
16095 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16096 && fix->mode == mp->mode
16097 && (!LABEL_P (fix->value)
16098 || (CODE_LABEL_NUMBER (fix->value)
16099 == CODE_LABEL_NUMBER (mp->value)))
16100 && rtx_equal_p (fix->value, mp->value)
16101 /* Check that there is enough slack to move this entry to the
16102 end of the table (this is conservative). */
16103 && (mp->max_address
16104 > (minipool_barrier->address
16105 + minipool_vector_tail->offset
16106 + minipool_vector_tail->fix_size)))
16107 {
16108 mp->refcount++;
16109 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16110 }
16111
16112 if (min_mp != NULL)
16113 mp->min_address += fix->fix_size;
16114 else
16115 {
16116 /* Note the insertion point if necessary. */
16117 if (mp->min_address < min_address)
16118 {
16119 /* For now, we do not allow nodes that require 8-byte alignment
16120 to be inserted anywhere but at the start of the pool. */
16121 if (ARM_DOUBLEWORD_ALIGN
16122 && fix->fix_size >= 8 && mp->fix_size < 8)
16123 return NULL;
16124 else
16125 min_mp = mp;
16126 }
16127 else if (mp->max_address
16128 < minipool_barrier->address + mp->offset + fix->fix_size)
16129 {
16130 /* Inserting before this entry would push the fix beyond
16131 its maximum address (which can happen if we have
16132 re-located a forwards fix); force the new fix to come
16133 after it. */
16134 if (ARM_DOUBLEWORD_ALIGN
16135 && fix->fix_size >= 8 && mp->fix_size < 8)
16136 return NULL;
16137 else
16138 {
16139 min_mp = mp;
16140 min_address = mp->min_address + fix->fix_size;
16141 }
16142 }
16143 /* Do not insert a non-8-byte aligned quantity before 8-byte
16144 aligned quantities. */
16145 else if (ARM_DOUBLEWORD_ALIGN
16146 && fix->fix_size < 8
16147 && mp->fix_size >= 8)
16148 {
16149 min_mp = mp;
16150 min_address = mp->min_address + fix->fix_size;
16151 }
16152 }
16153 }
16154
16155 /* We need to create a new entry. */
16156 mp = XNEW (Mnode);
16157 mp->fix_size = fix->fix_size;
16158 mp->mode = fix->mode;
16159 mp->value = fix->value;
16160 mp->refcount = 1;
16161 mp->max_address = minipool_barrier->address + 65536;
16162
16163 mp->min_address = min_address;
16164
16165 if (min_mp == NULL)
16166 {
16167 mp->prev = NULL;
16168 mp->next = minipool_vector_head;
16169
16170 if (mp->next == NULL)
16171 {
16172 minipool_vector_tail = mp;
16173 minipool_vector_label = gen_label_rtx ();
16174 }
16175 else
16176 mp->next->prev = mp;
16177
16178 minipool_vector_head = mp;
16179 }
16180 else
16181 {
16182 mp->next = min_mp->next;
16183 mp->prev = min_mp;
16184 min_mp->next = mp;
16185
16186 if (mp->next != NULL)
16187 mp->next->prev = mp;
16188 else
16189 minipool_vector_tail = mp;
16190 }
16191
16192 /* Save the new entry. */
16193 min_mp = mp;
16194
16195 if (mp->prev)
16196 mp = mp->prev;
16197 else
16198 mp->offset = 0;
16199
16200 /* Scan over the following entries and adjust their offsets. */
16201 while (mp->next != NULL)
16202 {
16203 if (mp->next->min_address < mp->min_address + mp->fix_size)
16204 mp->next->min_address = mp->min_address + mp->fix_size;
16205
16206 if (mp->refcount)
16207 mp->next->offset = mp->offset + mp->fix_size;
16208 else
16209 mp->next->offset = mp->offset;
16210
16211 mp = mp->next;
16212 }
16213
16214 return min_mp;
16215 }
16216
16217 static void
16218 assign_minipool_offsets (Mfix *barrier)
16219 {
16220 HOST_WIDE_INT offset = 0;
16221 Mnode *mp;
16222
16223 minipool_barrier = barrier;
16224
16225 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16226 {
16227 mp->offset = offset;
16228
16229 if (mp->refcount > 0)
16230 offset += mp->fix_size;
16231 }
16232 }
16233
16234 /* Output the literal table. */
16235 static void
16236 dump_minipool (rtx_insn *scan)
16237 {
16238 Mnode * mp;
16239 Mnode * nmp;
16240 int align64 = 0;
16241
16242 if (ARM_DOUBLEWORD_ALIGN)
16243 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16244 if (mp->refcount > 0 && mp->fix_size >= 8)
16245 {
16246 align64 = 1;
16247 break;
16248 }
16249
16250 if (dump_file)
16251 fprintf (dump_file,
16252 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16253 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16254
16255 scan = emit_label_after (gen_label_rtx (), scan);
16256 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16257 scan = emit_label_after (minipool_vector_label, scan);
16258
16259 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16260 {
16261 if (mp->refcount > 0)
16262 {
16263 if (dump_file)
16264 {
16265 fprintf (dump_file,
16266 ";; Offset %u, min %ld, max %ld ",
16267 (unsigned) mp->offset, (unsigned long) mp->min_address,
16268 (unsigned long) mp->max_address);
16269 arm_print_value (dump_file, mp->value);
16270 fputc ('\n', dump_file);
16271 }
16272
16273 rtx val = copy_rtx (mp->value);
16274
16275 switch (GET_MODE_SIZE (mp->mode))
16276 {
16277 #ifdef HAVE_consttable_1
16278 case 1:
16279 scan = emit_insn_after (gen_consttable_1 (val), scan);
16280 break;
16281
16282 #endif
16283 #ifdef HAVE_consttable_2
16284 case 2:
16285 scan = emit_insn_after (gen_consttable_2 (val), scan);
16286 break;
16287
16288 #endif
16289 #ifdef HAVE_consttable_4
16290 case 4:
16291 scan = emit_insn_after (gen_consttable_4 (val), scan);
16292 break;
16293
16294 #endif
16295 #ifdef HAVE_consttable_8
16296 case 8:
16297 scan = emit_insn_after (gen_consttable_8 (val), scan);
16298 break;
16299
16300 #endif
16301 #ifdef HAVE_consttable_16
16302 case 16:
16303 scan = emit_insn_after (gen_consttable_16 (val), scan);
16304 break;
16305
16306 #endif
16307 default:
16308 gcc_unreachable ();
16309 }
16310 }
16311
16312 nmp = mp->next;
16313 free (mp);
16314 }
16315
16316 minipool_vector_head = minipool_vector_tail = NULL;
16317 scan = emit_insn_after (gen_consttable_end (), scan);
16318 scan = emit_barrier_after (scan);
16319 }
16320
16321 /* Return the cost of forcibly inserting a barrier after INSN. */
16322 static int
16323 arm_barrier_cost (rtx_insn *insn)
16324 {
16325 /* Basing the location of the pool on the loop depth is preferable,
16326 but at the moment, the basic block information seems to be
16327 corrupted by this stage of the compilation. */
16328 int base_cost = 50;
16329 rtx_insn *next = next_nonnote_insn (insn);
16330
16331 if (next != NULL && LABEL_P (next))
16332 base_cost -= 20;
16333
16334 switch (GET_CODE (insn))
16335 {
16336 case CODE_LABEL:
16337 /* It will always be better to place the table before the label, rather
16338 than after it. */
16339 return 50;
16340
16341 case INSN:
16342 case CALL_INSN:
16343 return base_cost;
16344
16345 case JUMP_INSN:
16346 return base_cost - 10;
16347
16348 default:
16349 return base_cost + 10;
16350 }
16351 }
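
/* For example (an illustrative walk-through of the heuristic above): a
   JUMP_INSN whose next non-note insn is a CODE_LABEL costs 50 - 20 - 10 = 20,
   so the point just after a branch and before a label is one of the cheapest
   places for create_fix_barrier to dump a pool.  */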
16352
16353 /* Find the best place in the insn stream in the range
16354 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16355 Create the barrier by inserting a jump and add a new fix entry for
16356 it. */
16357 static Mfix *
16358 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16359 {
16360 HOST_WIDE_INT count = 0;
16361 rtx_barrier *barrier;
16362 rtx_insn *from = fix->insn;
16363 /* The instruction after which we will insert the jump. */
16364 rtx_insn *selected = NULL;
16365 int selected_cost;
16366 /* The address at which the jump instruction will be placed. */
16367 HOST_WIDE_INT selected_address;
16368 Mfix * new_fix;
16369 HOST_WIDE_INT max_count = max_address - fix->address;
16370 rtx_code_label *label = gen_label_rtx ();
16371
16372 selected_cost = arm_barrier_cost (from);
16373 selected_address = fix->address;
16374
16375 while (from && count < max_count)
16376 {
16377 rtx_jump_table_data *tmp;
16378 int new_cost;
16379
16380 /* This code shouldn't have been called if there was a natural barrier
16381 within range. */
16382 gcc_assert (!BARRIER_P (from));
16383
16384 /* Count the length of this insn. This must stay in sync with the
16385 code that pushes minipool fixes. */
16386 if (LABEL_P (from))
16387 count += get_label_padding (from);
16388 else
16389 count += get_attr_length (from);
16390
16391 /* If there is a jump table, add its length. */
16392 if (tablejump_p (from, NULL, &tmp))
16393 {
16394 count += get_jump_table_size (tmp);
16395
16396 /* Jump tables aren't in a basic block, so base the cost on
16397 the dispatch insn. If we select this location, we will
16398 still put the pool after the table. */
16399 new_cost = arm_barrier_cost (from);
16400
16401 if (count < max_count
16402 && (!selected || new_cost <= selected_cost))
16403 {
16404 selected = tmp;
16405 selected_cost = new_cost;
16406 selected_address = fix->address + count;
16407 }
16408
16409 /* Continue after the dispatch table. */
16410 from = NEXT_INSN (tmp);
16411 continue;
16412 }
16413
16414 new_cost = arm_barrier_cost (from);
16415
16416 if (count < max_count
16417 && (!selected || new_cost <= selected_cost))
16418 {
16419 selected = from;
16420 selected_cost = new_cost;
16421 selected_address = fix->address + count;
16422 }
16423
16424 from = NEXT_INSN (from);
16425 }
16426
16427 /* Make sure that we found a place to insert the jump. */
16428 gcc_assert (selected);
16429
16430 /* Make sure we do not split a call and its corresponding
16431 CALL_ARG_LOCATION note. */
16432 if (CALL_P (selected))
16433 {
16434 rtx_insn *next = NEXT_INSN (selected);
16435 if (next && NOTE_P (next)
16436 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16437 selected = next;
16438 }
16439
16440 /* Create a new JUMP_INSN that branches around a barrier. */
16441 from = emit_jump_insn_after (gen_jump (label), selected);
16442 JUMP_LABEL (from) = label;
16443 barrier = emit_barrier_after (from);
16444 emit_label_after (label, barrier);
16445
16446 /* Create a minipool barrier entry for the new barrier. */
16447 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16448 new_fix->insn = barrier;
16449 new_fix->address = selected_address;
16450 new_fix->next = fix->next;
16451 fix->next = new_fix;
16452
16453 return new_fix;
16454 }
16455
16456 /* Record that there is a natural barrier in the insn stream at
16457 ADDRESS. */
16458 static void
16459 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16460 {
16461 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16462
16463 fix->insn = insn;
16464 fix->address = address;
16465
16466 fix->next = NULL;
16467 if (minipool_fix_head != NULL)
16468 minipool_fix_tail->next = fix;
16469 else
16470 minipool_fix_head = fix;
16471
16472 minipool_fix_tail = fix;
16473 }
16474
16475 /* Record INSN, which will need fixing up to load a value from the
16476 minipool. ADDRESS is the offset of the insn since the start of the
16477 function; LOC is a pointer to the part of the insn which requires
16478 fixing; VALUE is the constant that must be loaded, which is of type
16479 MODE. */
16480 static void
16481 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16482 machine_mode mode, rtx value)
16483 {
16484 gcc_assert (!arm_disable_literal_pool);
16485 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16486
16487 fix->insn = insn;
16488 fix->address = address;
16489 fix->loc = loc;
16490 fix->mode = mode;
16491 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16492 fix->value = value;
16493 fix->forwards = get_attr_pool_range (insn);
16494 fix->backwards = get_attr_neg_pool_range (insn);
16495 fix->minipool = NULL;
16496
16497 /* If an insn doesn't have a range defined for it, then it isn't
16498 expecting to be reworked by this code. Better to stop now than
16499 to generate duff assembly code. */
16500 gcc_assert (fix->forwards || fix->backwards);
16501
16502 /* If an entry requires 8-byte alignment then assume all constant pools
16503 require 4 bytes of padding. Trying to do this later on a per-pool
16504 basis is awkward because existing pool entries have to be modified. */
16505 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16506 minipool_pad = 4;
16507
16508 if (dump_file)
16509 {
16510 fprintf (dump_file,
16511 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16512 GET_MODE_NAME (mode),
16513 INSN_UID (insn), (unsigned long) address,
16514 -1 * (long)fix->backwards, (long)fix->forwards);
16515 arm_print_value (dump_file, fix->value);
16516 fprintf (dump_file, "\n");
16517 }
16518
16519 /* Add it to the chain of fixes. */
16520 fix->next = NULL;
16521
16522 if (minipool_fix_head != NULL)
16523 minipool_fix_tail->next = fix;
16524 else
16525 minipool_fix_head = fix;
16526
16527 minipool_fix_tail = fix;
16528 }
16529
16530 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16531 constant inline; above this cost it is not considered worthwhile and
16532 the constant is loaded from memory instead. */
16533 int
16534 arm_max_const_double_inline_cost ()
16535 {
16536 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16537 }
16538
16539 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16540 Returns the number of insns needed, or 99 if we don't know how to
16541 do it. */
16542 int
16543 arm_const_double_inline_cost (rtx val)
16544 {
16545 rtx lowpart, highpart;
16546 machine_mode mode;
16547
16548 mode = GET_MODE (val);
16549
16550 if (mode == VOIDmode)
16551 mode = DImode;
16552
16553 gcc_assert (GET_MODE_SIZE (mode) == 8);
16554
16555 lowpart = gen_lowpart (SImode, val);
16556 highpart = gen_highpart_mode (SImode, mode, val);
16557
16558 gcc_assert (CONST_INT_P (lowpart));
16559 gcc_assert (CONST_INT_P (highpart));
16560
16561 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16562 NULL_RTX, NULL_RTX, 0, 0)
16563 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16564 NULL_RTX, NULL_RTX, 0, 0));
16565 }
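
/* A minimal illustrative sketch (not part of the build) of the split that the
   cost computation above models: a DImode value is costed as its two SImode
   halves, each synthesized independently.  The constant below is only an
   example value.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t val = 0x0000001200000034ULL;
  uint32_t lowpart = (uint32_t) val;           /* 0x00000034  */
  uint32_t highpart = (uint32_t) (val >> 32);  /* 0x00000012  */

  assert (lowpart == 0x34 && highpart == 0x12);

  /* Both halves are valid 8-bit immediates, so each half costs a single MOV
     and the estimated total of 2 insns is within
     arm_max_const_double_inline_cost (3 or 4): such a constant would be
     synthesized inline rather than placed in a literal pool.  */
  return 0;
}
#endif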
16566
16567 /* Cost of loading a SImode constant. */
16568 static inline int
16569 arm_const_inline_cost (enum rtx_code code, rtx val)
16570 {
16571 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16572 NULL_RTX, NULL_RTX, 1, 0);
16573 }
16574
16575 /* Return true if it is worthwhile to split a 64-bit constant into two
16576 32-bit operations. This is the case if optimizing for size, or
16577 if we have load delay slots, or if one 32-bit part can be done with
16578 a single data operation. */
16579 bool
16580 arm_const_double_by_parts (rtx val)
16581 {
16582 machine_mode mode = GET_MODE (val);
16583 rtx part;
16584
16585 if (optimize_size || arm_ld_sched)
16586 return true;
16587
16588 if (mode == VOIDmode)
16589 mode = DImode;
16590
16591 part = gen_highpart_mode (SImode, mode, val);
16592
16593 gcc_assert (CONST_INT_P (part));
16594
16595 if (const_ok_for_arm (INTVAL (part))
16596 || const_ok_for_arm (~INTVAL (part)))
16597 return true;
16598
16599 part = gen_lowpart (SImode, val);
16600
16601 gcc_assert (CONST_INT_P (part));
16602
16603 if (const_ok_for_arm (INTVAL (part))
16604 || const_ok_for_arm (~INTVAL (part)))
16605 return true;
16606
16607 return false;
16608 }
16609
16610 /* Return true if it is possible to inline both the high and low parts
16611 of a 64-bit constant into 32-bit data processing instructions. */
16612 bool
16613 arm_const_double_by_immediates (rtx val)
16614 {
16615 machine_mode mode = GET_MODE (val);
16616 rtx part;
16617
16618 if (mode == VOIDmode)
16619 mode = DImode;
16620
16621 part = gen_highpart_mode (SImode, mode, val);
16622
16623 gcc_assert (CONST_INT_P (part));
16624
16625 if (!const_ok_for_arm (INTVAL (part)))
16626 return false;
16627
16628 part = gen_lowpart (SImode, val);
16629
16630 gcc_assert (CONST_INT_P (part));
16631
16632 if (!const_ok_for_arm (INTVAL (part)))
16633 return false;
16634
16635 return true;
16636 }
16637
16638 /* Scan INSN and note any of its operands that need fixing.
16639 If DO_PUSHES is false we do not actually push any of the fixups
16640 needed. */
16641 static void
16642 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16643 {
16644 int opno;
16645
16646 extract_constrain_insn (insn);
16647
16648 if (recog_data.n_alternatives == 0)
16649 return;
16650
16651 /* Fill in recog_op_alt with information about the constraints of
16652 this insn. */
16653 preprocess_constraints (insn);
16654
16655 const operand_alternative *op_alt = which_op_alt ();
16656 for (opno = 0; opno < recog_data.n_operands; opno++)
16657 {
16658 /* Things we need to fix can only occur in inputs. */
16659 if (recog_data.operand_type[opno] != OP_IN)
16660 continue;
16661
16662 /* If this alternative is a memory reference, then any mention
16663 of constants in this alternative is really to fool reload
16664 into allowing us to accept one there. We need to fix them up
16665 now so that we output the right code. */
16666 if (op_alt[opno].memory_ok)
16667 {
16668 rtx op = recog_data.operand[opno];
16669
16670 if (CONSTANT_P (op))
16671 {
16672 if (do_pushes)
16673 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16674 recog_data.operand_mode[opno], op);
16675 }
16676 else if (MEM_P (op)
16677 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16678 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16679 {
16680 if (do_pushes)
16681 {
16682 rtx cop = avoid_constant_pool_reference (op);
16683
16684 /* Casting the address of something to a mode narrower
16685 than a word can cause avoid_constant_pool_reference()
16686 to return the pool reference itself. That's no good to
16687 us here. Let's just hope that we can use the
16688 constant pool value directly. */
16689 if (op == cop)
16690 cop = get_pool_constant (XEXP (op, 0));
16691
16692 push_minipool_fix (insn, address,
16693 recog_data.operand_loc[opno],
16694 recog_data.operand_mode[opno], cop);
16695 }
16696
16697 }
16698 }
16699 }
16700
16701 return;
16702 }
16703
16704 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16705 and unions in the context of ARMv8-M Security Extensions. It is used as a
16706 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16707 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16708 or four masks, depending on whether it is being computed for a
16709 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16710 respectively. The tree for the type of the argument or a field within an
16711 argument is passed in ARG_TYPE; the current register this argument or field
16712 starts in is kept in the pointer REGNO and updated accordingly; the bit this
16713 argument or field starts at is passed in STARTING_BIT; and the last used bit
16714 is kept in LAST_USED_BIT, which is also updated accordingly. */
16715
16716 static unsigned HOST_WIDE_INT
16717 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16718 uint32_t * padding_bits_to_clear,
16719 unsigned starting_bit, int * last_used_bit)
16720
16721 {
16722 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16723
16724 if (TREE_CODE (arg_type) == RECORD_TYPE)
16725 {
16726 unsigned current_bit = starting_bit;
16727 tree field;
16728 long int offset, size;
16729
16730
16731 field = TYPE_FIELDS (arg_type);
16732 while (field)
16733 {
16734 /* The offset within a structure is always an offset from
16735 the start of that structure. Make sure we take that into account in
16736 the calculation of the register-based offset that we use here. */
16737 offset = starting_bit;
16738 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16739 offset %= 32;
16740
16741 /* This is the actual size of the field, for bitfields this is the
16742 bitfield width and not the container size. */
16743 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16744
16745 if (*last_used_bit != offset)
16746 {
16747 if (offset < *last_used_bit)
16748 {
16749 /* This field's offset is before the 'last_used_bit', which
16750 means this field goes in the next register. So we need to
16751 pad the rest of the current register and increase the
16752 register number. */
16753 uint32_t mask;
16754 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16755 mask++;
16756
16757 padding_bits_to_clear[*regno] |= mask;
16758 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16759 (*regno)++;
16760 }
16761 else
16762 {
16763 /* Otherwise we pad the bits between the last field's end and
16764 the start of the new field. */
16765 uint32_t mask;
16766
16767 mask = ((uint32_t)-1) >> (32 - offset);
16768 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16769 padding_bits_to_clear[*regno] |= mask;
16770 }
16771 current_bit = offset;
16772 }
16773
16774 /* Calculate further padding bits for inner structs/unions too. */
16775 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16776 {
16777 *last_used_bit = current_bit;
16778 not_to_clear_reg_mask
16779 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16780 padding_bits_to_clear, offset,
16781 last_used_bit);
16782 }
16783 else
16784 {
16785 /* Update 'current_bit' with this field's size. If the
16786 'current_bit' lies in a subsequent register, update 'regno' and
16787 reset 'current_bit' to point to the current bit in that new
16788 register. */
16789 current_bit += size;
16790 while (current_bit >= 32)
16791 {
16792 current_bit -= 32;
16793 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16794 (*regno)++;
16795 }
16796 *last_used_bit = current_bit;
16797 }
16798
16799 field = TREE_CHAIN (field);
16800 }
16801 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16802 }
16803 else if (TREE_CODE (arg_type) == UNION_TYPE)
16804 {
16805 tree field, field_t;
16806 int i, regno_t, field_size;
16807 int max_reg = -1;
16808 int max_bit = -1;
16809 uint32_t mask;
16810 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16811 = {-1, -1, -1, -1};
16812
16813 /* To compute the padding bits in a union we only consider bits as
16814 padding bits if they are always either a padding bit or fall outside a
16815 field's size, for all fields in the union. */
16816 field = TYPE_FIELDS (arg_type);
16817 while (field)
16818 {
16819 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16820 = {0U, 0U, 0U, 0U};
16821 int last_used_bit_t = *last_used_bit;
16822 regno_t = *regno;
16823 field_t = TREE_TYPE (field);
16824
16825 /* If the field's type is either a record or a union make sure to
16826 compute their padding bits too. */
16827 if (RECORD_OR_UNION_TYPE_P (field_t))
16828 not_to_clear_reg_mask
16829 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16830 &padding_bits_to_clear_t[0],
16831 starting_bit, &last_used_bit_t);
16832 else
16833 {
16834 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16835 regno_t = (field_size / 32) + *regno;
16836 last_used_bit_t = (starting_bit + field_size) % 32;
16837 }
16838
16839 for (i = *regno; i < regno_t; i++)
16840 {
16841 /* For all but the last register used by this field only keep the
16842 padding bits that were padding bits in this field. */
16843 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16844 }
16845
16846 /* For the last register, keep all padding bits that were padding
16847 bits in this field and any padding bits that are still valid
16848 as padding bits but fall outside of this field's size. */
16849 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16850 padding_bits_to_clear_res[regno_t]
16851 &= padding_bits_to_clear_t[regno_t] | mask;
16852
16853 /* Update the maximum size of the fields in terms of registers used
16854 ('max_reg') and the 'last_used_bit' in said register. */
16855 if (max_reg < regno_t)
16856 {
16857 max_reg = regno_t;
16858 max_bit = last_used_bit_t;
16859 }
16860 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16861 max_bit = last_used_bit_t;
16862
16863 field = TREE_CHAIN (field);
16864 }
16865
16866 /* Update the current padding_bits_to_clear using the intersection of the
16867 padding bits of all the fields. */
16868 for (i = *regno; i < max_reg; i++)
16869 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16870
16871 /* Do not keep trailing padding bits, we do not know yet whether this
16872 is the end of the argument. */
16873 mask = ((uint32_t) 1 << max_bit) - 1;
16874 padding_bits_to_clear[max_reg]
16875 |= padding_bits_to_clear_res[max_reg] & mask;
16876
16877 *regno = max_reg;
16878 *last_used_bit = max_bit;
16879 }
16880 else
16881 /* This function should only be used for structs and unions. */
16882 gcc_unreachable ();
16883
16884 return not_to_clear_reg_mask;
16885 }
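
/* A minimal illustrative sketch (not part of the build) of the padding-mask
   arithmetic used by comp_not_to_clear_mask_str_un above.  The struct layout
   in the example is an assumption based on the AAPCS rules for a char
   followed by a short.  */
#if 0
#include <assert.h>
#include <stdint.h>

/* Bits [last_used_bit, offset) of one 32-bit argument register, i.e. the
   padding between the end of one field and the start of the next.  */
static uint32_t
padding_between_fields (unsigned last_used_bit, unsigned offset)
{
  uint32_t mask = ((uint32_t) -1) >> (32 - offset);  /* bits [0, offset)  */
  mask -= ((uint32_t) 1 << last_used_bit) - 1;       /* drop [0, last_used_bit)  */
  return mask;
}

int
main (void)
{
  /* struct { char c; short s; } passed in r0: 'c' occupies bits 0-7, 's'
     starts at bit 16, so bits 8-15 are padding that must be cleared before
     a non-secure transition.  */
  assert (padding_between_fields (8, 16) == 0x0000ff00u);
  return 0;
}
#endif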
16886
16887 /* In the context of ARMv8-M Security Extensions, this function is used for both
16888 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16889 registers are used when returning or passing arguments, which is then
16890 returned as a mask. It will also compute a mask to indicate padding/unused
16891 bits for each of these registers, and passes this through the
16892 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16893 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16894 the starting register used to pass this argument or return value is passed
16895 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16896 for struct and union types. */
16897
16898 static unsigned HOST_WIDE_INT
16899 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16900 uint32_t * padding_bits_to_clear)
16901
16902 {
16903 int last_used_bit = 0;
16904 unsigned HOST_WIDE_INT not_to_clear_mask;
16905
16906 if (RECORD_OR_UNION_TYPE_P (arg_type))
16907 {
16908 not_to_clear_mask
16909 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16910 padding_bits_to_clear, 0,
16911 &last_used_bit);
16912
16913
16914 /* If the 'last_used_bit' is not zero, that means we are still using a
16915 part of the last 'regno'. In such cases we must clear the trailing
16916 bits. Otherwise we are not using regno and we should mark it as to be
16917 cleared. */
16918 if (last_used_bit != 0)
16919 padding_bits_to_clear[regno]
16920 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16921 else
16922 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16923 }
16924 else
16925 {
16926 not_to_clear_mask = 0;
16927 /* We are not dealing with structs or unions, so these arguments may be
16928 passed in floating point registers too. In some cases a BLKmode is
16929 used when returning or passing arguments in multiple VFP registers. */
16930 if (GET_MODE (arg_rtx) == BLKmode)
16931 {
16932 int i, arg_regs;
16933 rtx reg;
16934
16935 /* This should really only occur when dealing with the hard-float
16936 ABI. */
16937 gcc_assert (TARGET_HARD_FLOAT_ABI);
16938
16939 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16940 {
16941 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16942 gcc_assert (REG_P (reg));
16943
16944 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16945
16946 /* If we are dealing with DF mode, make sure we don't
16947 clear either of the registers it addresses. */
16948 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16949 if (arg_regs > 1)
16950 {
16951 unsigned HOST_WIDE_INT mask;
16952 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16953 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16954 not_to_clear_mask |= mask;
16955 }
16956 }
16957 }
16958 else
16959 {
16960 /* Otherwise we can rely on the MODE to determine how many registers
16961 are being used by this argument. */
16962 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16963 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16964 if (arg_regs > 1)
16965 {
16966 unsigned HOST_WIDE_INT
16967 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16968 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16969 not_to_clear_mask |= mask;
16970 }
16971 }
16972 }
16973
16974 return not_to_clear_mask;
16975 }
16976
16977 /* Clears caller saved registers not used to pass arguments before a
16978 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16979 registers is done in __gnu_cmse_nonsecure_call libcall.
16980 See libgcc/config/arm/cmse_nonsecure_call.S. */
16981
16982 static void
16983 cmse_nonsecure_call_clear_caller_saved (void)
16984 {
16985 basic_block bb;
16986
16987 FOR_EACH_BB_FN (bb, cfun)
16988 {
16989 rtx_insn *insn;
16990
16991 FOR_BB_INSNS (bb, insn)
16992 {
16993 uint64_t to_clear_mask, float_mask;
16994 rtx_insn *seq;
16995 rtx pat, call, unspec, reg, cleared_reg, tmp;
16996 unsigned int regno, maxregno;
16997 rtx address;
16998 CUMULATIVE_ARGS args_so_far_v;
16999 cumulative_args_t args_so_far;
17000 tree arg_type, fntype;
17001 bool using_r4, first_param = true;
17002 function_args_iterator args_iter;
17003 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17004 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
17005
17006 if (!NONDEBUG_INSN_P (insn))
17007 continue;
17008
17009 if (!CALL_P (insn))
17010 continue;
17011
17012 pat = PATTERN (insn);
17013 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17014 call = XVECEXP (pat, 0, 0);
17015
17016 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17017 if (GET_CODE (call) == SET)
17018 call = SET_SRC (call);
17019
17020 /* Check if it is a cmse_nonsecure_call. */
17021 unspec = XEXP (call, 0);
17022 if (GET_CODE (unspec) != UNSPEC
17023 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17024 continue;
17025
17026 /* Determine the caller-saved registers we need to clear. */
17027 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17028 maxregno = NUM_ARG_REGS - 1;
17029 /* Only look at the caller-saved floating point registers in case of
17030 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17031 lazy store and loads which clear both caller- and callee-saved
17032 registers. */
17033 if (TARGET_HARD_FLOAT_ABI)
17034 {
17035 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17036 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17037 to_clear_mask |= float_mask;
17038 maxregno = D7_VFP_REGNUM;
17039 }
17040
17041 /* Make sure the register used to hold the function address is not
17042 cleared. */
17043 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17044 gcc_assert (MEM_P (address));
17045 gcc_assert (REG_P (XEXP (address, 0)));
17046 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17047
17048 /* Set basic block of call insn so that df rescan is performed on
17049 insns inserted here. */
17050 set_block_for_insn (insn, bb);
17051 df_set_flags (DF_DEFER_INSN_RESCAN);
17052 start_sequence ();
17053
17054 /* Make sure the scheduler doesn't schedule other insns beyond
17055 here. */
17056 emit_insn (gen_blockage ());
17057
17058 /* Walk through all arguments and clear registers
17059 appropriately. */
17060 fntype = TREE_TYPE (MEM_EXPR (address));
17061 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17062 NULL_TREE);
17063 args_so_far = pack_cumulative_args (&args_so_far_v);
17064 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17065 {
17066 rtx arg_rtx;
17067 machine_mode arg_mode = TYPE_MODE (arg_type);
17068
17069 if (VOID_TYPE_P (arg_type))
17070 continue;
17071
17072 if (!first_param)
17073 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17074 true);
17075
17076 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17077 true);
17078 gcc_assert (REG_P (arg_rtx));
17079 to_clear_mask
17080 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17081 REGNO (arg_rtx),
17082 padding_bits_to_clear_ptr);
17083
17084 first_param = false;
17085 }
17086
17087 /* Clear padding bits where needed. */
17088 cleared_reg = XEXP (address, 0);
17089 reg = gen_rtx_REG (SImode, IP_REGNUM);
17090 using_r4 = false;
17091 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17092 {
17093 if (padding_bits_to_clear[regno] == 0)
17094 continue;
17095
17096 /* If this is a Thumb-1 target, copy the address of the function
17097 we are calling from 'r4' into 'ip' so that we can use r4 to
17098 clear the unused bits in the arguments. */
17099 if (TARGET_THUMB1 && !using_r4)
17100 {
17101 using_r4 = true;
17102 reg = cleared_reg;
17103 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17104 reg);
17105 }
17106
17107 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17108 emit_move_insn (reg, tmp);
17109 /* Also fill the top half of the negated
17110 padding_bits_to_clear. */
17111 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17112 {
17113 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17114 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17115 GEN_INT (16),
17116 GEN_INT (16)),
17117 tmp));
17118 }
17119
17120 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17121 gen_rtx_REG (SImode, regno),
17122 reg));
17123
17124 }
17125 if (using_r4)
17126 emit_move_insn (cleared_reg,
17127 gen_rtx_REG (SImode, IP_REGNUM));
17128
17129 /* We use right shift and left shift to clear the LSB of the address
17130 we jump to instead of using bic, to avoid having to use an extra
17131 register on Thumb-1. */
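	  /* For example (illustrative value only): 0x08000101 shifted right
	     and then left by one becomes 0x08000100; only bit zero is lost.  */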
17132 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17133 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17134 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17135 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17136
17137 /* Clear all registers that leak before doing a non-secure
17138 call. */
17139 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17140 {
17141 if (!(to_clear_mask & (1LL << regno)))
17142 continue;
17143
17144 /* If regno is an even vfp register and its successor is also to
17145 be cleared, use vmov. */
17146 if (IS_VFP_REGNUM (regno))
17147 {
17148 if (TARGET_VFP_DOUBLE
17149 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17150 && to_clear_mask & (1LL << (regno + 1)))
17151 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17152 CONST0_RTX (DFmode));
17153 else
17154 emit_move_insn (gen_rtx_REG (SFmode, regno),
17155 CONST0_RTX (SFmode));
17156 }
17157 else
17158 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17159 }
17160
17161 seq = get_insns ();
17162 end_sequence ();
17163 emit_insn_before (seq, insn);
17164
17165 }
17166 }
17167 }
17168
17169 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17170 be useful in the next conditional jump insn. */
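/* For example (illustrative): when a move "r1 := r2" is followed, with r1 not
   clobbered in between, by a cbranchsi4_insn comparing r1 (or r2) against
   zero, the move is rewritten as "r1 := r2 - 0"; the flag-setting subtract
   lets the following conditional branch use the condition codes directly.  */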
17171
17172 static void
17173 thumb1_reorg (void)
17174 {
17175 basic_block bb;
17176
17177 FOR_EACH_BB_FN (bb, cfun)
17178 {
17179 rtx dest, src;
17180 rtx cmp, op0, op1, set = NULL;
17181 rtx_insn *prev, *insn = BB_END (bb);
17182 bool insn_clobbered = false;
17183
17184 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17185 insn = PREV_INSN (insn);
17186
17187 /* Find the last cbranchsi4_insn in basic block BB. */
17188 if (insn == BB_HEAD (bb)
17189 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17190 continue;
17191
17192 /* Get the register with which we are comparing. */
17193 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17194 op0 = XEXP (cmp, 0);
17195 op1 = XEXP (cmp, 1);
17196
17197 /* Check that comparison is against ZERO. */
17198 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17199 continue;
17200
17201 /* Find the first flag setting insn before INSN in basic block BB. */
17202 gcc_assert (insn != BB_HEAD (bb));
17203 for (prev = PREV_INSN (insn);
17204 (!insn_clobbered
17205 && prev != BB_HEAD (bb)
17206 && (NOTE_P (prev)
17207 || DEBUG_INSN_P (prev)
17208 || ((set = single_set (prev)) != NULL
17209 && get_attr_conds (prev) == CONDS_NOCOND)));
17210 prev = PREV_INSN (prev))
17211 {
17212 if (reg_set_p (op0, prev))
17213 insn_clobbered = true;
17214 }
17215
17216 /* Skip if op0 is clobbered by insn other than prev. */
17217 if (insn_clobbered)
17218 continue;
17219
17220 if (!set)
17221 continue;
17222
17223 dest = SET_DEST (set);
17224 src = SET_SRC (set);
17225 if (!low_register_operand (dest, SImode)
17226 || !low_register_operand (src, SImode))
17227 continue;
17228
17229 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17230 in INSN. Both src and dest of the move insn are checked. */
17231 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17232 {
17233 dest = copy_rtx (dest);
17234 src = copy_rtx (src);
17235 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17236 PATTERN (prev) = gen_rtx_SET (dest, src);
17237 INSN_CODE (prev) = -1;
17238 /* Set test register in INSN to dest. */
17239 XEXP (cmp, 0) = copy_rtx (dest);
17240 INSN_CODE (insn) = -1;
17241 }
17242 }
17243 }
17244
17245 /* Convert instructions to their cc-clobbering variant if possible, since
17246 that allows us to use smaller encodings. */
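/* For example (illustrative): "and r0, r0, r1" needs a 32-bit Thumb-2
   encoding, whereas the flag-setting "ands r0, r1" has a 16-bit encoding;
   when the condition codes are dead at that point the conversion costs
   nothing and saves two bytes.  */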
17247
17248 static void
17249 thumb2_reorg (void)
17250 {
17251 basic_block bb;
17252 regset_head live;
17253
17254 INIT_REG_SET (&live);
17255
17256 /* We are freeing block_for_insn in the toplev to keep compatibility
17257 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17258 compute_bb_for_insn ();
17259 df_analyze ();
17260
17261 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17262
17263 FOR_EACH_BB_FN (bb, cfun)
17264 {
17265 if ((current_tune->disparage_flag_setting_t16_encodings
17266 == tune_params::DISPARAGE_FLAGS_ALL)
17267 && optimize_bb_for_speed_p (bb))
17268 continue;
17269
17270 rtx_insn *insn;
17271 Convert_Action action = SKIP;
17272 Convert_Action action_for_partial_flag_setting
17273 = ((current_tune->disparage_flag_setting_t16_encodings
17274 != tune_params::DISPARAGE_FLAGS_NEITHER)
17275 && optimize_bb_for_speed_p (bb))
17276 ? SKIP : CONV;
17277
17278 COPY_REG_SET (&live, DF_LR_OUT (bb));
17279 df_simulate_initialize_backwards (bb, &live);
17280 FOR_BB_INSNS_REVERSE (bb, insn)
17281 {
17282 if (NONJUMP_INSN_P (insn)
17283 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17284 && GET_CODE (PATTERN (insn)) == SET)
17285 {
17286 action = SKIP;
17287 rtx pat = PATTERN (insn);
17288 rtx dst = XEXP (pat, 0);
17289 rtx src = XEXP (pat, 1);
17290 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17291
17292 if (UNARY_P (src) || BINARY_P (src))
17293 op0 = XEXP (src, 0);
17294
17295 if (BINARY_P (src))
17296 op1 = XEXP (src, 1);
17297
17298 if (low_register_operand (dst, SImode))
17299 {
17300 switch (GET_CODE (src))
17301 {
17302 case PLUS:
17303 /* Adding two registers and storing the result
17304 in the first source is already a 16-bit
17305 operation. */
17306 if (rtx_equal_p (dst, op0)
17307 && register_operand (op1, SImode))
17308 break;
17309
17310 if (low_register_operand (op0, SImode))
17311 {
17312 /* ADDS <Rd>,<Rn>,<Rm> */
17313 if (low_register_operand (op1, SImode))
17314 action = CONV;
17315 /* ADDS <Rdn>,#<imm8> */
17316 /* SUBS <Rdn>,#<imm8> */
17317 else if (rtx_equal_p (dst, op0)
17318 && CONST_INT_P (op1)
17319 && IN_RANGE (INTVAL (op1), -255, 255))
17320 action = CONV;
17321 /* ADDS <Rd>,<Rn>,#<imm3> */
17322 /* SUBS <Rd>,<Rn>,#<imm3> */
17323 else if (CONST_INT_P (op1)
17324 && IN_RANGE (INTVAL (op1), -7, 7))
17325 action = CONV;
17326 }
17327 /* ADCS <Rd>, <Rn> */
17328 else if (GET_CODE (XEXP (src, 0)) == PLUS
17329 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17330 && low_register_operand (XEXP (XEXP (src, 0), 1),
17331 SImode)
17332 && COMPARISON_P (op1)
17333 && cc_register (XEXP (op1, 0), VOIDmode)
17334 && maybe_get_arm_condition_code (op1) == ARM_CS
17335 && XEXP (op1, 1) == const0_rtx)
17336 action = CONV;
17337 break;
17338
17339 case MINUS:
17340 /* RSBS <Rd>,<Rn>,#0
17341 Not handled here: see NEG below. */
17342 /* SUBS <Rd>,<Rn>,#<imm3>
17343 SUBS <Rdn>,#<imm8>
17344 Not handled here: see PLUS above. */
17345 /* SUBS <Rd>,<Rn>,<Rm> */
17346 if (low_register_operand (op0, SImode)
17347 && low_register_operand (op1, SImode))
17348 action = CONV;
17349 break;
17350
17351 case MULT:
17352 /* MULS <Rdm>,<Rn>,<Rdm>
17353 As an exception to the rule, this is only used
17354 when optimizing for size since MULS is slow on all
17355 known implementations. We do not even want to use
17356 MULS in cold code, if optimizing for speed, so we
17357 test the global flag here. */
17358 if (!optimize_size)
17359 break;
17360 /* Fall through. */
17361 case AND:
17362 case IOR:
17363 case XOR:
17364 /* ANDS <Rdn>,<Rm> */
17365 if (rtx_equal_p (dst, op0)
17366 && low_register_operand (op1, SImode))
17367 action = action_for_partial_flag_setting;
17368 else if (rtx_equal_p (dst, op1)
17369 && low_register_operand (op0, SImode))
17370 action = action_for_partial_flag_setting == SKIP
17371 ? SKIP : SWAP_CONV;
17372 break;
17373
17374 case ASHIFTRT:
17375 case ASHIFT:
17376 case LSHIFTRT:
17377 /* ASRS <Rdn>,<Rm> */
17378 /* LSRS <Rdn>,<Rm> */
17379 /* LSLS <Rdn>,<Rm> */
17380 if (rtx_equal_p (dst, op0)
17381 && low_register_operand (op1, SImode))
17382 action = action_for_partial_flag_setting;
17383 /* ASRS <Rd>,<Rm>,#<imm5> */
17384 /* LSRS <Rd>,<Rm>,#<imm5> */
17385 /* LSLS <Rd>,<Rm>,#<imm5> */
17386 else if (low_register_operand (op0, SImode)
17387 && CONST_INT_P (op1)
17388 && IN_RANGE (INTVAL (op1), 0, 31))
17389 action = action_for_partial_flag_setting;
17390 break;
17391
17392 case ROTATERT:
17393 /* RORS <Rdn>,<Rm> */
17394 if (rtx_equal_p (dst, op0)
17395 && low_register_operand (op1, SImode))
17396 action = action_for_partial_flag_setting;
17397 break;
17398
17399 case NOT:
17400 /* MVNS <Rd>,<Rm> */
17401 if (low_register_operand (op0, SImode))
17402 action = action_for_partial_flag_setting;
17403 break;
17404
17405 case NEG:
17406 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17407 if (low_register_operand (op0, SImode))
17408 action = CONV;
17409 break;
17410
17411 case CONST_INT:
17412 /* MOVS <Rd>,#<imm8> */
17413 if (CONST_INT_P (src)
17414 && IN_RANGE (INTVAL (src), 0, 255))
17415 action = action_for_partial_flag_setting;
17416 break;
17417
17418 case REG:
17419 /* MOVS and MOV<c> with registers have different
17420 encodings, so are not relevant here. */
17421 break;
17422
17423 default:
17424 break;
17425 }
17426 }
17427
17428 if (action != SKIP)
17429 {
17430 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17431 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17432 rtvec vec;
17433
17434 if (action == SWAP_CONV)
17435 {
17436 src = copy_rtx (src);
17437 XEXP (src, 0) = op1;
17438 XEXP (src, 1) = op0;
17439 pat = gen_rtx_SET (dst, src);
17440 vec = gen_rtvec (2, pat, clobber);
17441 }
17442 else /* action == CONV */
17443 vec = gen_rtvec (2, pat, clobber);
17444
17445 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17446 INSN_CODE (insn) = -1;
17447 }
17448 }
17449
17450 if (NONDEBUG_INSN_P (insn))
17451 df_simulate_one_insn_backwards (bb, insn, &live);
17452 }
17453 }
17454
17455 CLEAR_REG_SET (&live);
17456 }
17457
17458 /* GCC puts the pool in the wrong place for ARM, since we can only
17459 load addresses a limited distance around the pc. We do some
17460 special munging to move the constant pool values to the correct
17461 point in the code. */
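/* For example (illustrative), a load of a constant that has no immediate
   encoding:

     ldr     r0, .LCP0
     ...
     .LCP0:  .word   0x12345678

   only works if .LCP0 ends up within the pc-relative range of the ldr, so
   the code below collects such references and dumps "minipools" of table
   entries at suitable points, typically just after an unconditional branch
   or an existing barrier.  */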
17462 static void
17463 arm_reorg (void)
17464 {
17465 rtx_insn *insn;
17466 HOST_WIDE_INT address = 0;
17467 Mfix * fix;
17468
17469 if (use_cmse)
17470 cmse_nonsecure_call_clear_caller_saved ();
17471 if (TARGET_THUMB1)
17472 thumb1_reorg ();
17473 else if (TARGET_THUMB2)
17474 thumb2_reorg ();
17475
17476 /* Ensure all insns that must be split have been split at this point.
17477 Otherwise, the pool placement code below may compute incorrect
17478 insn lengths. Note that when optimizing, all insns have already
17479 been split at this point. */
17480 if (!optimize)
17481 split_all_insns_noflow ();
17482
17483 /* When the literal pool is disabled it should never be necessary to
17484 create one; make sure we do not even attempt to. */
17485 if (arm_disable_literal_pool)
17486 return;
17487
17488 minipool_fix_head = minipool_fix_tail = NULL;
17489
17490 /* The first insn must always be a note, or the code below won't
17491 scan it properly. */
17492 insn = get_insns ();
17493 gcc_assert (NOTE_P (insn));
17494 minipool_pad = 0;
17495
17496 /* Scan all the insns and record the operands that will need fixing. */
17497 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17498 {
17499 if (BARRIER_P (insn))
17500 push_minipool_barrier (insn, address);
17501 else if (INSN_P (insn))
17502 {
17503 rtx_jump_table_data *table;
17504
17505 note_invalid_constants (insn, address, true);
17506 address += get_attr_length (insn);
17507
17508 /* If the insn is a vector jump, add the size of the table
17509 and skip the table. */
17510 if (tablejump_p (insn, NULL, &table))
17511 {
17512 address += get_jump_table_size (table);
17513 insn = table;
17514 }
17515 }
17516 else if (LABEL_P (insn))
17517 /* Add the worst-case padding due to alignment. We don't add
17518 the _current_ padding because the minipool insertions
17519 themselves might change it. */
17520 address += get_label_padding (insn);
17521 }
17522
17523 fix = minipool_fix_head;
17524
17525 /* Now scan the fixups and perform the required changes. */
17526 while (fix)
17527 {
17528 Mfix * ftmp;
17529 Mfix * fdel;
17530 Mfix * last_added_fix;
17531 Mfix * last_barrier = NULL;
17532 Mfix * this_fix;
17533
17534 /* Skip any further barriers before the next fix. */
17535 while (fix && BARRIER_P (fix->insn))
17536 fix = fix->next;
17537
17538 /* No more fixes. */
17539 if (fix == NULL)
17540 break;
17541
17542 last_added_fix = NULL;
17543
17544 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17545 {
17546 if (BARRIER_P (ftmp->insn))
17547 {
17548 if (ftmp->address >= minipool_vector_head->max_address)
17549 break;
17550
17551 last_barrier = ftmp;
17552 }
17553 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17554 break;
17555
17556 last_added_fix = ftmp; /* Keep track of the last fix added. */
17557 }
17558
17559 /* If we found a barrier, drop back to that; any fixes that we
17560 could have reached but come after the barrier will now go in
17561 the next mini-pool. */
17562 if (last_barrier != NULL)
17563 {
17564 /* Reduce the refcount for those fixes that won't go into this
17565 pool after all. */
17566 for (fdel = last_barrier->next;
17567 fdel && fdel != ftmp;
17568 fdel = fdel->next)
17569 {
17570 fdel->minipool->refcount--;
17571 fdel->minipool = NULL;
17572 }
17573
17574 ftmp = last_barrier;
17575 }
17576 else
17577 {
17578 /* ftmp is the first fix that we can't fit into this pool and
17579 there are no natural barriers that we could use. Insert a
17580 new barrier in the code somewhere between the previous
17581 fix and this one, and arrange to jump around it. */
17582 HOST_WIDE_INT max_address;
17583
17584 /* The last item on the list of fixes must be a barrier, so
17585 we can never run off the end of the list of fixes without
17586 last_barrier being set. */
17587 gcc_assert (ftmp);
17588
17589 max_address = minipool_vector_head->max_address;
17590 /* Check that there isn't another fix that is in range that
17591 we couldn't fit into this pool because the pool was
17592 already too large: we need to put the pool before such an
17593 instruction. The pool itself may come just after the
17594 fix because create_fix_barrier also allows space for a
17595 jump instruction. */
17596 if (ftmp->address < max_address)
17597 max_address = ftmp->address + 1;
17598
17599 last_barrier = create_fix_barrier (last_added_fix, max_address);
17600 }
17601
17602 assign_minipool_offsets (last_barrier);
17603
17604 while (ftmp)
17605 {
17606 if (!BARRIER_P (ftmp->insn)
17607 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17608 == NULL))
17609 break;
17610
17611 ftmp = ftmp->next;
17612 }
17613
17614 /* Scan over the fixes we have identified for this pool, fixing them
17615 up and adding the constants to the pool itself. */
17616 for (this_fix = fix; this_fix && ftmp != this_fix;
17617 this_fix = this_fix->next)
17618 if (!BARRIER_P (this_fix->insn))
17619 {
17620 rtx addr
17621 = plus_constant (Pmode,
17622 gen_rtx_LABEL_REF (VOIDmode,
17623 minipool_vector_label),
17624 this_fix->minipool->offset);
17625 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17626 }
17627
17628 dump_minipool (last_barrier->insn);
17629 fix = ftmp;
17630 }
17631
17632 /* From now on we must synthesize any constants that we can't handle
17633 directly. This can happen if the RTL gets split during final
17634 instruction generation. */
17635 cfun->machine->after_arm_reorg = 1;
17636
17637 /* Free the minipool memory. */
17638 obstack_free (&minipool_obstack, minipool_startobj);
17639 }
17640 \f
17641 /* Routines to output assembly language. */
17642
17643 /* Return string representation of passed in real value. */
17644 static const char *
17645 fp_const_from_val (REAL_VALUE_TYPE *r)
17646 {
17647 if (!fp_consts_inited)
17648 init_fp_table ();
17649
17650 gcc_assert (real_equal (r, &value_fp0));
17651 return "0";
17652 }
17653
17654 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17655 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17656 is in the list, and UPDATE is true iff the list contains an explicit
17657 update of the base register. */
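/* For example (illustrative), a typical epilogue pop with SP writeback is
   emitted as "pop {r4, r5, pc}", while the same register list in an
   interrupt handler that returns via PC is emitted as
   "ldmfd sp!, {r4, r5, pc}^".  */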
17658 void
17659 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17660 bool update)
17661 {
17662 int i;
17663 char pattern[100];
17664 int offset;
17665 const char *conditional;
17666 int num_saves = XVECLEN (operands[0], 0);
17667 unsigned int regno;
17668 unsigned int regno_base = REGNO (operands[1]);
17669 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17670
17671 offset = 0;
17672 offset += update ? 1 : 0;
17673 offset += return_pc ? 1 : 0;
17674
17675 /* Is the base register in the list? */
17676 for (i = offset; i < num_saves; i++)
17677 {
17678 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17679 /* If SP is in the list, then the base register must be SP. */
17680 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17681 /* If base register is in the list, there must be no explicit update. */
17682 if (regno == regno_base)
17683 gcc_assert (!update);
17684 }
17685
17686 conditional = reverse ? "%?%D0" : "%?%d0";
17687 /* Can't use POP if returning from an interrupt. */
17688 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17689 sprintf (pattern, "pop%s\t{", conditional);
17690 else
17691 {
17692 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17693 It's just a convention; their semantics are identical. */
17694 if (regno_base == SP_REGNUM)
17695 sprintf (pattern, "ldmfd%s\t", conditional);
17696 else if (update)
17697 sprintf (pattern, "ldmia%s\t", conditional);
17698 else
17699 sprintf (pattern, "ldm%s\t", conditional);
17700
17701 strcat (pattern, reg_names[regno_base]);
17702 if (update)
17703 strcat (pattern, "!, {");
17704 else
17705 strcat (pattern, ", {");
17706 }
17707
17708 /* Output the first destination register. */
17709 strcat (pattern,
17710 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17711
17712 /* Output the rest of the destination registers. */
17713 for (i = offset + 1; i < num_saves; i++)
17714 {
17715 strcat (pattern, ", ");
17716 strcat (pattern,
17717 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17718 }
17719
17720 strcat (pattern, "}");
17721
17722 if (interrupt_p && return_pc)
17723 strcat (pattern, "^");
17724
17725 output_asm_insn (pattern, &cond);
17726 }
17727
17728
17729 /* Output the assembly for a store multiple. */
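/* For example (illustrative), pushing d8-d10 produces
   "vpush.64 {d8, d9, d10}", while the same store through another base
   register (say r3) produces "vstmdb.64 r3!, {d8, d9, d10}".  */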
17730
17731 const char *
17732 vfp_output_vstmd (rtx * operands)
17733 {
17734 char pattern[100];
17735 int p;
17736 int base;
17737 int i;
17738 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17739 ? XEXP (operands[0], 0)
17740 : XEXP (XEXP (operands[0], 0), 0);
17741 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17742
17743 if (push_p)
17744 strcpy (pattern, "vpush%?.64\t{%P1");
17745 else
17746 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17747
17748 p = strlen (pattern);
17749
17750 gcc_assert (REG_P (operands[1]));
17751
17752 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17753 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17754 {
17755 p += sprintf (&pattern[p], ", d%d", base + i);
17756 }
17757 strcpy (&pattern[p], "}");
17758
17759 output_asm_insn (pattern, operands);
17760 return "";
17761 }
17762
17763
17764 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17765 number of bytes pushed. */
17766
17767 static int
17768 vfp_emit_fstmd (int base_reg, int count)
17769 {
17770 rtx par;
17771 rtx dwarf;
17772 rtx tmp, reg;
17773 int i;
17774
17775 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17776 register pairs are stored by a store multiple insn. We avoid this
17777 by pushing an extra pair. */
17778 if (count == 2 && !arm_arch6)
17779 {
17780 if (base_reg == LAST_VFP_REGNUM - 3)
17781 base_reg -= 2;
17782 count++;
17783 }
17784
17785 /* FSTMD may not store more than 16 doubleword registers at once. Split
17786 larger stores into multiple parts (up to a maximum of two, in
17787 practice). */
17788 if (count > 16)
17789 {
17790 int saved;
17791 /* NOTE: base_reg is an internal register number, so each D register
17792 counts as 2. */
17793 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17794 saved += vfp_emit_fstmd (base_reg, 16);
17795 return saved;
17796 }
17797
17798 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17799 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17800
17801 reg = gen_rtx_REG (DFmode, base_reg);
17802 base_reg += 2;
17803
17804 XVECEXP (par, 0, 0)
17805 = gen_rtx_SET (gen_frame_mem
17806 (BLKmode,
17807 gen_rtx_PRE_MODIFY (Pmode,
17808 stack_pointer_rtx,
17809 plus_constant
17810 (Pmode, stack_pointer_rtx,
17811 - (count * 8)))
17812 ),
17813 gen_rtx_UNSPEC (BLKmode,
17814 gen_rtvec (1, reg),
17815 UNSPEC_PUSH_MULT));
17816
17817 tmp = gen_rtx_SET (stack_pointer_rtx,
17818 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17819 RTX_FRAME_RELATED_P (tmp) = 1;
17820 XVECEXP (dwarf, 0, 0) = tmp;
17821
17822 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17823 RTX_FRAME_RELATED_P (tmp) = 1;
17824 XVECEXP (dwarf, 0, 1) = tmp;
17825
17826 for (i = 1; i < count; i++)
17827 {
17828 reg = gen_rtx_REG (DFmode, base_reg);
17829 base_reg += 2;
17830 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17831
17832 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17833 plus_constant (Pmode,
17834 stack_pointer_rtx,
17835 i * 8)),
17836 reg);
17837 RTX_FRAME_RELATED_P (tmp) = 1;
17838 XVECEXP (dwarf, 0, i + 1) = tmp;
17839 }
17840
17841 par = emit_insn (par);
17842 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17843 RTX_FRAME_RELATED_P (par) = 1;
17844
17845 return count * 8;
17846 }
17847
17848 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
17849 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
17850
17851 bool
17852 detect_cmse_nonsecure_call (tree addr)
17853 {
17854 if (!addr)
17855 return FALSE;
17856
17857 tree fntype = TREE_TYPE (addr);
17858 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17859 TYPE_ATTRIBUTES (fntype)))
17860 return TRUE;
17861 return FALSE;
17862 }
17863
17864
17865 /* Emit a call instruction with pattern PAT. ADDR is the address of
17866 the call target. */
17867
17868 void
17869 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17870 {
17871 rtx insn;
17872
17873 insn = emit_call_insn (pat);
17874
17875 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17876 If the call might use such an entry, add a use of the PIC register
17877 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17878 if (TARGET_VXWORKS_RTP
17879 && flag_pic
17880 && !sibcall
17881 && GET_CODE (addr) == SYMBOL_REF
17882 && (SYMBOL_REF_DECL (addr)
17883 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17884 : !SYMBOL_REF_LOCAL_P (addr)))
17885 {
17886 require_pic_register ();
17887 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17888 }
17889
17890 if (TARGET_AAPCS_BASED)
17891 {
17892 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17893 linker. We need to add an IP clobber to allow setting
17894 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17895 is not needed since it's a fixed register. */
17896 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17897 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17898 }
17899 }
17900
17901 /* Output a 'call' insn. */
17902 const char *
17903 output_call (rtx *operands)
17904 {
17905 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17906
17907 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
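/* A sketch of the sequence emitted when the call target is lr itself,
   assuming arm_arch4t so that bx is used for the final branch:

	mov	ip, lr
	mov	lr, pc
	bx	ip

   The first mov frees lr so that the return address can be stored in it. */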
17908 if (REGNO (operands[0]) == LR_REGNUM)
17909 {
17910 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17911 output_asm_insn ("mov%?\t%0, %|lr", operands);
17912 }
17913
17914 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17915
17916 if (TARGET_INTERWORK || arm_arch4t)
17917 output_asm_insn ("bx%?\t%0", operands);
17918 else
17919 output_asm_insn ("mov%?\t%|pc, %0", operands);
17920
17921 return "";
17922 }
17923
17924 /* Output a move from arm registers to arm registers of a long double
17925 OPERANDS[0] is the destination.
17926 OPERANDS[1] is the source. */
17927 const char *
17928 output_mov_long_double_arm_from_arm (rtx *operands)
17929 {
17930 /* We have to be careful here because the two might overlap. */
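/* Copy upwards when the destination starts below the source and
   downwards otherwise, so that no source register is overwritten
   before it has been read. */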
17931 int dest_start = REGNO (operands[0]);
17932 int src_start = REGNO (operands[1]);
17933 rtx ops[2];
17934 int i;
17935
17936 if (dest_start < src_start)
17937 {
17938 for (i = 0; i < 3; i++)
17939 {
17940 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17941 ops[1] = gen_rtx_REG (SImode, src_start + i);
17942 output_asm_insn ("mov%?\t%0, %1", ops);
17943 }
17944 }
17945 else
17946 {
17947 for (i = 2; i >= 0; i--)
17948 {
17949 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17950 ops[1] = gen_rtx_REG (SImode, src_start + i);
17951 output_asm_insn ("mov%?\t%0, %1", ops);
17952 }
17953 }
17954
17955 return "";
17956 }
17957
17958 void
17959 arm_emit_movpair (rtx dest, rtx src)
17960 {
17961 /* If the src is an immediate, simplify it. */
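/* The constant is split into its low and high 16-bit halves; on targets
   with movw/movt these two sets are typically rendered as, e.g. for
   0x12345678 (an illustrative sketch):

	movw	rd, #0x5678
	movt	rd, #0x1234

   The second set (and hence the movt) is skipped when the high half is
   zero. */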
17962 if (CONST_INT_P (src))
17963 {
17964 HOST_WIDE_INT val = INTVAL (src);
17965 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17966 if ((val >> 16) & 0x0000ffff)
17967 {
17968 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17969 GEN_INT (16)),
17970 GEN_INT ((val >> 16) & 0x0000ffff));
17971 rtx_insn *insn = get_last_insn ();
17972 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17973 }
17974 return;
17975 }
17976 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17977 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17978 rtx_insn *insn = get_last_insn ();
17979 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17980 }
17981
17982 /* Output a move between double words. It must be REG<-MEM
17983 or MEM<-REG. */
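/* For instance, a DImode load from [r2] into the register pair r0/r1 is
   printed as ldrd r0, [r2] when TARGET_LDRD is set, or as
   ldmia r2, {r0-r1} otherwise (an illustrative sketch; the cases below
   handle the different addressing modes and register-overlap hazards). */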
17984 const char *
17985 output_move_double (rtx *operands, bool emit, int *count)
17986 {
17987 enum rtx_code code0 = GET_CODE (operands[0]);
17988 enum rtx_code code1 = GET_CODE (operands[1]);
17989 rtx otherops[3];
17990 if (count)
17991 *count = 1;
17992
17993 /* The only case when this might happen is when
17994 you are looking at the length of a DImode instruction
17995 that has an invalid constant in it. */
17996 if (code0 == REG && code1 != MEM)
17997 {
17998 gcc_assert (!emit);
17999 *count = 2;
18000 return "";
18001 }
18002
18003 if (code0 == REG)
18004 {
18005 unsigned int reg0 = REGNO (operands[0]);
18006
18007 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18008
18009 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18010
18011 switch (GET_CODE (XEXP (operands[1], 0)))
18012 {
18013 case REG:
18014
18015 if (emit)
18016 {
18017 if (TARGET_LDRD
18018 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
18019 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18020 else
18021 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18022 }
18023 break;
18024
18025 case PRE_INC:
18026 gcc_assert (TARGET_LDRD);
18027 if (emit)
18028 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18029 break;
18030
18031 case PRE_DEC:
18032 if (emit)
18033 {
18034 if (TARGET_LDRD)
18035 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18036 else
18037 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18038 }
18039 break;
18040
18041 case POST_INC:
18042 if (emit)
18043 {
18044 if (TARGET_LDRD)
18045 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18046 else
18047 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18048 }
18049 break;
18050
18051 case POST_DEC:
18052 gcc_assert (TARGET_LDRD);
18053 if (emit)
18054 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18055 break;
18056
18057 case PRE_MODIFY:
18058 case POST_MODIFY:
18059 /* Auto-increment addressing modes should never have overlapping
18060 base and destination registers, and overlapping index registers
18061 are already prohibited, so this doesn't need to worry about
18062 fix_cm3_ldrd. */
18063 otherops[0] = operands[0];
18064 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18065 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18066
18067 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18068 {
18069 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18070 {
18071 /* Registers overlap so split out the increment. */
18072 if (emit)
18073 {
18074 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18075 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18076 }
18077 if (count)
18078 *count = 2;
18079 }
18080 else
18081 {
18082 /* Use a single insn if we can.
18083 FIXME: IWMMXT allows offsets larger than ldrd can
18084 handle, fix these up with a pair of ldr. */
18085 if (TARGET_THUMB2
18086 || !CONST_INT_P (otherops[2])
18087 || (INTVAL (otherops[2]) > -256
18088 && INTVAL (otherops[2]) < 256))
18089 {
18090 if (emit)
18091 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18092 }
18093 else
18094 {
18095 if (emit)
18096 {
18097 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18098 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18099 }
18100 if (count)
18101 *count = 2;
18102
18103 }
18104 }
18105 }
18106 else
18107 {
18108 /* Use a single insn if we can.
18109 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18110 fix these up with a pair of ldr. */
18111 if (TARGET_THUMB2
18112 || !CONST_INT_P (otherops[2])
18113 || (INTVAL (otherops[2]) > -256
18114 && INTVAL (otherops[2]) < 256))
18115 {
18116 if (emit)
18117 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18118 }
18119 else
18120 {
18121 if (emit)
18122 {
18123 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18124 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18125 }
18126 if (count)
18127 *count = 2;
18128 }
18129 }
18130 break;
18131
18132 case LABEL_REF:
18133 case CONST:
18134 /* We might be able to use ldrd %0, %1 here. However, the range is
18135 different from that of ldr/adr, and it is broken on some ARMv7-M
18136 implementations. */
18137 /* Use the second register of the pair to avoid problematic
18138 overlap. */
18139 otherops[1] = operands[1];
18140 if (emit)
18141 output_asm_insn ("adr%?\t%0, %1", otherops);
18142 operands[1] = otherops[0];
18143 if (emit)
18144 {
18145 if (TARGET_LDRD)
18146 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18147 else
18148 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18149 }
18150
18151 if (count)
18152 *count = 2;
18153 break;
18154
18155 /* ??? This needs checking for thumb2. */
18156 default:
18157 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18158 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18159 {
18160 otherops[0] = operands[0];
18161 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18162 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18163
18164 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18165 {
18166 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18167 {
18168 switch ((int) INTVAL (otherops[2]))
18169 {
18170 case -8:
18171 if (emit)
18172 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18173 return "";
18174 case -4:
18175 if (TARGET_THUMB2)
18176 break;
18177 if (emit)
18178 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18179 return "";
18180 case 4:
18181 if (TARGET_THUMB2)
18182 break;
18183 if (emit)
18184 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18185 return "";
18186 }
18187 }
18188 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18189 operands[1] = otherops[0];
18190 if (TARGET_LDRD
18191 && (REG_P (otherops[2])
18192 || TARGET_THUMB2
18193 || (CONST_INT_P (otherops[2])
18194 && INTVAL (otherops[2]) > -256
18195 && INTVAL (otherops[2]) < 256)))
18196 {
18197 if (reg_overlap_mentioned_p (operands[0],
18198 otherops[2]))
18199 {
18200 /* Swap base and index registers over to
18201 avoid a conflict. */
18202 std::swap (otherops[1], otherops[2]);
18203 }
18204 /* If both registers conflict, it will usually
18205 have been fixed by a splitter. */
18206 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18207 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18208 {
18209 if (emit)
18210 {
18211 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18212 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18213 }
18214 if (count)
18215 *count = 2;
18216 }
18217 else
18218 {
18219 otherops[0] = operands[0];
18220 if (emit)
18221 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18222 }
18223 return "";
18224 }
18225
18226 if (CONST_INT_P (otherops[2]))
18227 {
18228 if (emit)
18229 {
18230 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18231 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18232 else
18233 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18234 }
18235 }
18236 else
18237 {
18238 if (emit)
18239 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18240 }
18241 }
18242 else
18243 {
18244 if (emit)
18245 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18246 }
18247
18248 if (count)
18249 *count = 2;
18250
18251 if (TARGET_LDRD)
18252 return "ldrd%?\t%0, [%1]";
18253
18254 return "ldmia%?\t%1, %M0";
18255 }
18256 else
18257 {
18258 otherops[1] = adjust_address (operands[1], SImode, 4);
18259 /* Take care of overlapping base/data reg. */
18260 if (reg_mentioned_p (operands[0], operands[1]))
18261 {
18262 if (emit)
18263 {
18264 output_asm_insn ("ldr%?\t%0, %1", otherops);
18265 output_asm_insn ("ldr%?\t%0, %1", operands);
18266 }
18267 if (count)
18268 *count = 2;
18269
18270 }
18271 else
18272 {
18273 if (emit)
18274 {
18275 output_asm_insn ("ldr%?\t%0, %1", operands);
18276 output_asm_insn ("ldr%?\t%0, %1", otherops);
18277 }
18278 if (count)
18279 *count = 2;
18280 }
18281 }
18282 }
18283 }
18284 else
18285 {
18286 /* Constraints should ensure this. */
18287 gcc_assert (code0 == MEM && code1 == REG);
18288 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18289 || (TARGET_ARM && TARGET_LDRD));
18290
18291 switch (GET_CODE (XEXP (operands[0], 0)))
18292 {
18293 case REG:
18294 if (emit)
18295 {
18296 if (TARGET_LDRD)
18297 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18298 else
18299 output_asm_insn ("stm%?\t%m0, %M1", operands);
18300 }
18301 break;
18302
18303 case PRE_INC:
18304 gcc_assert (TARGET_LDRD);
18305 if (emit)
18306 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18307 break;
18308
18309 case PRE_DEC:
18310 if (emit)
18311 {
18312 if (TARGET_LDRD)
18313 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18314 else
18315 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18316 }
18317 break;
18318
18319 case POST_INC:
18320 if (emit)
18321 {
18322 if (TARGET_LDRD)
18323 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18324 else
18325 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18326 }
18327 break;
18328
18329 case POST_DEC:
18330 gcc_assert (TARGET_LDRD);
18331 if (emit)
18332 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18333 break;
18334
18335 case PRE_MODIFY:
18336 case POST_MODIFY:
18337 otherops[0] = operands[1];
18338 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18339 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18340
18341 /* IWMMXT allows offsets larger than ldrd can handle,
18342 fix these up with a pair of ldr. */
18343 if (!TARGET_THUMB2
18344 && CONST_INT_P (otherops[2])
18345 && (INTVAL (otherops[2]) <= -256
18346 || INTVAL (otherops[2]) >= 256))
18347 {
18348 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18349 {
18350 if (emit)
18351 {
18352 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18353 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18354 }
18355 if (count)
18356 *count = 2;
18357 }
18358 else
18359 {
18360 if (emit)
18361 {
18362 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18363 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18364 }
18365 if (count)
18366 *count = 2;
18367 }
18368 }
18369 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18370 {
18371 if (emit)
18372 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18373 }
18374 else
18375 {
18376 if (emit)
18377 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18378 }
18379 break;
18380
18381 case PLUS:
18382 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18383 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18384 {
18385 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18386 {
18387 case -8:
18388 if (emit)
18389 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18390 return "";
18391
18392 case -4:
18393 if (TARGET_THUMB2)
18394 break;
18395 if (emit)
18396 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18397 return "";
18398
18399 case 4:
18400 if (TARGET_THUMB2)
18401 break;
18402 if (emit)
18403 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18404 return "";
18405 }
18406 }
18407 if (TARGET_LDRD
18408 && (REG_P (otherops[2])
18409 || TARGET_THUMB2
18410 || (CONST_INT_P (otherops[2])
18411 && INTVAL (otherops[2]) > -256
18412 && INTVAL (otherops[2]) < 256)))
18413 {
18414 otherops[0] = operands[1];
18415 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18416 if (emit)
18417 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18418 return "";
18419 }
18420 /* Fall through */
18421
18422 default:
18423 otherops[0] = adjust_address (operands[0], SImode, 4);
18424 otherops[1] = operands[1];
18425 if (emit)
18426 {
18427 output_asm_insn ("str%?\t%1, %0", operands);
18428 output_asm_insn ("str%?\t%H1, %0", otherops);
18429 }
18430 if (count)
18431 *count = 2;
18432 }
18433 }
18434
18435 return "";
18436 }
18437
18438 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18439 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
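/* For example, loading a quad-word value from [r4] into r0-r3 uses a
   single ldmia r4, {r0-r3}, while a register-to-register move falls back
   to four individual mov instructions ordered to cope with overlap
   (a sketch of the cases handled below). */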
18440
18441 const char *
18442 output_move_quad (rtx *operands)
18443 {
18444 if (REG_P (operands[0]))
18445 {
18446 /* Load, or reg->reg move. */
18447
18448 if (MEM_P (operands[1]))
18449 {
18450 switch (GET_CODE (XEXP (operands[1], 0)))
18451 {
18452 case REG:
18453 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18454 break;
18455
18456 case LABEL_REF:
18457 case CONST:
18458 output_asm_insn ("adr%?\t%0, %1", operands);
18459 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18460 break;
18461
18462 default:
18463 gcc_unreachable ();
18464 }
18465 }
18466 else
18467 {
18468 rtx ops[2];
18469 int dest, src, i;
18470
18471 gcc_assert (REG_P (operands[1]));
18472
18473 dest = REGNO (operands[0]);
18474 src = REGNO (operands[1]);
18475
18476 /* This seems pretty dumb, but hopefully GCC won't try to do it
18477 very often. */
18478 if (dest < src)
18479 for (i = 0; i < 4; i++)
18480 {
18481 ops[0] = gen_rtx_REG (SImode, dest + i);
18482 ops[1] = gen_rtx_REG (SImode, src + i);
18483 output_asm_insn ("mov%?\t%0, %1", ops);
18484 }
18485 else
18486 for (i = 3; i >= 0; i--)
18487 {
18488 ops[0] = gen_rtx_REG (SImode, dest + i);
18489 ops[1] = gen_rtx_REG (SImode, src + i);
18490 output_asm_insn ("mov%?\t%0, %1", ops);
18491 }
18492 }
18493 }
18494 else
18495 {
18496 gcc_assert (MEM_P (operands[0]));
18497 gcc_assert (REG_P (operands[1]));
18498 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18499
18500 switch (GET_CODE (XEXP (operands[0], 0)))
18501 {
18502 case REG:
18503 output_asm_insn ("stm%?\t%m0, %M1", operands);
18504 break;
18505
18506 default:
18507 gcc_unreachable ();
18508 }
18509 }
18510
18511 return "";
18512 }
18513
18514 /* Output a VFP load or store instruction. */
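/* The common register or register-plus-offset case produces a plain vldr
   or vstr, e.g. vldr.64 d0, [r2, #8]; pre-decrement and post-increment
   addresses use the write-back vldm/vstm forms instead (illustrative). */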
18515
18516 const char *
18517 output_move_vfp (rtx *operands)
18518 {
18519 rtx reg, mem, addr, ops[2];
18520 int load = REG_P (operands[0]);
18521 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18522 int sp = (!TARGET_VFP_FP16INST
18523 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18524 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18525 const char *templ;
18526 char buff[50];
18527 machine_mode mode;
18528
18529 reg = operands[!load];
18530 mem = operands[load];
18531
18532 mode = GET_MODE (reg);
18533
18534 gcc_assert (REG_P (reg));
18535 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18536 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18537 || mode == SFmode
18538 || mode == DFmode
18539 || mode == HImode
18540 || mode == SImode
18541 || mode == DImode
18542 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18543 gcc_assert (MEM_P (mem));
18544
18545 addr = XEXP (mem, 0);
18546
18547 switch (GET_CODE (addr))
18548 {
18549 case PRE_DEC:
18550 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18551 ops[0] = XEXP (addr, 0);
18552 ops[1] = reg;
18553 break;
18554
18555 case POST_INC:
18556 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18557 ops[0] = XEXP (addr, 0);
18558 ops[1] = reg;
18559 break;
18560
18561 default:
18562 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18563 ops[0] = reg;
18564 ops[1] = mem;
18565 break;
18566 }
18567
18568 sprintf (buff, templ,
18569 load ? "ld" : "st",
18570 dp ? "64" : sp ? "32" : "16",
18571 dp ? "P" : "",
18572 integer_p ? "\t%@ int" : "");
18573 output_asm_insn (buff, ops);
18574
18575 return "";
18576 }
18577
18578 /* Output a Neon double-word or quad-word load or store, or a load
18579 or store for larger structure modes.
18580
18581 WARNING: The ordering of elements is weird in big-endian mode,
18582 because the EABI requires that vectors stored in memory appear
18583 as though they were stored by a VSTM instruction.
18584 GCC RTL defines element ordering based on in-memory order.
18585 This can be different from the architectural ordering of elements
18586 within a NEON register. The intrinsics defined in arm_neon.h use the
18587 NEON register element ordering, not the GCC RTL element ordering.
18588
18589 For example, the in-memory ordering of a big-endian quadword
18590 vector with 16-bit elements when stored from register pair {d0,d1}
18591 will be (lowest address first, d0[N] is NEON register element N):
18592
18593 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18594
18595 When necessary, quadword registers (dN, dN+1) are moved to ARM
18596 registers from rN in the order:
18597
18598 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18599
18600 So that STM/LDM can be used on vectors in ARM registers, and the
18601 same memory layout will result as if VSTM/VLDM were used.
18602
18603 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18604 possible, which allows use of appropriate alignment tags.
18605 Note that the choice of "64" is independent of the actual vector
18606 element size; this size simply ensures that the behavior is
18607 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18608
18609 Due to limitations of those instructions, use of VST1.64/VLD1.64
18610 is not possible if:
18611 - the address contains PRE_DEC, or
18612 - the mode refers to more than 4 double-word registers
18613
18614 In those cases, it would be possible to replace VSTM/VLDM by a
18615 sequence of instructions; this is not currently implemented since
18616 this is not certain to actually improve performance. */
18617
18618 const char *
18619 output_move_neon (rtx *operands)
18620 {
18621 rtx reg, mem, addr, ops[2];
18622 int regno, nregs, load = REG_P (operands[0]);
18623 const char *templ;
18624 char buff[50];
18625 machine_mode mode;
18626
18627 reg = operands[!load];
18628 mem = operands[load];
18629
18630 mode = GET_MODE (reg);
18631
18632 gcc_assert (REG_P (reg));
18633 regno = REGNO (reg);
18634 nregs = REG_NREGS (reg) / 2;
18635 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18636 || NEON_REGNO_OK_FOR_QUAD (regno));
18637 gcc_assert (VALID_NEON_DREG_MODE (mode)
18638 || VALID_NEON_QREG_MODE (mode)
18639 || VALID_NEON_STRUCT_MODE (mode));
18640 gcc_assert (MEM_P (mem));
18641
18642 addr = XEXP (mem, 0);
18643
18644 /* Strip off const from addresses like (const (plus (...))). */
18645 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18646 addr = XEXP (addr, 0);
18647
18648 switch (GET_CODE (addr))
18649 {
18650 case POST_INC:
18651 /* We have to use vldm / vstm for too-large modes. */
18652 if (nregs > 4)
18653 {
18654 templ = "v%smia%%?\t%%0!, %%h1";
18655 ops[0] = XEXP (addr, 0);
18656 }
18657 else
18658 {
18659 templ = "v%s1.64\t%%h1, %%A0";
18660 ops[0] = mem;
18661 }
18662 ops[1] = reg;
18663 break;
18664
18665 case PRE_DEC:
18666 /* We have to use vldm / vstm in this case, since there is no
18667 pre-decrement form of the vld1 / vst1 instructions. */
18668 templ = "v%smdb%%?\t%%0!, %%h1";
18669 ops[0] = XEXP (addr, 0);
18670 ops[1] = reg;
18671 break;
18672
18673 case POST_MODIFY:
18674 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18675 gcc_unreachable ();
18676
18677 case REG:
18678 /* We have to use vldm / vstm for too-large modes. */
18679 if (nregs > 1)
18680 {
18681 if (nregs > 4)
18682 templ = "v%smia%%?\t%%m0, %%h1";
18683 else
18684 templ = "v%s1.64\t%%h1, %%A0";
18685
18686 ops[0] = mem;
18687 ops[1] = reg;
18688 break;
18689 }
18690 /* Fall through. */
18691 case LABEL_REF:
18692 case PLUS:
18693 {
18694 int i;
18695 int overlap = -1;
18696 for (i = 0; i < nregs; i++)
18697 {
18698 /* We're only using DImode here because it's a convenient size. */
18699 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18700 ops[1] = adjust_address (mem, DImode, 8 * i);
18701 if (reg_overlap_mentioned_p (ops[0], mem))
18702 {
18703 gcc_assert (overlap == -1);
18704 overlap = i;
18705 }
18706 else
18707 {
18708 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18709 output_asm_insn (buff, ops);
18710 }
18711 }
18712 if (overlap != -1)
18713 {
18714 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18715 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18716 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18717 output_asm_insn (buff, ops);
18718 }
18719
18720 return "";
18721 }
18722
18723 default:
18724 gcc_unreachable ();
18725 }
18726
18727 sprintf (buff, templ, load ? "ld" : "st");
18728 output_asm_insn (buff, ops);
18729
18730 return "";
18731 }
18732
18733 /* Compute and return the length of neon_mov<mode>, where <mode> is
18734 one of VSTRUCT modes: EI, OI, CI or XI. */
18735 int
18736 arm_attr_length_move_neon (rtx_insn *insn)
18737 {
18738 rtx reg, mem, addr;
18739 int load;
18740 machine_mode mode;
18741
18742 extract_insn_cached (insn);
18743
18744 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18745 {
18746 mode = GET_MODE (recog_data.operand[0]);
18747 switch (mode)
18748 {
18749 case E_EImode:
18750 case E_OImode:
18751 return 8;
18752 case E_CImode:
18753 return 12;
18754 case E_XImode:
18755 return 16;
18756 default:
18757 gcc_unreachable ();
18758 }
18759 }
18760
18761 load = REG_P (recog_data.operand[0]);
18762 reg = recog_data.operand[!load];
18763 mem = recog_data.operand[load];
18764
18765 gcc_assert (MEM_P (mem));
18766
18767 addr = XEXP (mem, 0);
18768
18769 /* Strip off const from addresses like (const (plus (...))). */
18770 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18771 addr = XEXP (addr, 0);
18772
18773 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18774 {
18775 int insns = REG_NREGS (reg) / 2;
18776 return insns * 4;
18777 }
18778 else
18779 return 4;
18780 }
18781
18782 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18783 return zero. */
18784
18785 int
18786 arm_address_offset_is_imm (rtx_insn *insn)
18787 {
18788 rtx mem, addr;
18789
18790 extract_insn_cached (insn);
18791
18792 if (REG_P (recog_data.operand[0]))
18793 return 0;
18794
18795 mem = recog_data.operand[0];
18796
18797 gcc_assert (MEM_P (mem));
18798
18799 addr = XEXP (mem, 0);
18800
18801 if (REG_P (addr)
18802 || (GET_CODE (addr) == PLUS
18803 && REG_P (XEXP (addr, 0))
18804 && CONST_INT_P (XEXP (addr, 1))))
18805 return 1;
18806 else
18807 return 0;
18808 }
18809
18810 /* Output an ADD r, s, #n where n may be too big for one instruction.
18811 If adding zero to a register in place (a no-op), output nothing. */
18812 const char *
18813 output_add_immediate (rtx *operands)
18814 {
18815 HOST_WIDE_INT n = INTVAL (operands[2]);
18816
18817 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18818 {
18819 if (n < 0)
18820 output_multi_immediate (operands,
18821 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18822 -n);
18823 else
18824 output_multi_immediate (operands,
18825 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18826 n);
18827 }
18828
18829 return "";
18830 }
18831
18832 /* Output a multiple immediate operation.
18833 OPERANDS is the vector of operands referred to in the output patterns.
18834 INSTR1 is the output pattern to use for the first constant.
18835 INSTR2 is the output pattern to use for subsequent constants.
18836 IMMED_OP is the index of the constant slot in OPERANDS.
18837 N is the constant value. */
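/* For example, adding the constant 0x10004, which is not a valid single
   ARM immediate, is split into two instructions (a worked sketch with
   arbitrary registers):

	add	r0, r1, #4
	add	r0, r0, #65536

   Each piece is an 8-bit value at an even bit position, which is exactly
   what the loop below extracts. */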
18838 static const char *
18839 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18840 int immed_op, HOST_WIDE_INT n)
18841 {
18842 #if HOST_BITS_PER_WIDE_INT > 32
18843 n &= 0xffffffff;
18844 #endif
18845
18846 if (n == 0)
18847 {
18848 /* Quick and easy output. */
18849 operands[immed_op] = const0_rtx;
18850 output_asm_insn (instr1, operands);
18851 }
18852 else
18853 {
18854 int i;
18855 const char * instr = instr1;
18856
18857 /* Note that n is never zero here (which would give no output). */
18858 for (i = 0; i < 32; i += 2)
18859 {
18860 if (n & (3 << i))
18861 {
18862 operands[immed_op] = GEN_INT (n & (255 << i));
18863 output_asm_insn (instr, operands);
18864 instr = instr2;
18865 i += 6;
18866 }
18867 }
18868 }
18869
18870 return "";
18871 }
18872
18873 /* Return the name of a shifter operation. */
18874 static const char *
18875 arm_shift_nmem (enum rtx_code code)
18876 {
18877 switch (code)
18878 {
18879 case ASHIFT:
18880 return ARM_LSL_NAME;
18881
18882 case ASHIFTRT:
18883 return "asr";
18884
18885 case LSHIFTRT:
18886 return "lsr";
18887
18888 case ROTATERT:
18889 return "ror";
18890
18891 default:
18892 abort ();
18893 }
18894 }
18895
18896 /* Return the appropriate ARM instruction for the operation code.
18897 The returned result should not be overwritten. OP is the rtx of the
18898 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18899 was shifted. */
18900 const char *
18901 arithmetic_instr (rtx op, int shift_first_arg)
18902 {
18903 switch (GET_CODE (op))
18904 {
18905 case PLUS:
18906 return "add";
18907
18908 case MINUS:
18909 return shift_first_arg ? "rsb" : "sub";
18910
18911 case IOR:
18912 return "orr";
18913
18914 case XOR:
18915 return "eor";
18916
18917 case AND:
18918 return "and";
18919
18920 case ASHIFT:
18921 case ASHIFTRT:
18922 case LSHIFTRT:
18923 case ROTATERT:
18924 return arm_shift_nmem (GET_CODE (op));
18925
18926 default:
18927 gcc_unreachable ();
18928 }
18929 }
18930
18931 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18932 for the operation code. The returned result should not be overwritten.
18933 OP is the rtx of the shift.
18934 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18935 constant shift amount otherwise. */
18936 static const char *
18937 shift_op (rtx op, HOST_WIDE_INT *amountp)
18938 {
18939 const char * mnem;
18940 enum rtx_code code = GET_CODE (op);
18941
18942 switch (code)
18943 {
18944 case ROTATE:
18945 if (!CONST_INT_P (XEXP (op, 1)))
18946 {
18947 output_operand_lossage ("invalid shift operand");
18948 return NULL;
18949 }
18950
18951 code = ROTATERT;
18952 *amountp = 32 - INTVAL (XEXP (op, 1));
18953 mnem = "ror";
18954 break;
18955
18956 case ASHIFT:
18957 case ASHIFTRT:
18958 case LSHIFTRT:
18959 case ROTATERT:
18960 mnem = arm_shift_nmem (code);
18961 if (CONST_INT_P (XEXP (op, 1)))
18962 {
18963 *amountp = INTVAL (XEXP (op, 1));
18964 }
18965 else if (REG_P (XEXP (op, 1)))
18966 {
18967 *amountp = -1;
18968 return mnem;
18969 }
18970 else
18971 {
18972 output_operand_lossage ("invalid shift operand");
18973 return NULL;
18974 }
18975 break;
18976
18977 case MULT:
18978 /* We never have to worry about the amount being other than a
18979 power of 2, since this case can never be reloaded from a reg. */
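/* E.g. a MULT by 8 in an operand position is printed as lsl #3. */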
18980 if (!CONST_INT_P (XEXP (op, 1)))
18981 {
18982 output_operand_lossage ("invalid shift operand");
18983 return NULL;
18984 }
18985
18986 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18987
18988 /* Amount must be a power of two. */
18989 if (*amountp & (*amountp - 1))
18990 {
18991 output_operand_lossage ("invalid shift operand");
18992 return NULL;
18993 }
18994
18995 *amountp = exact_log2 (*amountp);
18996 gcc_assert (IN_RANGE (*amountp, 0, 31));
18997 return ARM_LSL_NAME;
18998
18999 default:
19000 output_operand_lossage ("invalid shift operand");
19001 return NULL;
19002 }
19003
19004 /* This is not 100% correct, but follows from the desire to merge
19005 multiplication by a power of 2 with the recognizer for a
19006 shift. >=32 is not a valid shift for "lsl", so we must try and
19007 output a shift that produces the correct arithmetical result.
19008 Using lsr #32 is identical except for the fact that the carry bit
19009 is not set correctly if we set the flags; but we never use the
19010 carry bit from such an operation, so we can ignore that. */
19011 if (code == ROTATERT)
19012 /* Rotate is just modulo 32. */
19013 *amountp &= 31;
19014 else if (*amountp != (*amountp & 31))
19015 {
19016 if (code == ASHIFT)
19017 mnem = "lsr";
19018 *amountp = 32;
19019 }
19020
19021 /* Shifts of 0 are no-ops. */
19022 if (*amountp == 0)
19023 return NULL;
19024
19025 return mnem;
19026 }
19027
19028 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19029 because /bin/as is horribly restrictive. The judgement about
19030 whether or not each character is 'printable' (and can be output as
19031 is) or not (and must be printed with an octal escape) must be made
19032 with reference to the *host* character set -- the situation is
19033 similar to that discussed in the comments above pp_c_char in
19034 c-pretty-print.c. */
19035
19036 #define MAX_ASCII_LEN 51
19037
19038 void
19039 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19040 {
19041 int i;
19042 int len_so_far = 0;
19043
19044 fputs ("\t.ascii\t\"", stream);
19045
19046 for (i = 0; i < len; i++)
19047 {
19048 int c = p[i];
19049
19050 if (len_so_far >= MAX_ASCII_LEN)
19051 {
19052 fputs ("\"\n\t.ascii\t\"", stream);
19053 len_so_far = 0;
19054 }
19055
19056 if (ISPRINT (c))
19057 {
19058 if (c == '\\' || c == '\"')
19059 {
19060 putc ('\\', stream);
19061 len_so_far++;
19062 }
19063 putc (c, stream);
19064 len_so_far++;
19065 }
19066 else
19067 {
19068 fprintf (stream, "\\%03o", c);
19069 len_so_far += 4;
19070 }
19071 }
19072
19073 fputs ("\"\n", stream);
19074 }
19075 \f
19076 /* Whether a register is callee saved or not. This is necessary because high
19077 registers are marked as caller saved when optimizing for size on Thumb-1
19078 targets, despite being callee saved, in order to avoid using them. */
19079 #define callee_saved_reg_p(reg) \
19080 (!call_used_regs[reg] \
19081 || (TARGET_THUMB1 && optimize_size \
19082 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19083
19084 /* Compute the register save mask for registers 0 through 12
19085 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19086
19087 static unsigned long
19088 arm_compute_save_reg0_reg12_mask (void)
19089 {
19090 unsigned long func_type = arm_current_func_type ();
19091 unsigned long save_reg_mask = 0;
19092 unsigned int reg;
19093
19094 if (IS_INTERRUPT (func_type))
19095 {
19096 unsigned int max_reg;
19097 /* Interrupt functions must not corrupt any registers,
19098 even call clobbered ones. If this is a leaf function
19099 we can just examine the registers used by the RTL, but
19100 otherwise we have to assume that whatever function is
19101 called might clobber anything, and so we have to save
19102 all the call-clobbered registers as well. */
19103 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19104 /* FIQ handlers have registers r8 - r12 banked, so
19105 we only need to check r0 - r7, Normal ISRs only
19106 bank r14 and r15, so we must check up to r12.
19107 r13 is the stack pointer which is always preserved,
19108 so we do not need to consider it here. */
19109 max_reg = 7;
19110 else
19111 max_reg = 12;
19112
19113 for (reg = 0; reg <= max_reg; reg++)
19114 if (df_regs_ever_live_p (reg)
19115 || (! crtl->is_leaf && call_used_regs[reg]))
19116 save_reg_mask |= (1 << reg);
19117
19118 /* Also save the pic base register if necessary. */
19119 if (flag_pic
19120 && !TARGET_SINGLE_PIC_BASE
19121 && arm_pic_register != INVALID_REGNUM
19122 && crtl->uses_pic_offset_table)
19123 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19124 }
19125 else if (IS_VOLATILE (func_type))
19126 {
19127 /* For noreturn functions we historically omitted register saves
19128 altogether. However, this really messes up debugging. As a
19129 compromise save just the frame pointers. Combined with the link
19130 register saved elsewhere this should be sufficient to get
19131 a backtrace. */
19132 if (frame_pointer_needed)
19133 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19134 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19135 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19136 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19137 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19138 }
19139 else
19140 {
19141 /* In the normal case we only need to save those registers
19142 which are call saved and which are used by this function. */
19143 for (reg = 0; reg <= 11; reg++)
19144 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19145 save_reg_mask |= (1 << reg);
19146
19147 /* Handle the frame pointer as a special case. */
19148 if (frame_pointer_needed)
19149 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19150
19151 /* If we aren't loading the PIC register,
19152 don't stack it even though it may be live. */
19153 if (flag_pic
19154 && !TARGET_SINGLE_PIC_BASE
19155 && arm_pic_register != INVALID_REGNUM
19156 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19157 || crtl->uses_pic_offset_table))
19158 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19159
19160 /* The prologue will copy SP into R0, so save it. */
19161 if (IS_STACKALIGN (func_type))
19162 save_reg_mask |= 1;
19163 }
19164
19165 /* Save registers so the exception handler can modify them. */
19166 if (crtl->calls_eh_return)
19167 {
19168 unsigned int i;
19169
19170 for (i = 0; ; i++)
19171 {
19172 reg = EH_RETURN_DATA_REGNO (i);
19173 if (reg == INVALID_REGNUM)
19174 break;
19175 save_reg_mask |= 1 << reg;
19176 }
19177 }
19178
19179 return save_reg_mask;
19180 }
19181
19182 /* Return true if r3 is live at the start of the function. */
19183
19184 static bool
19185 arm_r3_live_at_start_p (void)
19186 {
19187 /* Just look at cfg info, which is still close enough to correct at this
19188 point. This gives false positives for broken functions that might use
19189 uninitialized data that happens to be allocated in r3, but who cares? */
19190 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19191 }
19192
19193 /* Compute the number of bytes used to store the static chain register on the
19194 stack, above the stack frame. We need to know this accurately to get the
19195 alignment of the rest of the stack frame correct. */
19196
19197 static int
19198 arm_compute_static_chain_stack_bytes (void)
19199 {
19200 /* See the defining assertion in arm_expand_prologue. */
19201 if (IS_NESTED (arm_current_func_type ())
19202 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19203 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19204 || flag_stack_clash_protection)
19205 && !df_regs_ever_live_p (LR_REGNUM)))
19206 && arm_r3_live_at_start_p ()
19207 && crtl->args.pretend_args_size == 0)
19208 return 4;
19209
19210 return 0;
19211 }
19212
19213 /* Compute a bit mask of which core registers need to be
19214 saved on the stack for the current function.
19215 This is used by arm_compute_frame_layout, which may add extra registers. */
19216
19217 static unsigned long
19218 arm_compute_save_core_reg_mask (void)
19219 {
19220 unsigned int save_reg_mask = 0;
19221 unsigned long func_type = arm_current_func_type ();
19222 unsigned int reg;
19223
19224 if (IS_NAKED (func_type))
19225 /* This should never really happen. */
19226 return 0;
19227
19228 /* If we are creating a stack frame, then we must save the frame pointer,
19229 IP (which will hold the old stack pointer), LR and the PC. */
19230 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19231 save_reg_mask |=
19232 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19233 | (1 << IP_REGNUM)
19234 | (1 << LR_REGNUM)
19235 | (1 << PC_REGNUM);
19236
19237 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19238
19239 /* Decide if we need to save the link register.
19240 Interrupt routines have their own banked link register,
19241 so they never need to save it.
19242 Otherwise if we do not use the link register we do not need to save
19243 it. If we are pushing other registers onto the stack however, we
19244 can save an instruction in the epilogue by pushing the link register
19245 now and then popping it back into the PC. This incurs extra memory
19246 accesses though, so we only do it when optimizing for size, and only
19247 if we know that we will not need a fancy return sequence. */
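/* That is, prefer push {r4, lr} ... pop {r4, pc} over
   push {r4} ... pop {r4}; bx lr, trading extra memory traffic for one
   fewer instruction (illustrative). */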
19248 if (df_regs_ever_live_p (LR_REGNUM)
19249 || (save_reg_mask
19250 && optimize_size
19251 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19252 && !crtl->tail_call_emit
19253 && !crtl->calls_eh_return))
19254 save_reg_mask |= 1 << LR_REGNUM;
19255
19256 if (cfun->machine->lr_save_eliminated)
19257 save_reg_mask &= ~ (1 << LR_REGNUM);
19258
19259 if (TARGET_REALLY_IWMMXT
19260 && ((bit_count (save_reg_mask)
19261 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19262 arm_compute_static_chain_stack_bytes())
19263 ) % 2) != 0)
19264 {
19265 /* The total number of registers that are going to be pushed
19266 onto the stack is odd. We need to ensure that the stack
19267 is 64-bit aligned before we start to save iWMMXt registers,
19268 and also before we start to create locals. (A local variable
19269 might be a double or long long which we will load/store using
19270 an iWMMXt instruction). Therefore we need to push another
19271 ARM register, so that the stack will be 64-bit aligned. We
19272 try to avoid using the arg registers (r0 -r3) as they might be
19273 used to pass values in a tail call. */
19274 for (reg = 4; reg <= 12; reg++)
19275 if ((save_reg_mask & (1 << reg)) == 0)
19276 break;
19277
19278 if (reg <= 12)
19279 save_reg_mask |= (1 << reg);
19280 else
19281 {
19282 cfun->machine->sibcall_blocked = 1;
19283 save_reg_mask |= (1 << 3);
19284 }
19285 }
19286
19287 /* We may need to push an additional register for use initializing the
19288 PIC base register. */
19289 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19290 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19291 {
19292 reg = thumb_find_work_register (1 << 4);
19293 if (!call_used_regs[reg])
19294 save_reg_mask |= (1 << reg);
19295 }
19296
19297 return save_reg_mask;
19298 }
19299
19300 /* Compute a bit mask of which core registers need to be
19301 saved on the stack for the current function. */
19302 static unsigned long
19303 thumb1_compute_save_core_reg_mask (void)
19304 {
19305 unsigned long mask;
19306 unsigned reg;
19307
19308 mask = 0;
19309 for (reg = 0; reg < 12; reg ++)
19310 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19311 mask |= 1 << reg;
19312
19313 /* Handle the frame pointer as a special case. */
19314 if (frame_pointer_needed)
19315 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19316
19317 if (flag_pic
19318 && !TARGET_SINGLE_PIC_BASE
19319 && arm_pic_register != INVALID_REGNUM
19320 && crtl->uses_pic_offset_table)
19321 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19322
19323 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19324 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19325 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19326
19327 /* LR will also be pushed if any lo regs are pushed. */
19328 if (mask & 0xff || thumb_force_lr_save ())
19329 mask |= (1 << LR_REGNUM);
19330
19331 /* Make sure we have a low work register if we need one.
19332 We will need one if we are going to push a high register,
19333 but we are not currently intending to push a low register. */
19334 if ((mask & 0xff) == 0
19335 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19336 {
19337 /* Use thumb_find_work_register to choose which register
19338 we will use. If the register is live then we will
19339 have to push it. Use LAST_LO_REGNUM as our fallback
19340 choice for the register to select. */
19341 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19342 /* Make sure the register returned by thumb_find_work_register is
19343 not part of the return value. */
19344 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19345 reg = LAST_LO_REGNUM;
19346
19347 if (callee_saved_reg_p (reg))
19348 mask |= 1 << reg;
19349 }
19350
19351 /* The 504 below is 8 bytes less than 512 because there are two possible
19352 alignment words. We can't tell here if they will be present or not, so we
19353 have to play it safe and assume that they are. */
19354 if ((CALLER_INTERWORKING_SLOT_SIZE +
19355 ROUND_UP_WORD (get_frame_size ()) +
19356 crtl->outgoing_args_size) >= 504)
19357 {
19358 /* This is the same as the code in thumb1_expand_prologue() which
19359 determines which register to use for stack decrement. */
19360 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19361 if (mask & (1 << reg))
19362 break;
19363
19364 if (reg > LAST_LO_REGNUM)
19365 {
19366 /* Make sure we have a register available for stack decrement. */
19367 mask |= 1 << LAST_LO_REGNUM;
19368 }
19369 }
19370
19371 return mask;
19372 }
19373
19374
19375 /* Return the number of bytes required to save VFP registers. */
19376 static int
19377 arm_get_vfp_saved_size (void)
19378 {
19379 unsigned int regno;
19380 int count;
19381 int saved;
19382
19383 saved = 0;
19384 /* Space for saved VFP registers. */
19385 if (TARGET_HARD_FLOAT)
19386 {
19387 count = 0;
19388 for (regno = FIRST_VFP_REGNUM;
19389 regno < LAST_VFP_REGNUM;
19390 regno += 2)
19391 {
19392 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19393 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19394 {
19395 if (count > 0)
19396 {
19397 /* Workaround ARM10 VFPr1 bug. */
19398 if (count == 2 && !arm_arch6)
19399 count++;
19400 saved += count * 8;
19401 }
19402 count = 0;
19403 }
19404 else
19405 count++;
19406 }
19407 if (count > 0)
19408 {
19409 if (count == 2 && !arm_arch6)
19410 count++;
19411 saved += count * 8;
19412 }
19413 }
19414 return saved;
19415 }
19416
19417
19418 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19419 everything bar the final return instruction. If SIMPLE_RETURN is true,
19420 then do not output the epilogue, because it has already been emitted in RTL. */
19421 const char *
19422 output_return_instruction (rtx operand, bool really_return, bool reverse,
19423 bool simple_return)
19424 {
19425 char conditional[10];
19426 char instr[100];
19427 unsigned reg;
19428 unsigned long live_regs_mask;
19429 unsigned long func_type;
19430 arm_stack_offsets *offsets;
19431
19432 func_type = arm_current_func_type ();
19433
19434 if (IS_NAKED (func_type))
19435 return "";
19436
19437 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19438 {
19439 /* If this function was declared non-returning, and we have
19440 found a tail call, then we have to trust that the called
19441 function won't return. */
19442 if (really_return)
19443 {
19444 rtx ops[2];
19445
19446 /* Otherwise, trap an attempted return by aborting. */
19447 ops[0] = operand;
19448 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19449 : "abort");
19450 assemble_external_libcall (ops[1]);
19451 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19452 }
19453
19454 return "";
19455 }
19456
19457 gcc_assert (!cfun->calls_alloca || really_return);
19458
19459 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19460
19461 cfun->machine->return_used_this_function = 1;
19462
19463 offsets = arm_get_frame_offsets ();
19464 live_regs_mask = offsets->saved_regs_mask;
19465
19466 if (!simple_return && live_regs_mask)
19467 {
19468 const char * return_reg;
19469
19470 /* If we do not have any special requirements for function exit
19471 (e.g. interworking) then we can load the return address
19472 directly into the PC. Otherwise we must load it into LR. */
19473 if (really_return
19474 && !IS_CMSE_ENTRY (func_type)
19475 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19476 return_reg = reg_names[PC_REGNUM];
19477 else
19478 return_reg = reg_names[LR_REGNUM];
19479
19480 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19481 {
19482 /* There are three possible reasons for the IP register
19483 being saved: 1) a stack frame was created, in which case
19484 IP contains the old stack pointer; 2) an ISR routine
19485 corrupted it; or 3) it was saved to align the stack on
19486 iWMMXt. In case 1, restore IP into SP; otherwise just
19487 restore IP. */
19488 if (frame_pointer_needed)
19489 {
19490 live_regs_mask &= ~ (1 << IP_REGNUM);
19491 live_regs_mask |= (1 << SP_REGNUM);
19492 }
19493 else
19494 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19495 }
19496
19497 /* On some ARM architectures it is faster to use LDR rather than
19498 LDM to load a single register. On other architectures, the
19499 cost is the same. In 26 bit mode, or for exception handlers,
19500 we have to use LDM to load the PC so that the CPSR is also
19501 restored. */
19502 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19503 if (live_regs_mask == (1U << reg))
19504 break;
19505
19506 if (reg <= LAST_ARM_REGNUM
19507 && (reg != LR_REGNUM
19508 || ! really_return
19509 || ! IS_INTERRUPT (func_type)))
19510 {
19511 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19512 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19513 }
19514 else
19515 {
19516 char *p;
19517 int first = 1;
19518
19519 /* Generate the load multiple instruction to restore the
19520 registers. Note we can get here, even if
19521 frame_pointer_needed is true, but only if sp already
19522 points to the base of the saved core registers. */
19523 if (live_regs_mask & (1 << SP_REGNUM))
19524 {
19525 unsigned HOST_WIDE_INT stack_adjust;
19526
19527 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19528 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19529
19530 if (stack_adjust && arm_arch5 && TARGET_ARM)
19531 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19532 else
19533 {
19534 /* If we can't use ldmib (SA110 bug),
19535 then try to pop r3 instead. */
19536 if (stack_adjust)
19537 live_regs_mask |= 1 << 3;
19538
19539 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19540 }
19541 }
19542 /* For interrupt returns we have to use an LDM rather than
19543 a POP so that we can use the exception return variant. */
19544 else if (IS_INTERRUPT (func_type))
19545 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19546 else
19547 sprintf (instr, "pop%s\t{", conditional);
19548
19549 p = instr + strlen (instr);
19550
19551 for (reg = 0; reg <= SP_REGNUM; reg++)
19552 if (live_regs_mask & (1 << reg))
19553 {
19554 int l = strlen (reg_names[reg]);
19555
19556 if (first)
19557 first = 0;
19558 else
19559 {
19560 memcpy (p, ", ", 2);
19561 p += 2;
19562 }
19563
19564 memcpy (p, "%|", 2);
19565 memcpy (p + 2, reg_names[reg], l);
19566 p += l + 2;
19567 }
19568
19569 if (live_regs_mask & (1 << LR_REGNUM))
19570 {
19571 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19572 /* If returning from an interrupt, restore the CPSR. */
19573 if (IS_INTERRUPT (func_type))
19574 strcat (p, "^");
19575 }
19576 else
19577 strcpy (p, "}");
19578 }
19579
19580 output_asm_insn (instr, & operand);
19581
19582 /* See if we need to generate an extra instruction to
19583 perform the actual function return. */
19584 if (really_return
19585 && func_type != ARM_FT_INTERWORKED
19586 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19587 {
19588 /* The return has already been handled
19589 by loading the LR into the PC. */
19590 return "";
19591 }
19592 }
19593
19594 if (really_return)
19595 {
19596 switch ((int) ARM_FUNC_TYPE (func_type))
19597 {
19598 case ARM_FT_ISR:
19599 case ARM_FT_FIQ:
19600 /* ??? This is wrong for unified assembly syntax. */
19601 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19602 break;
19603
19604 case ARM_FT_INTERWORKED:
19605 gcc_assert (arm_arch5 || arm_arch4t);
19606 sprintf (instr, "bx%s\t%%|lr", conditional);
19607 break;
19608
19609 case ARM_FT_EXCEPTION:
19610 /* ??? This is wrong for unified assembly syntax. */
19611 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19612 break;
19613
19614 default:
19615 if (IS_CMSE_ENTRY (func_type))
19616 {
19617 /* Check if we have to clear the 'GE bits', which are only used if
19618 parallel add and subtraction instructions are available. */
19619 if (TARGET_INT_SIMD)
19620 snprintf (instr, sizeof (instr),
19621 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19622 else
19623 snprintf (instr, sizeof (instr),
19624 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19625
19626 output_asm_insn (instr, & operand);
19627 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19628 {
19629 /* Clear the cumulative exception-status bits (0-4,7) and the
19630 condition code bits (28-31) of the FPSCR. We need to
19631 remember to clear the first scratch register used (IP) and
19632 save and restore the second (r4). */
19633 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19634 output_asm_insn (instr, & operand);
19635 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19636 output_asm_insn (instr, & operand);
19637 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19638 output_asm_insn (instr, & operand);
19639 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19640 output_asm_insn (instr, & operand);
19641 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19642 output_asm_insn (instr, & operand);
19643 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19644 output_asm_insn (instr, & operand);
19645 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19646 output_asm_insn (instr, & operand);
19647 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19648 output_asm_insn (instr, & operand);
19649 }
19650 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19651 }
19652 /* Use bx if it's available. */
19653 else if (arm_arch5 || arm_arch4t)
19654 sprintf (instr, "bx%s\t%%|lr", conditional);
19655 else
19656 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19657 break;
19658 }
19659
19660 output_asm_insn (instr, & operand);
19661 }
19662
19663 return "";
19664 }
19665
19666 /* Output in FILE asm statements needed to declare the NAME of the function
19667 defined by its DECL node. */
19668
19669 void
19670 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19671 {
19672 size_t cmse_name_len;
19673 char *cmse_name = 0;
19674 char cmse_prefix[] = "__acle_se_";
19675
19676 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19677 extra function label for each function with the 'cmse_nonsecure_entry'
19678 attribute. This extra function label should be prepended with
19679 '__acle_se_', telling the linker that it needs to create secure gateway
19680 veneers for this function. */
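/* E.g. a function foo carrying the attribute is emitted with two labels
   at its entry point, foo: and __acle_se_foo:, the latter also made
   global and given a function type directive so that the linker can
   create the secure gateway veneer (a sketch of the directives below). */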
19681 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19682 DECL_ATTRIBUTES (decl)))
19683 {
19684 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19685 cmse_name = XALLOCAVEC (char, cmse_name_len);
19686 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19687 targetm.asm_out.globalize_label (file, cmse_name);
19688
19689 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19690 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19691 }
19692
19693 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19694 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19695 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19696 ASM_OUTPUT_LABEL (file, name);
19697
19698 if (cmse_name)
19699 ASM_OUTPUT_LABEL (file, cmse_name);
19700
19701 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19702 }
19703
19704 /* Write the function name into the code section, directly preceding
19705 the function prologue.
19706
19707 Code will be output similar to this:
19708 t0
19709 .ascii "arm_poke_function_name", 0
19710 .align
19711 t1
19712 .word 0xff000000 + (t1 - t0)
19713 arm_poke_function_name
19714 mov ip, sp
19715 stmfd sp!, {fp, ip, lr, pc}
19716 sub fp, ip, #4
19717
19718 When performing a stack backtrace, code can inspect the value
19719 of 'pc' stored at 'fp' + 0. If the trace function then looks
19720 at location pc - 12 and the top 8 bits are set, then we know
19721 that there is a function name embedded immediately preceding this
19722 location and has length ((pc[-3]) & 0xff000000).
19723
19724 We assume that pc is declared as a pointer to an unsigned long.
19725
19726 It is of no benefit to output the function name if we are assembling
19727 a leaf function. These function types will not contain a stack
19728 backtrace structure, therefore it is not possible to determine the
19729 function name. */
19730 void
19731 arm_poke_function_name (FILE *stream, const char *name)
19732 {
19733 unsigned long alignlength;
19734 unsigned long length;
19735 rtx x;
19736
19737 length = strlen (name) + 1;
19738 alignlength = ROUND_UP_WORD (length);
19739
19740 ASM_OUTPUT_ASCII (stream, name, length);
19741 ASM_OUTPUT_ALIGN (stream, 2);
19742 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19743 assemble_aligned_integer (UNITS_PER_WORD, x);
19744 }
19745
19746 /* Place some comments into the assembler stream
19747 describing the current function. */
19748 static void
19749 arm_output_function_prologue (FILE *f)
19750 {
19751 unsigned long func_type;
19752
19753 /* Sanity check. */
19754 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19755
19756 func_type = arm_current_func_type ();
19757
19758 switch ((int) ARM_FUNC_TYPE (func_type))
19759 {
19760 default:
19761 case ARM_FT_NORMAL:
19762 break;
19763 case ARM_FT_INTERWORKED:
19764 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19765 break;
19766 case ARM_FT_ISR:
19767 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19768 break;
19769 case ARM_FT_FIQ:
19770 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19771 break;
19772 case ARM_FT_EXCEPTION:
19773 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19774 break;
19775 }
19776
19777 if (IS_NAKED (func_type))
19778 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19779
19780 if (IS_VOLATILE (func_type))
19781 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19782
19783 if (IS_NESTED (func_type))
19784 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19785 if (IS_STACKALIGN (func_type))
19786 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19787 if (IS_CMSE_ENTRY (func_type))
19788 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19789
19790 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19791 crtl->args.size,
19792 crtl->args.pretend_args_size,
19793 (HOST_WIDE_INT) get_frame_size ());
19794
19795 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19796 frame_pointer_needed,
19797 cfun->machine->uses_anonymous_args);
19798
19799 if (cfun->machine->lr_save_eliminated)
19800 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19801
19802 if (crtl->calls_eh_return)
19803 asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
19804
19805 }
19806
19807 static void
19808 arm_output_function_epilogue (FILE *)
19809 {
19810 arm_stack_offsets *offsets;
19811
19812 if (TARGET_THUMB1)
19813 {
19814 int regno;
19815
19816 /* Emit any call-via-reg trampolines that are needed for v4t support
19817 of call_reg and call_value_reg type insns. */
19818 for (regno = 0; regno < LR_REGNUM; regno++)
19819 {
19820 rtx label = cfun->machine->call_via[regno];
19821
19822 if (label != NULL)
19823 {
19824 switch_to_section (function_section (current_function_decl));
19825 targetm.asm_out.internal_label (asm_out_file, "L",
19826 CODE_LABEL_NUMBER (label));
19827 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19828 }
19829 }
19830
19831 /* ??? Probably not safe to set this here, since it assumes that a
19832 function will be emitted as assembly immediately after we generate
19833 RTL for it. This does not happen for inline functions. */
19834 cfun->machine->return_used_this_function = 0;
19835 }
19836 else /* TARGET_32BIT */
19837 {
19838 /* We need to take into account any stack-frame rounding. */
19839 offsets = arm_get_frame_offsets ();
19840
19841 gcc_assert (!use_return_insn (FALSE, NULL)
19842 || (cfun->machine->return_used_this_function != 0)
19843 || offsets->saved_regs == offsets->outgoing_args
19844 || frame_pointer_needed);
19845 }
19846 }
19847
19848 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19849 STR and STRD. If an even number of registers is being pushed, an
19850 STRD pattern is created for each register pair. If an
19851 odd number of registers is pushed, emit an initial STR followed by
19852 as many STRD instructions as are needed. This works best when the
19853 stack is initially 64-bit aligned (the normal case), since it
19854 ensures that each STRD is also 64-bit aligned. */
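/* A minimal sketch of the emitted code (illustrative only), assuming
   r4, r5 and r6 are the registers in SAVED_REGS_MASK:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */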
19855 static void
19856 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19857 {
19858 int num_regs = 0;
19859 int i;
19860 int regno;
19861 rtx par = NULL_RTX;
19862 rtx dwarf = NULL_RTX;
19863 rtx tmp;
19864 bool first = true;
19865
19866 num_regs = bit_count (saved_regs_mask);
19867
19868 /* Must be at least one register to save, and can't save SP or PC. */
19869 gcc_assert (num_regs > 0 && num_regs <= 14);
19870 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19871 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19872
19873 /* Create sequence for DWARF info. All the frame-related data for
19874 debugging is held in this wrapper. */
19875 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19876
19877 /* Describe the stack adjustment. */
19878 tmp = gen_rtx_SET (stack_pointer_rtx,
19879 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19880 RTX_FRAME_RELATED_P (tmp) = 1;
19881 XVECEXP (dwarf, 0, 0) = tmp;
19882
19883 /* Find the first register. */
19884 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19885 ;
19886
19887 i = 0;
19888
19889 /* If there's an odd number of registers to push, start off by
19890 pushing a single register. This ensures that subsequent strd
19891 operations are dword aligned (assuming that SP was originally
19892 64-bit aligned). */
19893 if ((num_regs & 1) != 0)
19894 {
19895 rtx reg, mem, insn;
19896
19897 reg = gen_rtx_REG (SImode, regno);
19898 if (num_regs == 1)
19899 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19900 stack_pointer_rtx));
19901 else
19902 mem = gen_frame_mem (Pmode,
19903 gen_rtx_PRE_MODIFY
19904 (Pmode, stack_pointer_rtx,
19905 plus_constant (Pmode, stack_pointer_rtx,
19906 -4 * num_regs)));
19907
19908 tmp = gen_rtx_SET (mem, reg);
19909 RTX_FRAME_RELATED_P (tmp) = 1;
19910 insn = emit_insn (tmp);
19911 RTX_FRAME_RELATED_P (insn) = 1;
19912 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19913 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19914 RTX_FRAME_RELATED_P (tmp) = 1;
19915 i++;
19916 regno++;
19917 XVECEXP (dwarf, 0, i) = tmp;
19918 first = false;
19919 }
19920
19921 while (i < num_regs)
19922 if (saved_regs_mask & (1 << regno))
19923 {
19924 rtx reg1, reg2, mem1, mem2;
19925 rtx tmp0, tmp1, tmp2;
19926 int regno2;
19927
19928 /* Find the register to pair with this one. */
19929 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19930 regno2++)
19931 ;
19932
19933 reg1 = gen_rtx_REG (SImode, regno);
19934 reg2 = gen_rtx_REG (SImode, regno2);
19935
19936 if (first)
19937 {
19938 rtx insn;
19939
19940 first = false;
19941 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 -4 * num_regs));
19944 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19945 stack_pointer_rtx,
19946 -4 * (num_regs - 1)));
19947 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19948 plus_constant (Pmode, stack_pointer_rtx,
19949 -4 * (num_regs)));
19950 tmp1 = gen_rtx_SET (mem1, reg1);
19951 tmp2 = gen_rtx_SET (mem2, reg2);
19952 RTX_FRAME_RELATED_P (tmp0) = 1;
19953 RTX_FRAME_RELATED_P (tmp1) = 1;
19954 RTX_FRAME_RELATED_P (tmp2) = 1;
19955 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19956 XVECEXP (par, 0, 0) = tmp0;
19957 XVECEXP (par, 0, 1) = tmp1;
19958 XVECEXP (par, 0, 2) = tmp2;
19959 insn = emit_insn (par);
19960 RTX_FRAME_RELATED_P (insn) = 1;
19961 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19962 }
19963 else
19964 {
19965 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19966 stack_pointer_rtx,
19967 4 * i));
19968 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19969 stack_pointer_rtx,
19970 4 * (i + 1)));
19971 tmp1 = gen_rtx_SET (mem1, reg1);
19972 tmp2 = gen_rtx_SET (mem2, reg2);
19973 RTX_FRAME_RELATED_P (tmp1) = 1;
19974 RTX_FRAME_RELATED_P (tmp2) = 1;
19975 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19976 XVECEXP (par, 0, 0) = tmp1;
19977 XVECEXP (par, 0, 1) = tmp2;
19978 emit_insn (par);
19979 }
19980
19981 /* Create unwind information. This is an approximation. */
19982 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19983 plus_constant (Pmode,
19984 stack_pointer_rtx,
19985 4 * i)),
19986 reg1);
19987 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19988 plus_constant (Pmode,
19989 stack_pointer_rtx,
19990 4 * (i + 1))),
19991 reg2);
19992
19993 RTX_FRAME_RELATED_P (tmp1) = 1;
19994 RTX_FRAME_RELATED_P (tmp2) = 1;
19995 XVECEXP (dwarf, 0, i + 1) = tmp1;
19996 XVECEXP (dwarf, 0, i + 2) = tmp2;
19997 i += 2;
19998 regno = regno2 + 1;
19999 }
20000 else
20001 regno++;
20002
20003 return;
20004 }
20005
20006 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20007 whenever possible, otherwise it emits single-word stores. The first store
20008 also allocates stack space for all saved registers, using writeback with
20009 pre-indexed addressing. All other stores use offset addressing. If no STRD
20010 can be emitted, this function emits a sequence of single-word stores,
20011 and not an STM as before, because single-word stores provide more freedom
20012 for scheduling and can be turned into an STM by peephole optimizations. */
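/* A minimal sketch of the emitted code (illustrative only), assuming
   r4, r5, r6 and r8 are the registers in SAVED_REGS_MASK:

	strd	r4, r5, [sp, #-16]!
	str	r6, [sp, #8]
	str	r8, [sp, #12]  */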
20013 static void
20014 arm_emit_strd_push (unsigned long saved_regs_mask)
20015 {
20016 int num_regs = 0;
20017 int i, j, dwarf_index = 0;
20018 int offset = 0;
20019 rtx dwarf = NULL_RTX;
20020 rtx insn = NULL_RTX;
20021 rtx tmp, mem;
20022
20023 /* TODO: More efficient code can be emitted by changing the
20024 layout, e.g., first push all pairs that can use STRD to keep the
20025 stack aligned, and then push all other registers. */
20026 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20027 if (saved_regs_mask & (1 << i))
20028 num_regs++;
20029
20030 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20031 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20032 gcc_assert (num_regs > 0);
20033
20034 /* Create sequence for DWARF info. */
20035 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20036
20037 /* For dwarf info, we generate an explicit stack update. */
20038 tmp = gen_rtx_SET (stack_pointer_rtx,
20039 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20040 RTX_FRAME_RELATED_P (tmp) = 1;
20041 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20042
20043 /* Save registers. */
20044 offset = - 4 * num_regs;
20045 j = 0;
20046 while (j <= LAST_ARM_REGNUM)
20047 if (saved_regs_mask & (1 << j))
20048 {
20049 if ((j % 2 == 0)
20050 && (saved_regs_mask & (1 << (j + 1))))
20051 {
20052 /* The current register and the next register form a register pair
20053 for which STRD can be generated. */
20054 if (offset < 0)
20055 {
20056 /* Allocate stack space for all saved registers. */
20057 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20058 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20059 mem = gen_frame_mem (DImode, tmp);
20060 offset = 0;
20061 }
20062 else if (offset > 0)
20063 mem = gen_frame_mem (DImode,
20064 plus_constant (Pmode,
20065 stack_pointer_rtx,
20066 offset));
20067 else
20068 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20069
20070 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20071 RTX_FRAME_RELATED_P (tmp) = 1;
20072 tmp = emit_insn (tmp);
20073
20074 /* Record the first store insn. */
20075 if (dwarf_index == 1)
20076 insn = tmp;
20077
20078 /* Generate dwarf info. */
20079 mem = gen_frame_mem (SImode,
20080 plus_constant (Pmode,
20081 stack_pointer_rtx,
20082 offset));
20083 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20084 RTX_FRAME_RELATED_P (tmp) = 1;
20085 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20086
20087 mem = gen_frame_mem (SImode,
20088 plus_constant (Pmode,
20089 stack_pointer_rtx,
20090 offset + 4));
20091 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20092 RTX_FRAME_RELATED_P (tmp) = 1;
20093 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20094
20095 offset += 8;
20096 j += 2;
20097 }
20098 else
20099 {
20100 /* Emit a single word store. */
20101 if (offset < 0)
20102 {
20103 /* Allocate stack space for all saved registers. */
20104 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20105 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20106 mem = gen_frame_mem (SImode, tmp);
20107 offset = 0;
20108 }
20109 else if (offset > 0)
20110 mem = gen_frame_mem (SImode,
20111 plus_constant (Pmode,
20112 stack_pointer_rtx,
20113 offset));
20114 else
20115 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20116
20117 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20118 RTX_FRAME_RELATED_P (tmp) = 1;
20119 tmp = emit_insn (tmp);
20120
20121 /* Record the first store insn. */
20122 if (dwarf_index == 1)
20123 insn = tmp;
20124
20125 /* Generate dwarf info. */
20126 mem = gen_frame_mem (SImode,
20127 plus_constant (Pmode,
20128 stack_pointer_rtx,
20129 offset));
20130 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20131 RTX_FRAME_RELATED_P (tmp) = 1;
20132 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20133
20134 offset += 4;
20135 j += 1;
20136 }
20137 }
20138 else
20139 j++;
20140
20141 /* Attach dwarf info to the first insn we generate. */
20142 gcc_assert (insn != NULL_RTX);
20143 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20144 RTX_FRAME_RELATED_P (insn) = 1;
20145 }
20146
20147 /* Generate and emit an insn that we will recognize as a push_multi.
20148 Unfortunately, since this insn does not reflect very well the actual
20149 semantics of the operation, we need to annotate the insn for the benefit
20150 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20151 MASK for registers that should be annotated for DWARF2 frame unwind
20152 information. */
20153 static rtx
20154 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20155 {
20156 int num_regs = 0;
20157 int num_dwarf_regs = 0;
20158 int i, j;
20159 rtx par;
20160 rtx dwarf;
20161 int dwarf_par_index;
20162 rtx tmp, reg;
20163
20164 /* We don't record the PC in the dwarf frame information. */
20165 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20166
20167 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20168 {
20169 if (mask & (1 << i))
20170 num_regs++;
20171 if (dwarf_regs_mask & (1 << i))
20172 num_dwarf_regs++;
20173 }
20174
20175 gcc_assert (num_regs && num_regs <= 16);
20176 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20177
20178 /* For the body of the insn we are going to generate an UNSPEC in
20179 parallel with several USEs. This allows the insn to be recognized
20180 by the push_multi pattern in the arm.md file.
20181
20182 The body of the insn looks something like this:
20183
20184 (parallel [
20185 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20186 (const_int:SI <num>)))
20187 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20188 (use (reg:SI XX))
20189 (use (reg:SI YY))
20190 ...
20191 ])
20192
20193 For the frame note however, we try to be more explicit and actually
20194 show each register being stored into the stack frame, plus a (single)
20195 decrement of the stack pointer. We do it this way in order to be
20196 friendly to the stack unwinding code, which only wants to see a single
20197 stack decrement per instruction. The RTL we generate for the note looks
20198 something like this:
20199
20200 (sequence [
20201 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20202 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20203 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20204 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20205 ...
20206 ])
20207
20208 FIXME: In an ideal world the PRE_MODIFY would not exist and
20209 instead we'd have a parallel expression detailing all
20210 the stores to the various memory addresses so that debug
20211 information is more up-to-date. Remember, however, when rewriting
20212 this, to take care of the constraints imposed by the push instruction.
20213
20214 Note also that this has to be taken care of for the VFP registers.
20215
20216 For more see PR43399. */
20217
20218 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20219 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20220 dwarf_par_index = 1;
20221
20222 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20223 {
20224 if (mask & (1 << i))
20225 {
20226 reg = gen_rtx_REG (SImode, i);
20227
20228 XVECEXP (par, 0, 0)
20229 = gen_rtx_SET (gen_frame_mem
20230 (BLKmode,
20231 gen_rtx_PRE_MODIFY (Pmode,
20232 stack_pointer_rtx,
20233 plus_constant
20234 (Pmode, stack_pointer_rtx,
20235 -4 * num_regs))
20236 ),
20237 gen_rtx_UNSPEC (BLKmode,
20238 gen_rtvec (1, reg),
20239 UNSPEC_PUSH_MULT));
20240
20241 if (dwarf_regs_mask & (1 << i))
20242 {
20243 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20244 reg);
20245 RTX_FRAME_RELATED_P (tmp) = 1;
20246 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20247 }
20248
20249 break;
20250 }
20251 }
20252
20253 for (j = 1, i++; j < num_regs; i++)
20254 {
20255 if (mask & (1 << i))
20256 {
20257 reg = gen_rtx_REG (SImode, i);
20258
20259 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20260
20261 if (dwarf_regs_mask & (1 << i))
20262 {
20263 tmp
20264 = gen_rtx_SET (gen_frame_mem
20265 (SImode,
20266 plus_constant (Pmode, stack_pointer_rtx,
20267 4 * j)),
20268 reg);
20269 RTX_FRAME_RELATED_P (tmp) = 1;
20270 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20271 }
20272
20273 j++;
20274 }
20275 }
20276
20277 par = emit_insn (par);
20278
20279 tmp = gen_rtx_SET (stack_pointer_rtx,
20280 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20281 RTX_FRAME_RELATED_P (tmp) = 1;
20282 XVECEXP (dwarf, 0, 0) = tmp;
20283
20284 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20285
20286 return par;
20287 }
20288
20289 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20290 SIZE is the offset to be adjusted.
20291 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20292 static void
20293 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20294 {
20295 rtx dwarf;
20296
20297 RTX_FRAME_RELATED_P (insn) = 1;
20298 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20299 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20300 }
20301
20302 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20303 SAVED_REGS_MASK shows which registers need to be restored.
20304
20305 Unfortunately, since this insn does not reflect very well the actual
20306 semantics of the operation, we need to annotate the insn for the benefit
20307 of DWARF2 frame unwind information. */
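/* For example (illustrative only), with r4, r5 and PC in SAVED_REGS_MASK
   the emitted parallel is typically assembled as:

	pop	{r4, r5, pc}  */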
20308 static void
20309 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20310 {
20311 int num_regs = 0;
20312 int i, j;
20313 rtx par;
20314 rtx dwarf = NULL_RTX;
20315 rtx tmp, reg;
20316 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20317 int offset_adj;
20318 int emit_update;
20319
20320 offset_adj = return_in_pc ? 1 : 0;
20321 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20322 if (saved_regs_mask & (1 << i))
20323 num_regs++;
20324
20325 gcc_assert (num_regs && num_regs <= 16);
20326
20327 /* If SP is in reglist, then we don't emit SP update insn. */
20328 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20329
20330 /* The parallel needs to hold num_regs SETs
20331 and one SET for the stack update. */
20332 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20333
20334 if (return_in_pc)
20335 XVECEXP (par, 0, 0) = ret_rtx;
20336
20337 if (emit_update)
20338 {
20339 /* Increment the stack pointer, based on there being
20340 num_regs 4-byte registers to restore. */
20341 tmp = gen_rtx_SET (stack_pointer_rtx,
20342 plus_constant (Pmode,
20343 stack_pointer_rtx,
20344 4 * num_regs));
20345 RTX_FRAME_RELATED_P (tmp) = 1;
20346 XVECEXP (par, 0, offset_adj) = tmp;
20347 }
20348
20349 /* Now restore every reg, which may include PC. */
20350 for (j = 0, i = 0; j < num_regs; i++)
20351 if (saved_regs_mask & (1 << i))
20352 {
20353 reg = gen_rtx_REG (SImode, i);
20354 if ((num_regs == 1) && emit_update && !return_in_pc)
20355 {
20356 /* Emit single load with writeback. */
20357 tmp = gen_frame_mem (SImode,
20358 gen_rtx_POST_INC (Pmode,
20359 stack_pointer_rtx));
20360 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20361 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20362 return;
20363 }
20364
20365 tmp = gen_rtx_SET (reg,
20366 gen_frame_mem
20367 (SImode,
20368 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20369 RTX_FRAME_RELATED_P (tmp) = 1;
20370 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20371
20372 /* We need to maintain a sequence for DWARF info too. As the dwarf
20373 info should not include PC, skip it. */
20374 if (i != PC_REGNUM)
20375 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20376
20377 j++;
20378 }
20379
20380 if (return_in_pc)
20381 par = emit_jump_insn (par);
20382 else
20383 par = emit_insn (par);
20384
20385 REG_NOTES (par) = dwarf;
20386 if (!return_in_pc)
20387 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20388 stack_pointer_rtx, stack_pointer_rtx);
20389 }
20390
20391 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20392 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20393
20394 Unfortunately, since this insn does not reflect very well the actual
20395 semantics of the operation, we need to annotate the insn for the benefit
20396 of DWARF2 frame unwind information. */
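/* For example (illustrative only), a pop of two consecutive D-registers
   d8 and d9 with BASE_REG being the stack pointer is typically assembled
   as:

	vldmia	sp!, {d8-d9}  */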
20397 static void
20398 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20399 {
20400 int i, j;
20401 rtx par;
20402 rtx dwarf = NULL_RTX;
20403 rtx tmp, reg;
20404
20405 gcc_assert (num_regs && num_regs <= 32);
20406
20407 /* Workaround ARM10 VFPr1 bug. */
20408 if (num_regs == 2 && !arm_arch6)
20409 {
20410 if (first_reg == 15)
20411 first_reg--;
20412
20413 num_regs++;
20414 }
20415
20416 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20417 there could be up to 32 D-registers to restore.
20418 If there are more than 16 D-registers, make two recursive calls,
20419 each of which emits one pop_multi instruction. */
20420 if (num_regs > 16)
20421 {
20422 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20423 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20424 return;
20425 }
20426
20427 /* The parallel needs to hold num_regs SETs
20428 and one SET for the stack update. */
20429 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20430
20431 /* Increment the stack pointer, based on there being
20432 num_regs 8-byte registers to restore. */
20433 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20434 RTX_FRAME_RELATED_P (tmp) = 1;
20435 XVECEXP (par, 0, 0) = tmp;
20436
20437 /* Now show every reg that will be restored, using a SET for each. */
20438 for (j = 0, i = first_reg; j < num_regs; i += 2)
20439 {
20440 reg = gen_rtx_REG (DFmode, i);
20441
20442 tmp = gen_rtx_SET (reg,
20443 gen_frame_mem
20444 (DFmode,
20445 plus_constant (Pmode, base_reg, 8 * j)));
20446 RTX_FRAME_RELATED_P (tmp) = 1;
20447 XVECEXP (par, 0, j + 1) = tmp;
20448
20449 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20450
20451 j++;
20452 }
20453
20454 par = emit_insn (par);
20455 REG_NOTES (par) = dwarf;
20456
20457 /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP. */
20458 if (REGNO (base_reg) == IP_REGNUM)
20459 {
20460 RTX_FRAME_RELATED_P (par) = 1;
20461 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20462 }
20463 else
20464 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20465 base_reg, base_reg);
20466 }
20467
20468 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20469 even number of registers is being popped, an LDRD pattern is created for
20470 each register pair. If an odd number of registers is popped, the last register
20471 is loaded using an LDR pattern. */
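/* A minimal sketch of the emitted code (illustrative only), assuming
   r4, r5 and r6 are the registers in SAVED_REGS_MASK:

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4  */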
20472 static void
20473 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20474 {
20475 int num_regs = 0;
20476 int i, j;
20477 rtx par = NULL_RTX;
20478 rtx dwarf = NULL_RTX;
20479 rtx tmp, reg, tmp1;
20480 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20481
20482 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20483 if (saved_regs_mask & (1 << i))
20484 num_regs++;
20485
20486 gcc_assert (num_regs && num_regs <= 16);
20487
20488 /* We cannot generate an ldrd for PC, so reduce the count if PC is
20489 to be popped. Thus, if num_regs was even, it now becomes odd,
20490 and we can generate a pop with PC. If num_regs was odd, it is now
20491 even, and an ldr with return can be generated for PC. */
20492 if (return_in_pc)
20493 num_regs--;
20494
20495 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20496
20497 /* Var j iterates over all the registers to gather all the registers in
20498 saved_regs_mask. Var i gives the index of a saved register in the stack
20499 frame. A PARALLEL RTX for a register pair is created here, so that the
20500 LDRD pattern can be matched. As PC is always the last register to be
20501 popped, and we have already decremented num_regs if PC is in the mask,
20502 we don't have to worry about PC in this loop. */
20503 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20504 if (saved_regs_mask & (1 << j))
20505 {
20506 /* Create RTX for memory load. */
20507 reg = gen_rtx_REG (SImode, j);
20508 tmp = gen_rtx_SET (reg,
20509 gen_frame_mem (SImode,
20510 plus_constant (Pmode,
20511 stack_pointer_rtx, 4 * i)));
20512 RTX_FRAME_RELATED_P (tmp) = 1;
20513
20514 if (i % 2 == 0)
20515 {
20516 /* When the saved-register index (i) is even, the RTX to be emitted is
20517 yet to be created. Hence create it first. The LDRD pattern we
20518 are generating is:
20519 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20520 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20521 where the target registers need not be consecutive. */
20522 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20523 dwarf = NULL_RTX;
20524 }
20525
20526 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20527 added as the 0th element; if i is odd, reg_i is added as the 1st element
20528 of the LDRD pattern shown above. */
20529 XVECEXP (par, 0, (i % 2)) = tmp;
20530 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20531
20532 if ((i % 2) == 1)
20533 {
20534 /* When the saved-register index (i) is odd, RTXs for both registers
20535 to be loaded have been generated in the LDRD pattern given above, and
20536 the pattern can be emitted now. */
20537 par = emit_insn (par);
20538 REG_NOTES (par) = dwarf;
20539 RTX_FRAME_RELATED_P (par) = 1;
20540 }
20541
20542 i++;
20543 }
20544
20545 /* If the number of registers popped is odd and return_in_pc is false, or
20546 the number of registers is even and return_in_pc is true, the last register
20547 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20548 and then use LDR with post-increment. */
20549
20550 /* Increment the stack pointer, based on there being
20551 num_regs 4-byte registers to restore. */
20552 tmp = gen_rtx_SET (stack_pointer_rtx,
20553 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20554 RTX_FRAME_RELATED_P (tmp) = 1;
20555 tmp = emit_insn (tmp);
20556 if (!return_in_pc)
20557 {
20558 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20559 stack_pointer_rtx, stack_pointer_rtx);
20560 }
20561
20562 dwarf = NULL_RTX;
20563
20564 if (((num_regs % 2) == 1 && !return_in_pc)
20565 || ((num_regs % 2) == 0 && return_in_pc))
20566 {
20567 /* Scan for the single register to be popped. Skip until the saved
20568 register is found. */
20569 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20570
20571 /* Gen LDR with post increment here. */
20572 tmp1 = gen_rtx_MEM (SImode,
20573 gen_rtx_POST_INC (SImode,
20574 stack_pointer_rtx));
20575 set_mem_alias_set (tmp1, get_frame_alias_set ());
20576
20577 reg = gen_rtx_REG (SImode, j);
20578 tmp = gen_rtx_SET (reg, tmp1);
20579 RTX_FRAME_RELATED_P (tmp) = 1;
20580 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20581
20582 if (return_in_pc)
20583 {
20584 /* If return_in_pc, j must be PC_REGNUM. */
20585 gcc_assert (j == PC_REGNUM);
20586 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20587 XVECEXP (par, 0, 0) = ret_rtx;
20588 XVECEXP (par, 0, 1) = tmp;
20589 par = emit_jump_insn (par);
20590 }
20591 else
20592 {
20593 par = emit_insn (tmp);
20594 REG_NOTES (par) = dwarf;
20595 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20596 stack_pointer_rtx, stack_pointer_rtx);
20597 }
20598
20599 }
20600 else if ((num_regs % 2) == 1 && return_in_pc)
20601 {
20602 /* There are 2 registers to be popped. So, generate the pattern
20603 pop_multiple_with_stack_update_and_return to pop into PC. */
20604 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20605 }
20606
20607 return;
20608 }
20609
20610 /* LDRD in ARM mode needs consecutive registers as operands. This function
20611 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20612 offset addressing and then generates one separate stack update. This provides
20613 more scheduling freedom, compared to writeback on every load. However,
20614 if the function returns using a load into PC directly
20615 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20616 before the last load. TODO: Add a peephole optimization to recognize
20617 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20618 a peephole optimization to merge the load at stack-offset zero
20619 with the stack update instruction, using a load with writeback
20620 in post-index addressing mode. */
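/* A minimal sketch of the emitted code (illustrative only), assuming
   r4, r5 and r6 are the registers in SAVED_REGS_MASK:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12  */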
20621 static void
20622 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20623 {
20624 int j = 0;
20625 int offset = 0;
20626 rtx par = NULL_RTX;
20627 rtx dwarf = NULL_RTX;
20628 rtx tmp, mem;
20629
20630 /* Restore saved registers. */
20631 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20632 j = 0;
20633 while (j <= LAST_ARM_REGNUM)
20634 if (saved_regs_mask & (1 << j))
20635 {
20636 if ((j % 2) == 0
20637 && (saved_regs_mask & (1 << (j + 1)))
20638 && (j + 1) != PC_REGNUM)
20639 {
20640 /* Current register and next register form register pair for which
20641 LDRD can be generated. PC is always the last register popped, and
20642 we handle it separately. */
20643 if (offset > 0)
20644 mem = gen_frame_mem (DImode,
20645 plus_constant (Pmode,
20646 stack_pointer_rtx,
20647 offset));
20648 else
20649 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20650
20651 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20652 tmp = emit_insn (tmp);
20653 RTX_FRAME_RELATED_P (tmp) = 1;
20654
20655 /* Generate dwarf info. */
20656
20657 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20658 gen_rtx_REG (SImode, j),
20659 NULL_RTX);
20660 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20661 gen_rtx_REG (SImode, j + 1),
20662 dwarf);
20663
20664 REG_NOTES (tmp) = dwarf;
20665
20666 offset += 8;
20667 j += 2;
20668 }
20669 else if (j != PC_REGNUM)
20670 {
20671 /* Emit a single word load. */
20672 if (offset > 0)
20673 mem = gen_frame_mem (SImode,
20674 plus_constant (Pmode,
20675 stack_pointer_rtx,
20676 offset));
20677 else
20678 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20679
20680 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20681 tmp = emit_insn (tmp);
20682 RTX_FRAME_RELATED_P (tmp) = 1;
20683
20684 /* Generate dwarf info. */
20685 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20686 gen_rtx_REG (SImode, j),
20687 NULL_RTX);
20688
20689 offset += 4;
20690 j += 1;
20691 }
20692 else /* j == PC_REGNUM */
20693 j++;
20694 }
20695 else
20696 j++;
20697
20698 /* Update the stack. */
20699 if (offset > 0)
20700 {
20701 tmp = gen_rtx_SET (stack_pointer_rtx,
20702 plus_constant (Pmode,
20703 stack_pointer_rtx,
20704 offset));
20705 tmp = emit_insn (tmp);
20706 arm_add_cfa_adjust_cfa_note (tmp, offset,
20707 stack_pointer_rtx, stack_pointer_rtx);
20708 offset = 0;
20709 }
20710
20711 if (saved_regs_mask & (1 << PC_REGNUM))
20712 {
20713 /* Only PC is to be popped. */
20714 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20715 XVECEXP (par, 0, 0) = ret_rtx;
20716 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20717 gen_frame_mem (SImode,
20718 gen_rtx_POST_INC (SImode,
20719 stack_pointer_rtx)));
20720 RTX_FRAME_RELATED_P (tmp) = 1;
20721 XVECEXP (par, 0, 1) = tmp;
20722 par = emit_jump_insn (par);
20723
20724 /* Generate dwarf info. */
20725 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20726 gen_rtx_REG (SImode, PC_REGNUM),
20727 NULL_RTX);
20728 REG_NOTES (par) = dwarf;
20729 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20730 stack_pointer_rtx, stack_pointer_rtx);
20731 }
20732 }
20733
20734 /* Calculate the size of the return value that is passed in registers. */
20735 static unsigned
20736 arm_size_return_regs (void)
20737 {
20738 machine_mode mode;
20739
20740 if (crtl->return_rtx != 0)
20741 mode = GET_MODE (crtl->return_rtx);
20742 else
20743 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20744
20745 return GET_MODE_SIZE (mode);
20746 }
20747
20748 /* Return true if the current function needs to save/restore LR. */
20749 static bool
20750 thumb_force_lr_save (void)
20751 {
20752 return !cfun->machine->lr_save_eliminated
20753 && (!crtl->is_leaf
20754 || thumb_far_jump_used_p ()
20755 || df_regs_ever_live_p (LR_REGNUM));
20756 }
20757
20758 /* We do not know whether r3 will be available, because
20759 there is an indirect tail call happening in this
20760 particular case. */
20761 static bool
20762 is_indirect_tailcall_p (rtx call)
20763 {
20764 rtx pat = PATTERN (call);
20765
20766 /* Indirect tail call. */
20767 pat = XVECEXP (pat, 0, 0);
20768 if (GET_CODE (pat) == SET)
20769 pat = SET_SRC (pat);
20770
20771 pat = XEXP (XEXP (pat, 0), 0);
20772 return REG_P (pat);
20773 }
20774
20775 /* Return true if r3 is used by any of the tail call insns in the
20776 current function. */
20777 static bool
20778 any_sibcall_could_use_r3 (void)
20779 {
20780 edge_iterator ei;
20781 edge e;
20782
20783 if (!crtl->tail_call_emit)
20784 return false;
20785 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20786 if (e->flags & EDGE_SIBCALL)
20787 {
20788 rtx_insn *call = BB_END (e->src);
20789 if (!CALL_P (call))
20790 call = prev_nonnote_nondebug_insn (call);
20791 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20792 if (find_regno_fusage (call, USE, 3)
20793 || is_indirect_tailcall_p (call))
20794 return true;
20795 }
20796 return false;
20797 }
20798
20799
20800 /* Compute the distance from register FROM to register TO.
20801 These can be the arg pointer (26), the soft frame pointer (25),
20802 the stack pointer (13) or the hard frame pointer (11).
20803 In thumb mode r7 is used as the soft frame pointer, if needed.
20804 Typical stack layout looks like this:
20805
20806 old stack pointer -> | |
20807 ----
20808 | | \
20809 | | saved arguments for
20810 | | vararg functions
20811 | | /
20812 --
20813 hard FP & arg pointer -> | | \
20814 | | stack
20815 | | frame
20816 | | /
20817 --
20818 | | \
20819 | | call saved
20820 | | registers
20821 soft frame pointer -> | | /
20822 --
20823 | | \
20824 | | local
20825 | | variables
20826 locals base pointer -> | | /
20827 --
20828 | | \
20829 | | outgoing
20830 | | arguments
20831 current stack pointer -> | | /
20832 --
20833
20834 For a given function some or all of these stack components
20835 may not be needed, giving rise to the possibility of
20836 eliminating some of the registers.
20837
20838 The values returned by this function must reflect the behavior
20839 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20840
20841 The sign of the number returned reflects the direction of stack
20842 growth, so the values are positive for all eliminations except
20843 from the soft frame pointer to the hard frame pointer.
20844
20845 SFP may point just inside the local variables block to ensure correct
20846 alignment. */
20847
20848
20849 /* Return cached stack offsets. */
20850
20851 static arm_stack_offsets *
20852 arm_get_frame_offsets (void)
20853 {
20854 struct arm_stack_offsets *offsets;
20855
20856 offsets = &cfun->machine->stack_offsets;
20857
20858 return offsets;
20859 }
20860
20861
20862 /* Calculate stack offsets. These are used to calculate register elimination
20863 offsets and in prologue/epilogue code. Also calculates which registers
20864 should be saved. */
20865
20866 static void
20867 arm_compute_frame_layout (void)
20868 {
20869 struct arm_stack_offsets *offsets;
20870 unsigned long func_type;
20871 int saved;
20872 int core_saved;
20873 HOST_WIDE_INT frame_size;
20874 int i;
20875
20876 offsets = &cfun->machine->stack_offsets;
20877
20878 /* Initially this is the size of the local variables. It will be translated
20879 into an offset once we have determined the size of preceding data. */
20880 frame_size = ROUND_UP_WORD (get_frame_size ());
20881
20882 /* Space for variadic functions. */
20883 offsets->saved_args = crtl->args.pretend_args_size;
20884
20885 /* In Thumb mode this is incorrect, but never used. */
20886 offsets->frame
20887 = (offsets->saved_args
20888 + arm_compute_static_chain_stack_bytes ()
20889 + (frame_pointer_needed ? 4 : 0));
20890
20891 if (TARGET_32BIT)
20892 {
20893 unsigned int regno;
20894
20895 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20896 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20897 saved = core_saved;
20898
20899 /* We know that SP will be doubleword aligned on entry, and we must
20900 preserve that condition at any subroutine call. We also require the
20901 soft frame pointer to be doubleword aligned. */
20902
20903 if (TARGET_REALLY_IWMMXT)
20904 {
20905 /* Check for the call-saved iWMMXt registers. */
20906 for (regno = FIRST_IWMMXT_REGNUM;
20907 regno <= LAST_IWMMXT_REGNUM;
20908 regno++)
20909 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20910 saved += 8;
20911 }
20912
20913 func_type = arm_current_func_type ();
20914 /* Space for saved VFP registers. */
20915 if (! IS_VOLATILE (func_type)
20916 && TARGET_HARD_FLOAT)
20917 saved += arm_get_vfp_saved_size ();
20918 }
20919 else /* TARGET_THUMB1 */
20920 {
20921 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20922 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20923 saved = core_saved;
20924 if (TARGET_BACKTRACE)
20925 saved += 16;
20926 }
20927
20928 /* Saved registers include the stack frame. */
20929 offsets->saved_regs
20930 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20931 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20932
20933 /* A leaf function does not need any stack alignment if it has nothing
20934 on the stack. */
20935 if (crtl->is_leaf && frame_size == 0
20936 /* However if it calls alloca(), we have a dynamically allocated
20937 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20938 && ! cfun->calls_alloca)
20939 {
20940 offsets->outgoing_args = offsets->soft_frame;
20941 offsets->locals_base = offsets->soft_frame;
20942 return;
20943 }
20944
20945 /* Ensure SFP has the correct alignment. */
20946 if (ARM_DOUBLEWORD_ALIGN
20947 && (offsets->soft_frame & 7))
20948 {
20949 offsets->soft_frame += 4;
20950 /* Try to align stack by pushing an extra reg. Don't bother doing this
20951 when there is a stack frame as the alignment will be rolled into
20952 the normal stack adjustment. */
20953 if (frame_size + crtl->outgoing_args_size == 0)
20954 {
20955 int reg = -1;
20956
20957 /* Register r3 is caller-saved. Normally it does not need to be
20958 saved on entry by the prologue. However if we choose to save
20959 it for padding then we may confuse the compiler into thinking
20960 a prologue sequence is required when in fact it is not. This
20961 will occur when shrink-wrapping if r3 is used as a scratch
20962 register and there are no other callee-saved writes.
20963
20964 This situation can be avoided when other callee-saved registers
20965 are available and r3 is not mandatory if we choose a callee-saved
20966 register for padding. */
20967 bool prefer_callee_reg_p = false;
20968
20969 /* If it is safe to use r3, then do so. This sometimes
20970 generates better code on Thumb-2 by avoiding the need to
20971 use 32-bit push/pop instructions. */
20972 if (! any_sibcall_could_use_r3 ()
20973 && arm_size_return_regs () <= 12
20974 && (offsets->saved_regs_mask & (1 << 3)) == 0
20975 && (TARGET_THUMB2
20976 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20977 {
20978 reg = 3;
20979 if (!TARGET_THUMB2)
20980 prefer_callee_reg_p = true;
20981 }
20982 if (reg == -1
20983 || prefer_callee_reg_p)
20984 {
20985 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20986 {
20987 /* Avoid fixed registers; they may be changed at
20988 arbitrary times so it's unsafe to restore them
20989 during the epilogue. */
20990 if (!fixed_regs[i]
20991 && (offsets->saved_regs_mask & (1 << i)) == 0)
20992 {
20993 reg = i;
20994 break;
20995 }
20996 }
20997 }
20998
20999 if (reg != -1)
21000 {
21001 offsets->saved_regs += 4;
21002 offsets->saved_regs_mask |= (1 << reg);
21003 }
21004 }
21005 }
21006
21007 offsets->locals_base = offsets->soft_frame + frame_size;
21008 offsets->outgoing_args = (offsets->locals_base
21009 + crtl->outgoing_args_size);
21010
21011 if (ARM_DOUBLEWORD_ALIGN)
21012 {
21013 /* Ensure SP remains doubleword aligned. */
21014 if (offsets->outgoing_args & 7)
21015 offsets->outgoing_args += 4;
21016 gcc_assert (!(offsets->outgoing_args & 7));
21017 }
21018 }
21019
21020
21021 /* Calculate the relative offsets for the different stack pointers. Positive
21022 offsets are in the direction of stack growth. */
21023
21024 HOST_WIDE_INT
21025 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21026 {
21027 arm_stack_offsets *offsets;
21028
21029 offsets = arm_get_frame_offsets ();
21030
21031 /* OK, now we have enough information to compute the distances.
21032 There must be an entry in these switch tables for each pair
21033 of registers in ELIMINABLE_REGS, even if some of the entries
21034 seem to be redundant or useless. */
21035 switch (from)
21036 {
21037 case ARG_POINTER_REGNUM:
21038 switch (to)
21039 {
21040 case THUMB_HARD_FRAME_POINTER_REGNUM:
21041 return 0;
21042
21043 case FRAME_POINTER_REGNUM:
21044 /* This is the reverse of the soft frame pointer
21045 to hard frame pointer elimination below. */
21046 return offsets->soft_frame - offsets->saved_args;
21047
21048 case ARM_HARD_FRAME_POINTER_REGNUM:
21049 /* This is only non-zero in the case where the static chain register
21050 is stored above the frame. */
21051 return offsets->frame - offsets->saved_args - 4;
21052
21053 case STACK_POINTER_REGNUM:
21054 /* If nothing has been pushed on the stack at all
21055 then this will return -4. This *is* correct! */
21056 return offsets->outgoing_args - (offsets->saved_args + 4);
21057
21058 default:
21059 gcc_unreachable ();
21060 }
21061 gcc_unreachable ();
21062
21063 case FRAME_POINTER_REGNUM:
21064 switch (to)
21065 {
21066 case THUMB_HARD_FRAME_POINTER_REGNUM:
21067 return 0;
21068
21069 case ARM_HARD_FRAME_POINTER_REGNUM:
21070 /* The hard frame pointer points to the top entry in the
21071 stack frame. The soft frame pointer points to the bottom entry
21072 in the stack frame. If there is no stack frame at all,
21073 then they are identical. */
21074
21075 return offsets->frame - offsets->soft_frame;
21076
21077 case STACK_POINTER_REGNUM:
21078 return offsets->outgoing_args - offsets->soft_frame;
21079
21080 default:
21081 gcc_unreachable ();
21082 }
21083 gcc_unreachable ();
21084
21085 default:
21086 /* You cannot eliminate from the stack pointer.
21087 In theory you could eliminate from the hard frame
21088 pointer to the stack pointer, but this will never
21089 happen, since if a stack frame is not needed the
21090 hard frame pointer will never be used. */
21091 gcc_unreachable ();
21092 }
21093 }
21094
21095 /* Given FROM and TO register numbers, say whether this elimination is
21096 allowed. Frame pointer elimination is automatically handled.
21097
21098 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21099 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21100 pointer, we must eliminate FRAME_POINTER_REGNUM into
21101 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21102 ARG_POINTER_REGNUM. */
21103
21104 bool
21105 arm_can_eliminate (const int from, const int to)
21106 {
21107 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21108 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21109 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21110 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21111 true);
21112 }
21113
21114 /* Emit RTL to save coprocessor registers on function entry. Returns the
21115 number of bytes pushed. */
21116
21117 static int
21118 arm_save_coproc_regs (void)
21119 {
21120 int saved_size = 0;
21121 unsigned reg;
21122 unsigned start_reg;
21123 rtx insn;
21124
21125 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21126 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21127 {
21128 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21129 insn = gen_rtx_MEM (V2SImode, insn);
21130 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21131 RTX_FRAME_RELATED_P (insn) = 1;
21132 saved_size += 8;
21133 }
21134
21135 if (TARGET_HARD_FLOAT)
21136 {
21137 start_reg = FIRST_VFP_REGNUM;
21138
21139 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21140 {
21141 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21142 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21143 {
21144 if (start_reg != reg)
21145 saved_size += vfp_emit_fstmd (start_reg,
21146 (reg - start_reg) / 2);
21147 start_reg = reg + 2;
21148 }
21149 }
21150 if (start_reg != reg)
21151 saved_size += vfp_emit_fstmd (start_reg,
21152 (reg - start_reg) / 2);
21153 }
21154 return saved_size;
21155 }
21156
21157
21158 /* Set the Thumb frame pointer from the stack pointer. */
21159
21160 static void
21161 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21162 {
21163 HOST_WIDE_INT amount;
21164 rtx insn, dwarf;
21165
21166 amount = offsets->outgoing_args - offsets->locals_base;
21167 if (amount < 1024)
21168 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21169 stack_pointer_rtx, GEN_INT (amount)));
21170 else
21171 {
21172 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21173 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21174 expects the first two operands to be the same. */
21175 if (TARGET_THUMB2)
21176 {
21177 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21178 stack_pointer_rtx,
21179 hard_frame_pointer_rtx));
21180 }
21181 else
21182 {
21183 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21184 hard_frame_pointer_rtx,
21185 stack_pointer_rtx));
21186 }
21187 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21188 plus_constant (Pmode, stack_pointer_rtx, amount));
21189 RTX_FRAME_RELATED_P (dwarf) = 1;
21190 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21191 }
21192
21193 RTX_FRAME_RELATED_P (insn) = 1;
21194 }
21195
21196 struct scratch_reg {
21197 rtx reg;
21198 bool saved;
21199 };
21200
21201 /* Return a short-lived scratch register for use as a 2nd scratch register on
21202 function entry after the registers are saved in the prologue. This register
21203 must be released by means of release_scratch_register_on_entry. IP is not
21204 considered since it is always used as the 1st scratch register if available.
21205
21206 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21207 mask of live registers. */
21208
21209 static void
21210 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21211 unsigned long live_regs)
21212 {
21213 int regno = -1;
21214
21215 sr->saved = false;
21216
21217 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21218 regno = LR_REGNUM;
21219 else
21220 {
21221 unsigned int i;
21222
21223 for (i = 4; i < 11; i++)
21224 if (regno1 != i && (live_regs & (1 << i)) != 0)
21225 {
21226 regno = i;
21227 break;
21228 }
21229
21230 if (regno < 0)
21231 {
21232 /* If IP is used as the 1st scratch register for a nested function,
21233 then either r3 wasn't available or it is used to preserve IP. */
21234 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21235 regno1 = 3;
21236 regno = (regno1 == 3 ? 2 : 3);
21237 sr->saved
21238 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21239 regno);
21240 }
21241 }
21242
21243 sr->reg = gen_rtx_REG (SImode, regno);
21244 if (sr->saved)
21245 {
21246 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21247 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21248 rtx x = gen_rtx_SET (stack_pointer_rtx,
21249 plus_constant (Pmode, stack_pointer_rtx, -4));
21250 RTX_FRAME_RELATED_P (insn) = 1;
21251 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21252 }
21253 }
21254
21255 /* Release a scratch register obtained from the preceding function. */
21256
21257 static void
21258 release_scratch_register_on_entry (struct scratch_reg *sr)
21259 {
21260 if (sr->saved)
21261 {
21262 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21263 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21264 rtx x = gen_rtx_SET (stack_pointer_rtx,
21265 plus_constant (Pmode, stack_pointer_rtx, 4));
21266 RTX_FRAME_RELATED_P (insn) = 1;
21267 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21268 }
21269 }
21270
21271 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21272
21273 #if PROBE_INTERVAL > 4096
21274 #error Cannot use indexed addressing mode for stack probing
21275 #endif
21276
21277 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21278 inclusive. These are offsets from the current stack pointer. REGNO1
21279 is the index number of the 1st scratch register and LIVE_REGS is the
21280 mask of live registers. */
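/* A rough sketch of the code emitted for the simplest case, where SIZE
   is no larger than PROBE_INTERVAL (illustrative only; large constants
   may need more than one instruction to materialize):

	mov	reg1, #(FIRST + PROBE_INTERVAL)
	sub	reg1, sp, reg1
	str	r0, [reg1, #(PROBE_INTERVAL - SIZE)]  */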
21281
21282 static void
21283 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21284 unsigned int regno1, unsigned long live_regs)
21285 {
21286 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21287
21288 /* See if we have a constant small number of probes to generate. If so,
21289 that's the easy case. */
21290 if (size <= PROBE_INTERVAL)
21291 {
21292 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21293 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21294 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21295 }
21296
21297 /* The run-time loop is made up of 10 insns in the generic case while the
21298 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
21299 else if (size <= 5 * PROBE_INTERVAL)
21300 {
21301 HOST_WIDE_INT i, rem;
21302
21303 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21304 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21305 emit_stack_probe (reg1);
21306
21307 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21308 it exceeds SIZE. If only two probes are needed, this will not
21309 generate any code. Then probe at FIRST + SIZE. */
21310 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21311 {
21312 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21313 emit_stack_probe (reg1);
21314 }
21315
21316 rem = size - (i - PROBE_INTERVAL);
21317 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21318 {
21319 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21320 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21321 }
21322 else
21323 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21324 }
21325
21326 /* Otherwise, do the same as above, but in a loop. Note that we must be
21327 extra careful with variables wrapping around because we might be at
21328 the very top (or the very bottom) of the address space and we have
21329 to be able to handle this case properly; in particular, we use an
21330 equality test for the loop condition. */
21331 else
21332 {
21333 HOST_WIDE_INT rounded_size;
21334 struct scratch_reg sr;
21335
21336 get_scratch_register_on_entry (&sr, regno1, live_regs);
21337
21338 emit_move_insn (reg1, GEN_INT (first));
21339
21340
21341 /* Step 1: round SIZE to the previous multiple of the interval. */
21342
21343 rounded_size = size & -PROBE_INTERVAL;
21344 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21345
21346
21347 /* Step 2: compute initial and final value of the loop counter. */
21348
21349 /* TEST_ADDR = SP + FIRST. */
21350 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21351
21352 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21353 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21354
21355
21356 /* Step 3: the loop
21357
21358 do
21359 {
21360 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21361 probe at TEST_ADDR
21362 }
21363 while (TEST_ADDR != LAST_ADDR)
21364
21365 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21366 until it is equal to ROUNDED_SIZE. */
21367
21368 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21369
21370
21371 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21372 that SIZE is equal to ROUNDED_SIZE. */
21373
21374 if (size != rounded_size)
21375 {
21376 HOST_WIDE_INT rem = size - rounded_size;
21377
21378 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21379 {
21380 emit_set_insn (sr.reg,
21381 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21382 emit_stack_probe (plus_constant (Pmode, sr.reg,
21383 PROBE_INTERVAL - rem));
21384 }
21385 else
21386 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21387 }
21388
21389 release_scratch_register_on_entry (&sr);
21390 }
21391
21392 /* Make sure nothing is scheduled before we are done. */
21393 emit_insn (gen_blockage ());
21394 }
21395
21396 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21397 absolute addresses. */
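/* For example (illustrative only), with REG1 = r4, REG2 = r5 and the
   default 4kB probe interval, the emitted loop looks like:

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0  */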
21398
21399 const char *
21400 output_probe_stack_range (rtx reg1, rtx reg2)
21401 {
21402 static int labelno = 0;
21403 char loop_lab[32];
21404 rtx xops[2];
21405
21406 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21407
21408 /* Loop. */
21409 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21410
21411 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21412 xops[0] = reg1;
21413 xops[1] = GEN_INT (PROBE_INTERVAL);
21414 output_asm_insn ("sub\t%0, %0, %1", xops);
21415
21416 /* Probe at TEST_ADDR. */
21417 output_asm_insn ("str\tr0, [%0, #0]", xops);
21418
21419 /* Test if TEST_ADDR == LAST_ADDR. */
21420 xops[1] = reg2;
21421 output_asm_insn ("cmp\t%0, %1", xops);
21422
21423 /* Branch. */
21424 fputs ("\tbne\t", asm_out_file);
21425 assemble_name_raw (asm_out_file, loop_lab);
21426 fputc ('\n', asm_out_file);
21427
21428 return "";
21429 }
21430
21431 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21432 function. */
21433 void
21434 arm_expand_prologue (void)
21435 {
21436 rtx amount;
21437 rtx insn;
21438 rtx ip_rtx;
21439 unsigned long live_regs_mask;
21440 unsigned long func_type;
21441 int fp_offset = 0;
21442 int saved_pretend_args = 0;
21443 int saved_regs = 0;
21444 unsigned HOST_WIDE_INT args_to_push;
21445 HOST_WIDE_INT size;
21446 arm_stack_offsets *offsets;
21447 bool clobber_ip;
21448
21449 func_type = arm_current_func_type ();
21450
21451 /* Naked functions don't have prologues. */
21452 if (IS_NAKED (func_type))
21453 {
21454 if (flag_stack_usage_info)
21455 current_function_static_stack_size = 0;
21456 return;
21457 }
21458
21459 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21460 args_to_push = crtl->args.pretend_args_size;
21461
21462 /* Compute which register we will have to save onto the stack. */
21463 offsets = arm_get_frame_offsets ();
21464 live_regs_mask = offsets->saved_regs_mask;
21465
21466 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21467
21468 if (IS_STACKALIGN (func_type))
21469 {
21470 rtx r0, r1;
21471
21472 /* Handle a word-aligned stack pointer. We generate the following:
21473
21474 mov r0, sp
21475 bic r1, r0, #7
21476 mov sp, r1
21477 <save and restore r0 in normal prologue/epilogue>
21478 mov sp, r0
21479 bx lr
21480
21481 The unwinder doesn't need to know about the stack realignment.
21482 Just tell it we saved SP in r0. */
21483 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21484
21485 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21486 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21487
21488 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21489 RTX_FRAME_RELATED_P (insn) = 1;
21490 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21491
21492 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21493
21494 /* ??? The CFA changes here, which may cause GDB to conclude that it
21495 has entered a different function. That said, the unwind info is
21496 correct, individually, before and after this instruction because
21497 we've described the save of SP, which will override the default
21498 handling of SP as restoring from the CFA. */
21499 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21500 }
21501
21502 /* The static chain register is the same as the IP register. If it is
21503 clobbered when creating the frame, we need to save and restore it. */
21504 clobber_ip = IS_NESTED (func_type)
21505 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21506 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21507 || flag_stack_clash_protection)
21508 && !df_regs_ever_live_p (LR_REGNUM)
21509 && arm_r3_live_at_start_p ()));
21510
21511 /* Find somewhere to store IP whilst the frame is being created.
21512 We try the following places in order:
21513
21514 1. The last argument register r3 if it is available.
21515 2. A slot on the stack above the frame if there are no
21516 arguments to push onto the stack.
21517 3. Register r3 again, after pushing the argument registers
21518 onto the stack, if this is a varargs function.
21519 4. The last slot on the stack created for the arguments to
21520 push, if this isn't a varargs function.
21521
21522 Note - we only need to tell the dwarf2 backend about the SP
21523 adjustment in the second variant; the static chain register
21524 doesn't need to be unwound, as it doesn't contain a value
21525 inherited from the caller. */
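/* For illustration only (roughly, in ARM assembly syntax) the four
   variants above correspond to:
     1.  mov   r3, ip
     2.  str   ip, [sp, #-4]!
     3.  push  {<unnamed argument registers>}   followed by   mov r3, ip
     4.  str   ip, [sp, #-<args_to_push>]!
   The exact instructions emitted depend on args_to_push and on whether
   the function takes anonymous arguments; see the code below.  */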
21526 if (clobber_ip)
21527 {
21528 if (!arm_r3_live_at_start_p ())
21529 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21530 else if (args_to_push == 0)
21531 {
21532 rtx addr, dwarf;
21533
21534 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21535 saved_regs += 4;
21536
21537 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21538 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21539 fp_offset = 4;
21540
21541 /* Just tell the dwarf backend that we adjusted SP. */
21542 dwarf = gen_rtx_SET (stack_pointer_rtx,
21543 plus_constant (Pmode, stack_pointer_rtx,
21544 -fp_offset));
21545 RTX_FRAME_RELATED_P (insn) = 1;
21546 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21547 }
21548 else
21549 {
21550 /* Store the args on the stack. */
21551 if (cfun->machine->uses_anonymous_args)
21552 {
21553 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21554 (0xf0 >> (args_to_push / 4)) & 0xf);
21555 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21556 saved_pretend_args = 1;
21557 }
21558 else
21559 {
21560 rtx addr, dwarf;
21561
21562 if (args_to_push == 4)
21563 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21564 else
21565 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21566 plus_constant (Pmode,
21567 stack_pointer_rtx,
21568 -args_to_push));
21569
21570 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21571
21572 /* Just tell the dwarf backend that we adjusted SP. */
21573 dwarf = gen_rtx_SET (stack_pointer_rtx,
21574 plus_constant (Pmode, stack_pointer_rtx,
21575 -args_to_push));
21576 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21577 }
21578
21579 RTX_FRAME_RELATED_P (insn) = 1;
21580 fp_offset = args_to_push;
21581 args_to_push = 0;
21582 }
21583 }
21584
21585 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21586 {
21587 if (IS_INTERRUPT (func_type))
21588 {
21589 /* Interrupt functions must not corrupt any registers.
21590 Creating a frame pointer, however, corrupts the IP
21591 register, so we must push it first. */
21592 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21593
21594 /* Do not set RTX_FRAME_RELATED_P on this insn.
21595 The dwarf stack unwinding code only wants to see one
21596 stack decrement per function, and this is not it. If
21597 this instruction is labeled as being part of the frame
21598 creation sequence then dwarf2out_frame_debug_expr will
21599 die when it encounters the assignment of IP to FP
21600 later on, since the use of SP here establishes SP as
21601 the CFA register and not IP.
21602
21603 Anyway this instruction is not really part of the stack
21604 frame creation although it is part of the prologue. */
21605 }
21606
21607 insn = emit_set_insn (ip_rtx,
21608 plus_constant (Pmode, stack_pointer_rtx,
21609 fp_offset));
21610 RTX_FRAME_RELATED_P (insn) = 1;
21611 }
21612
21613 if (args_to_push)
21614 {
21615 /* Push the argument registers, or reserve space for them. */
21616 if (cfun->machine->uses_anonymous_args)
21617 insn = emit_multi_reg_push
21618 ((0xf0 >> (args_to_push / 4)) & 0xf,
21619 (0xf0 >> (args_to_push / 4)) & 0xf);
21620 else
21621 insn = emit_insn
21622 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21623 GEN_INT (- args_to_push)));
21624 RTX_FRAME_RELATED_P (insn) = 1;
21625 }
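/* Worked example of the anonymous-argument mask used above: with
   args_to_push == 8 (two unnamed argument registers),
   (0xf0 >> (8 / 4)) & 0xf == 0xc, i.e. a push of {r2, r3}.  */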
21626
21627 /* If this is an interrupt service routine, and the link register
21628 is going to be pushed, and we're not generating the extra push
21629 of IP (needed when a frame pointer is required and the APCS frame
21630 layout is used), then subtracting four from LR now will mean that
21631 the function return can be done with a single instruction. */
21632 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21633 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21634 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21635 && TARGET_ARM)
21636 {
21637 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21638
21639 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21640 }
21641
21642 if (live_regs_mask)
21643 {
21644 unsigned long dwarf_regs_mask = live_regs_mask;
21645
21646 saved_regs += bit_count (live_regs_mask) * 4;
21647 if (optimize_size && !frame_pointer_needed
21648 && saved_regs == offsets->saved_regs - offsets->saved_args)
21649 {
21650 /* If no coprocessor registers are being pushed and we don't have
21651 to worry about a frame pointer then push extra registers to
21652 create the stack frame. This is done in a way that does not
21653 alter the frame layout, so is independent of the epilogue. */
21654 int n;
21655 int frame;
21656 n = 0;
21657 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21658 n++;
21659 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21660 if (frame && n * 4 >= frame)
21661 {
21662 n = frame / 4;
21663 live_regs_mask |= (1 << n) - 1;
21664 saved_regs += frame;
21665 }
21666 }
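/* Example of the size optimization above: if the lowest register in
   LIVE_REGS_MASK is r4 (so n == 4) and 8 bytes of frame remain to be
   allocated, r0 and r1 are added to the push mask, folding the 8-byte
   allocation into the existing push and avoiding a separate
   "sub sp, sp, #8".  */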
21667
21668 if (TARGET_LDRD
21669 && current_tune->prefer_ldrd_strd
21670 && !optimize_function_for_size_p (cfun))
21671 {
21672 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21673 if (TARGET_THUMB2)
21674 thumb2_emit_strd_push (live_regs_mask);
21675 else if (TARGET_ARM
21676 && !TARGET_APCS_FRAME
21677 && !IS_INTERRUPT (func_type))
21678 arm_emit_strd_push (live_regs_mask);
21679 else
21680 {
21681 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21682 RTX_FRAME_RELATED_P (insn) = 1;
21683 }
21684 }
21685 else
21686 {
21687 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21688 RTX_FRAME_RELATED_P (insn) = 1;
21689 }
21690 }
21691
21692 if (! IS_VOLATILE (func_type))
21693 saved_regs += arm_save_coproc_regs ();
21694
21695 if (frame_pointer_needed && TARGET_ARM)
21696 {
21697 /* Create the new frame pointer. */
21698 if (TARGET_APCS_FRAME)
21699 {
21700 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21701 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21702 RTX_FRAME_RELATED_P (insn) = 1;
21703 }
21704 else
21705 {
21706 insn = GEN_INT (saved_regs - (4 + fp_offset));
21707 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21708 stack_pointer_rtx, insn));
21709 RTX_FRAME_RELATED_P (insn) = 1;
21710 }
21711 }
21712
21713 size = offsets->outgoing_args - offsets->saved_args;
21714 if (flag_stack_usage_info)
21715 current_function_static_stack_size = size;
21716
21717 /* If this isn't an interrupt service routine and we have a frame, then do
21718 stack checking. We use IP as the first scratch register, except for the
21719 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21720 if (!IS_INTERRUPT (func_type)
21721 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21722 || flag_stack_clash_protection))
21723 {
21724 unsigned int regno;
21725
21726 if (!IS_NESTED (func_type) || clobber_ip)
21727 regno = IP_REGNUM;
21728 else if (df_regs_ever_live_p (LR_REGNUM))
21729 regno = LR_REGNUM;
21730 else
21731 regno = 3;
21732
21733 if (crtl->is_leaf && !cfun->calls_alloca)
21734 {
21735 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21736 arm_emit_probe_stack_range (get_stack_check_protect (),
21737 size - get_stack_check_protect (),
21738 regno, live_regs_mask);
21739 }
21740 else if (size > 0)
21741 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21742 regno, live_regs_mask);
21743 }
21744
21745 /* Recover the static chain register. */
21746 if (clobber_ip)
21747 {
21748 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21749 insn = gen_rtx_REG (SImode, 3);
21750 else
21751 {
21752 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21753 insn = gen_frame_mem (SImode, insn);
21754 }
21755 emit_set_insn (ip_rtx, insn);
21756 emit_insn (gen_force_register_use (ip_rtx));
21757 }
21758
21759 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21760 {
21761 /* This add can produce multiple insns for a large constant, so we
21762 need to get tricky. */
21763 rtx_insn *last = get_last_insn ();
21764
21765 amount = GEN_INT (offsets->saved_args + saved_regs
21766 - offsets->outgoing_args);
21767
21768 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21769 amount));
21770 do
21771 {
21772 last = last ? NEXT_INSN (last) : get_insns ();
21773 RTX_FRAME_RELATED_P (last) = 1;
21774 }
21775 while (last != insn);
21776
21777 /* If the frame pointer is needed, emit a special barrier that
21778 will prevent the scheduler from moving stores to the frame
21779 before the stack adjustment. */
21780 if (frame_pointer_needed)
21781 emit_insn (gen_stack_tie (stack_pointer_rtx,
21782 hard_frame_pointer_rtx));
21783 }
21784
21785
21786 if (frame_pointer_needed && TARGET_THUMB2)
21787 thumb_set_frame_pointer (offsets);
21788
21789 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21790 {
21791 unsigned long mask;
21792
21793 mask = live_regs_mask;
21794 mask &= THUMB2_WORK_REGS;
21795 if (!IS_NESTED (func_type))
21796 mask |= (1 << IP_REGNUM);
21797 arm_load_pic_register (mask);
21798 }
21799
21800 /* If we are profiling, make sure no instructions are scheduled before
21801 the call to mcount. Similarly if the user has requested no
21802 scheduling in the prolog. Similarly if we want non-call exceptions
21803 using the EABI unwinder, to prevent faulting instructions from being
21804 swapped with a stack adjustment. */
21805 if (crtl->profile || !TARGET_SCHED_PROLOG
21806 || (arm_except_unwind_info (&global_options) == UI_TARGET
21807 && cfun->can_throw_non_call_exceptions))
21808 emit_insn (gen_blockage ());
21809
21810 /* If the link register is being kept alive, with the return address in it,
21811 then make sure that it does not get reused by the ce2 pass. */
21812 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21813 cfun->machine->lr_save_eliminated = 1;
21814 }
21815 \f
21816 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21817 static void
21818 arm_print_condition (FILE *stream)
21819 {
21820 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21821 {
21822 /* Branch conversion is not implemented for Thumb-2. */
21823 if (TARGET_THUMB)
21824 {
21825 output_operand_lossage ("predicated Thumb instruction");
21826 return;
21827 }
21828 if (current_insn_predicate != NULL)
21829 {
21830 output_operand_lossage
21831 ("predicated instruction in conditional sequence");
21832 return;
21833 }
21834
21835 fputs (arm_condition_codes[arm_current_cc], stream);
21836 }
21837 else if (current_insn_predicate)
21838 {
21839 enum arm_cond_code code;
21840
21841 if (TARGET_THUMB1)
21842 {
21843 output_operand_lossage ("predicated Thumb instruction");
21844 return;
21845 }
21846
21847 code = get_arm_condition_code (current_insn_predicate);
21848 fputs (arm_condition_codes[code], stream);
21849 }
21850 }
21851
21852
21853 /* Globally reserved letters: acln
21854 Punctuation letters currently used: @_|?().!#
21855 Lower case letters currently used: bcdefhimpqtvwxyz
21856 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21857 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21858
21859 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21860
21861 If CODE is 'd', then the X is a condition operand and the instruction
21862 should only be executed if the condition is true.
21863 If CODE is 'D', then the X is a condition operand and the instruction
21864 should only be executed if the condition is false: however, if the mode
21865 of the comparison is CCFPEmode, then always execute the instruction -- we
21866 do this because in these circumstances !GE does not necessarily imply LT;
21867 in these cases the instruction pattern will take care to make sure that
21868 an instruction containing %d will follow, thereby undoing the effects of
21869 doing this instruction unconditionally.
21870 If CODE is 'N' then X is a floating point operand that must be negated
21871 before output.
21872 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21873 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
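/* Two illustrative examples (not exhaustive): %B applied to the
   constant 15 prints -16 (the bitwise inverse, sign-extended), and %M
   applied to a DImode value held in r4 prints "{r4-r5}".  These follow
   directly from the handling of 'B' and 'M' below.  */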
21874 static void
21875 arm_print_operand (FILE *stream, rtx x, int code)
21876 {
21877 switch (code)
21878 {
21879 case '@':
21880 fputs (ASM_COMMENT_START, stream);
21881 return;
21882
21883 case '_':
21884 fputs (user_label_prefix, stream);
21885 return;
21886
21887 case '|':
21888 fputs (REGISTER_PREFIX, stream);
21889 return;
21890
21891 case '?':
21892 arm_print_condition (stream);
21893 return;
21894
21895 case '.':
21896 /* The current condition code for a condition code setting instruction.
21897 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21898 fputc ('s', stream);
21899 arm_print_condition (stream);
21900 return;
21901
21902 case '!':
21903 /* If the instruction is conditionally executed then print
21904 the current condition code, otherwise print 's'. */
21905 gcc_assert (TARGET_THUMB2);
21906 if (current_insn_predicate)
21907 arm_print_condition (stream);
21908 else
21909 fputc ('s', stream);
21910 break;
21911
21912 /* %# is a "break" sequence. It doesn't output anything, but is used to
21913 separate e.g. operand numbers from following text, if that text consists
21914 of further digits which we don't want to be part of the operand
21915 number. */
21916 case '#':
21917 return;
21918
21919 case 'N':
21920 {
21921 REAL_VALUE_TYPE r;
21922 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21923 fprintf (stream, "%s", fp_const_from_val (&r));
21924 }
21925 return;
21926
21927 /* An integer or symbol address without a preceding # sign. */
21928 case 'c':
21929 switch (GET_CODE (x))
21930 {
21931 case CONST_INT:
21932 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21933 break;
21934
21935 case SYMBOL_REF:
21936 output_addr_const (stream, x);
21937 break;
21938
21939 case CONST:
21940 if (GET_CODE (XEXP (x, 0)) == PLUS
21941 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21942 {
21943 output_addr_const (stream, x);
21944 break;
21945 }
21946 /* Fall through. */
21947
21948 default:
21949 output_operand_lossage ("Unsupported operand for code '%c'", code);
21950 }
21951 return;
21952
21953 /* An integer that we want to print in HEX. */
21954 case 'x':
21955 switch (GET_CODE (x))
21956 {
21957 case CONST_INT:
21958 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21959 break;
21960
21961 default:
21962 output_operand_lossage ("Unsupported operand for code '%c'", code);
21963 }
21964 return;
21965
21966 case 'B':
21967 if (CONST_INT_P (x))
21968 {
21969 HOST_WIDE_INT val;
21970 val = ARM_SIGN_EXTEND (~INTVAL (x));
21971 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21972 }
21973 else
21974 {
21975 putc ('~', stream);
21976 output_addr_const (stream, x);
21977 }
21978 return;
21979
21980 case 'b':
21981 /* Print the log2 of a CONST_INT. */
21982 {
21983 HOST_WIDE_INT val;
21984
21985 if (!CONST_INT_P (x)
21986 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21987 output_operand_lossage ("Unsupported operand for code '%c'", code);
21988 else
21989 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21990 }
21991 return;
21992
21993 case 'L':
21994 /* The low 16 bits of an immediate constant. */
21995 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21996 return;
21997
21998 case 'i':
21999 fprintf (stream, "%s", arithmetic_instr (x, 1));
22000 return;
22001
22002 case 'I':
22003 fprintf (stream, "%s", arithmetic_instr (x, 0));
22004 return;
22005
22006 case 'S':
22007 {
22008 HOST_WIDE_INT val;
22009 const char *shift;
22010
22011 shift = shift_op (x, &val);
22012
22013 if (shift)
22014 {
22015 fprintf (stream, ", %s ", shift);
22016 if (val == -1)
22017 arm_print_operand (stream, XEXP (x, 1), 0);
22018 else
22019 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22020 }
22021 }
22022 return;
22023
22024 /* An explanation of the 'Q', 'R' and 'H' register operands:
22025
22026 In a pair of registers containing a DI or DF value the 'Q'
22027 operand returns the register number of the register containing
22028 the least significant part of the value. The 'R' operand returns
22029 the register number of the register containing the most
22030 significant part of the value.
22031
22032 The 'H' operand returns the higher of the two register numbers.
22033 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22034 same as the 'Q' operand, since the most significant part of the
22035 value is held in the lower number register. The reverse is true
22036 on systems where WORDS_BIG_ENDIAN is false.
22037
22038 The purpose of these operands is to distinguish between cases
22039 where the endian-ness of the values is important (for example
22040 when they are added together), and cases where the endian-ness
22041 is irrelevant, but the order of register operations is important.
22042 For example when loading a value from memory into a register
22043 pair, the endian-ness does not matter. Provided that the value
22044 from the lower memory address is put into the lower numbered
22045 register, and the value from the higher address is put into the
22046 higher numbered register, the load will work regardless of whether
22047 the value being loaded is big-wordian or little-wordian. The
22048 order of the two register loads can matter however, if the address
22049 of the memory location is actually held in one of the registers
22050 being overwritten by the load.
22051
22052 The 'Q' and 'R' operand codes are also available for 64-bit
22053 constants. */
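/* For example, with a DImode value in the register pair r4/r5 on a
   target where WORDS_BIG_ENDIAN is false, %Q prints r4 (the least
   significant word), %R prints r5 (the most significant word) and %H
   prints r5; with big-endian word order %Q and %R swap, while %H
   still prints r5.  */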
22054 case 'Q':
22055 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22056 {
22057 rtx part = gen_lowpart (SImode, x);
22058 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22059 return;
22060 }
22061
22062 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22063 {
22064 output_operand_lossage ("invalid operand for code '%c'", code);
22065 return;
22066 }
22067
22068 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22069 return;
22070
22071 case 'R':
22072 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22073 {
22074 machine_mode mode = GET_MODE (x);
22075 rtx part;
22076
22077 if (mode == VOIDmode)
22078 mode = DImode;
22079 part = gen_highpart_mode (SImode, mode, x);
22080 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22081 return;
22082 }
22083
22084 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22085 {
22086 output_operand_lossage ("invalid operand for code '%c'", code);
22087 return;
22088 }
22089
22090 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22091 return;
22092
22093 case 'H':
22094 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22095 {
22096 output_operand_lossage ("invalid operand for code '%c'", code);
22097 return;
22098 }
22099
22100 asm_fprintf (stream, "%r", REGNO (x) + 1);
22101 return;
22102
22103 case 'J':
22104 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22105 {
22106 output_operand_lossage ("invalid operand for code '%c'", code);
22107 return;
22108 }
22109
22110 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22111 return;
22112
22113 case 'K':
22114 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22115 {
22116 output_operand_lossage ("invalid operand for code '%c'", code);
22117 return;
22118 }
22119
22120 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22121 return;
22122
22123 case 'm':
22124 asm_fprintf (stream, "%r",
22125 REG_P (XEXP (x, 0))
22126 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22127 return;
22128
22129 case 'M':
22130 asm_fprintf (stream, "{%r-%r}",
22131 REGNO (x),
22132 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22133 return;
22134
22135 /* Like 'M', but writing doubleword vector registers, for use by Neon
22136 insns. */
22137 case 'h':
22138 {
22139 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22140 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22141 if (numregs == 1)
22142 asm_fprintf (stream, "{d%d}", regno);
22143 else
22144 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22145 }
22146 return;
22147
22148 case 'd':
22149 /* CONST_TRUE_RTX means always -- that's the default. */
22150 if (x == const_true_rtx)
22151 return;
22152
22153 if (!COMPARISON_P (x))
22154 {
22155 output_operand_lossage ("invalid operand for code '%c'", code);
22156 return;
22157 }
22158
22159 fputs (arm_condition_codes[get_arm_condition_code (x)],
22160 stream);
22161 return;
22162
22163 case 'D':
22164 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22165 want to do that. */
22166 if (x == const_true_rtx)
22167 {
22168 output_operand_lossage ("instruction never executed");
22169 return;
22170 }
22171 if (!COMPARISON_P (x))
22172 {
22173 output_operand_lossage ("invalid operand for code '%c'", code);
22174 return;
22175 }
22176
22177 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22178 (get_arm_condition_code (x))],
22179 stream);
22180 return;
22181
22182 case 's':
22183 case 'V':
22184 case 'W':
22185 case 'X':
22186 case 'Y':
22187 case 'Z':
22188 /* Former Maverick support, removed after GCC-4.7. */
22189 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22190 return;
22191
22192 case 'U':
22193 if (!REG_P (x)
22194 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22195 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22196 /* Bad value for wCG register number. */
22197 {
22198 output_operand_lossage ("invalid operand for code '%c'", code);
22199 return;
22200 }
22201
22202 else
22203 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22204 return;
22205
22206 /* Print an iWMMXt control register name. */
22207 case 'w':
22208 if (!CONST_INT_P (x)
22209 || INTVAL (x) < 0
22210 || INTVAL (x) >= 16)
22211 /* Bad value for wC register number. */
22212 {
22213 output_operand_lossage ("invalid operand for code '%c'", code);
22214 return;
22215 }
22216
22217 else
22218 {
22219 static const char * wc_reg_names [16] =
22220 {
22221 "wCID", "wCon", "wCSSF", "wCASF",
22222 "wC4", "wC5", "wC6", "wC7",
22223 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22224 "wC12", "wC13", "wC14", "wC15"
22225 };
22226
22227 fputs (wc_reg_names [INTVAL (x)], stream);
22228 }
22229 return;
22230
22231 /* Print the high single-precision register of a VFP double-precision
22232 register. */
22233 case 'p':
22234 {
22235 machine_mode mode = GET_MODE (x);
22236 int regno;
22237
22238 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22239 {
22240 output_operand_lossage ("invalid operand for code '%c'", code);
22241 return;
22242 }
22243
22244 regno = REGNO (x);
22245 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22246 {
22247 output_operand_lossage ("invalid operand for code '%c'", code);
22248 return;
22249 }
22250
22251 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22252 }
22253 return;
22254
22255 /* Print a VFP/Neon double precision or quad precision register name. */
22256 case 'P':
22257 case 'q':
22258 {
22259 machine_mode mode = GET_MODE (x);
22260 int is_quad = (code == 'q');
22261 int regno;
22262
22263 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22264 {
22265 output_operand_lossage ("invalid operand for code '%c'", code);
22266 return;
22267 }
22268
22269 if (!REG_P (x)
22270 || !IS_VFP_REGNUM (REGNO (x)))
22271 {
22272 output_operand_lossage ("invalid operand for code '%c'", code);
22273 return;
22274 }
22275
22276 regno = REGNO (x);
22277 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22278 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22279 {
22280 output_operand_lossage ("invalid operand for code '%c'", code);
22281 return;
22282 }
22283
22284 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22285 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22286 }
22287 return;
22288
22289 /* These two codes print the low/high doubleword register of a Neon quad
22290 register, respectively. For pair-structure types, can also print
22291 low/high quadword registers. */
22292 case 'e':
22293 case 'f':
22294 {
22295 machine_mode mode = GET_MODE (x);
22296 int regno;
22297
22298 if ((GET_MODE_SIZE (mode) != 16
22299 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22300 {
22301 output_operand_lossage ("invalid operand for code '%c'", code);
22302 return;
22303 }
22304
22305 regno = REGNO (x);
22306 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22307 {
22308 output_operand_lossage ("invalid operand for code '%c'", code);
22309 return;
22310 }
22311
22312 if (GET_MODE_SIZE (mode) == 16)
22313 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22314 + (code == 'f' ? 1 : 0));
22315 else
22316 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22317 + (code == 'f' ? 1 : 0));
22318 }
22319 return;
22320
22321 /* Print a VFPv3 floating-point constant, represented as an integer
22322 index. */
22323 case 'G':
22324 {
22325 int index = vfp3_const_double_index (x);
22326 gcc_assert (index != -1);
22327 fprintf (stream, "%d", index);
22328 }
22329 return;
22330
22331 /* Print bits representing opcode features for Neon.
22332
22333 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22334 and polynomials as unsigned.
22335
22336 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22337
22338 Bit 2 is 1 for rounding functions, 0 otherwise. */
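/* For example, bits == 0 (an unsigned integer, no rounding) gives
   %T -> 'u', %F -> 'i', %t -> 'u' and %O -> "", while bits == 7
   (a rounding float operation) gives %T -> 'f', %F -> 'f',
   %t -> 'f' and %O -> "r".  */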
22339
22340 /* Identify the type as 's', 'u', 'p' or 'f'. */
22341 case 'T':
22342 {
22343 HOST_WIDE_INT bits = INTVAL (x);
22344 fputc ("uspf"[bits & 3], stream);
22345 }
22346 return;
22347
22348 /* Likewise, but signed and unsigned integers are both 'i'. */
22349 case 'F':
22350 {
22351 HOST_WIDE_INT bits = INTVAL (x);
22352 fputc ("iipf"[bits & 3], stream);
22353 }
22354 return;
22355
22356 /* As for 'T', but emit 'u' instead of 'p'. */
22357 case 't':
22358 {
22359 HOST_WIDE_INT bits = INTVAL (x);
22360 fputc ("usuf"[bits & 3], stream);
22361 }
22362 return;
22363
22364 /* Bit 2: rounding (vs none). */
22365 case 'O':
22366 {
22367 HOST_WIDE_INT bits = INTVAL (x);
22368 fputs ((bits & 4) != 0 ? "r" : "", stream);
22369 }
22370 return;
22371
22372 /* Memory operand for vld1/vst1 instruction. */
22373 case 'A':
22374 {
22375 rtx addr;
22376 bool postinc = false;
22377 rtx postinc_reg = NULL;
22378 unsigned align, memsize, align_bits;
22379
22380 gcc_assert (MEM_P (x));
22381 addr = XEXP (x, 0);
22382 if (GET_CODE (addr) == POST_INC)
22383 {
22384 postinc = true;
22385 addr = XEXP (addr, 0);
22386 }
22387 if (GET_CODE (addr) == POST_MODIFY)
22388 {
22389 postinc_reg = XEXP (XEXP (addr, 1), 1);
22390 addr = XEXP (addr, 0);
22391 }
22392 asm_fprintf (stream, "[%r", REGNO (addr));
22393
22394 /* We know the alignment of this access, so we can emit a hint in the
22395 instruction (for some alignments) as an aid to the memory subsystem
22396 of the target. */
22397 align = MEM_ALIGN (x) >> 3;
22398 memsize = MEM_SIZE (x);
22399
22400 /* Only certain alignment specifiers are supported by the hardware. */
22401 if (memsize == 32 && (align % 32) == 0)
22402 align_bits = 256;
22403 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22404 align_bits = 128;
22405 else if (memsize >= 8 && (align % 8) == 0)
22406 align_bits = 64;
22407 else
22408 align_bits = 0;
22409
22410 if (align_bits != 0)
22411 asm_fprintf (stream, ":%d", align_bits);
22412
22413 asm_fprintf (stream, "]");
22414
22415 if (postinc)
22416 fputs ("!", stream);
22417 if (postinc_reg)
22418 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22419 }
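/* For example, a 16-byte access through r2 whose address is known to
   be 16-byte aligned, with post-increment, prints as "[r2:128]!";
   when no alignment hint applies the operand is simply "[r2]".  */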
22420 return;
22421
22422 case 'C':
22423 {
22424 rtx addr;
22425
22426 gcc_assert (MEM_P (x));
22427 addr = XEXP (x, 0);
22428 gcc_assert (REG_P (addr));
22429 asm_fprintf (stream, "[%r]", REGNO (addr));
22430 }
22431 return;
22432
22433 /* Translate an S register number into a D register number and element index. */
22434 case 'y':
22435 {
22436 machine_mode mode = GET_MODE (x);
22437 int regno;
22438
22439 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22440 {
22441 output_operand_lossage ("invalid operand for code '%c'", code);
22442 return;
22443 }
22444
22445 regno = REGNO (x);
22446 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22447 {
22448 output_operand_lossage ("invalid operand for code '%c'", code);
22449 return;
22450 }
22451
22452 regno = regno - FIRST_VFP_REGNUM;
22453 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22454 }
22455 return;
22456
22457 case 'v':
22458 gcc_assert (CONST_DOUBLE_P (x));
22459 int result;
22460 result = vfp3_const_double_for_fract_bits (x);
22461 if (result == 0)
22462 result = vfp3_const_double_for_bits (x);
22463 fprintf (stream, "#%d", result);
22464 return;
22465
22466 /* Register specifier for vld1.16/vst1.16. Translate the S register
22467 number into a D register number and element index. */
22468 case 'z':
22469 {
22470 machine_mode mode = GET_MODE (x);
22471 int regno;
22472
22473 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22474 {
22475 output_operand_lossage ("invalid operand for code '%c'", code);
22476 return;
22477 }
22478
22479 regno = REGNO (x);
22480 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22481 {
22482 output_operand_lossage ("invalid operand for code '%c'", code);
22483 return;
22484 }
22485
22486 regno = regno - FIRST_VFP_REGNUM;
22487 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22488 }
22489 return;
22490
22491 default:
22492 if (x == 0)
22493 {
22494 output_operand_lossage ("missing operand");
22495 return;
22496 }
22497
22498 switch (GET_CODE (x))
22499 {
22500 case REG:
22501 asm_fprintf (stream, "%r", REGNO (x));
22502 break;
22503
22504 case MEM:
22505 output_address (GET_MODE (x), XEXP (x, 0));
22506 break;
22507
22508 case CONST_DOUBLE:
22509 {
22510 char fpstr[20];
22511 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22512 sizeof (fpstr), 0, 1);
22513 fprintf (stream, "#%s", fpstr);
22514 }
22515 break;
22516
22517 default:
22518 gcc_assert (GET_CODE (x) != NEG);
22519 fputc ('#', stream);
22520 if (GET_CODE (x) == HIGH)
22521 {
22522 fputs (":lower16:", stream);
22523 x = XEXP (x, 0);
22524 }
22525
22526 output_addr_const (stream, x);
22527 break;
22528 }
22529 }
22530 }
22531 \f
22532 /* Target hook for printing a memory address. */
22533 static void
22534 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22535 {
22536 if (TARGET_32BIT)
22537 {
22538 int is_minus = GET_CODE (x) == MINUS;
22539
22540 if (REG_P (x))
22541 asm_fprintf (stream, "[%r]", REGNO (x));
22542 else if (GET_CODE (x) == PLUS || is_minus)
22543 {
22544 rtx base = XEXP (x, 0);
22545 rtx index = XEXP (x, 1);
22546 HOST_WIDE_INT offset = 0;
22547 if (!REG_P (base)
22548 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22549 {
22550 /* Ensure that BASE is a register
22551 (one of them must be). Also ensure that
22552 SP is not used as an index register. */
22553 std::swap (base, index);
22554 }
22555 switch (GET_CODE (index))
22556 {
22557 case CONST_INT:
22558 offset = INTVAL (index);
22559 if (is_minus)
22560 offset = -offset;
22561 asm_fprintf (stream, "[%r, #%wd]",
22562 REGNO (base), offset);
22563 break;
22564
22565 case REG:
22566 asm_fprintf (stream, "[%r, %s%r]",
22567 REGNO (base), is_minus ? "-" : "",
22568 REGNO (index));
22569 break;
22570
22571 case MULT:
22572 case ASHIFTRT:
22573 case LSHIFTRT:
22574 case ASHIFT:
22575 case ROTATERT:
22576 {
22577 asm_fprintf (stream, "[%r, %s%r",
22578 REGNO (base), is_minus ? "-" : "",
22579 REGNO (XEXP (index, 0)));
22580 arm_print_operand (stream, index, 'S');
22581 fputs ("]", stream);
22582 break;
22583 }
22584
22585 default:
22586 gcc_unreachable ();
22587 }
22588 }
22589 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22590 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22591 {
22592 gcc_assert (REG_P (XEXP (x, 0)));
22593
22594 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22595 asm_fprintf (stream, "[%r, #%s%d]!",
22596 REGNO (XEXP (x, 0)),
22597 GET_CODE (x) == PRE_DEC ? "-" : "",
22598 GET_MODE_SIZE (mode));
22599 else
22600 asm_fprintf (stream, "[%r], #%s%d",
22601 REGNO (XEXP (x, 0)),
22602 GET_CODE (x) == POST_DEC ? "-" : "",
22603 GET_MODE_SIZE (mode));
22604 }
22605 else if (GET_CODE (x) == PRE_MODIFY)
22606 {
22607 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22608 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22609 asm_fprintf (stream, "#%wd]!",
22610 INTVAL (XEXP (XEXP (x, 1), 1)));
22611 else
22612 asm_fprintf (stream, "%r]!",
22613 REGNO (XEXP (XEXP (x, 1), 1)));
22614 }
22615 else if (GET_CODE (x) == POST_MODIFY)
22616 {
22617 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22618 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22619 asm_fprintf (stream, "#%wd",
22620 INTVAL (XEXP (XEXP (x, 1), 1)));
22621 else
22622 asm_fprintf (stream, "%r",
22623 REGNO (XEXP (XEXP (x, 1), 1)));
22624 }
22625 else output_addr_const (stream, x);
22626 }
22627 else
22628 {
22629 if (REG_P (x))
22630 asm_fprintf (stream, "[%r]", REGNO (x));
22631 else if (GET_CODE (x) == POST_INC)
22632 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22633 else if (GET_CODE (x) == PLUS)
22634 {
22635 gcc_assert (REG_P (XEXP (x, 0)));
22636 if (CONST_INT_P (XEXP (x, 1)))
22637 asm_fprintf (stream, "[%r, #%wd]",
22638 REGNO (XEXP (x, 0)),
22639 INTVAL (XEXP (x, 1)));
22640 else
22641 asm_fprintf (stream, "[%r, %r]",
22642 REGNO (XEXP (x, 0)),
22643 REGNO (XEXP (x, 1)));
22644 }
22645 else
22646 output_addr_const (stream, x);
22647 }
22648 }
22649 \f
22650 /* Target hook for indicating whether a punctuation character for
22651 TARGET_PRINT_OPERAND is valid. */
22652 static bool
22653 arm_print_operand_punct_valid_p (unsigned char code)
22654 {
22655 return (code == '@' || code == '|' || code == '.'
22656 || code == '(' || code == ')' || code == '#'
22657 || (TARGET_32BIT && (code == '?'))
22658 || (TARGET_THUMB2 && (code == '!'))
22659 || (TARGET_THUMB && (code == '_')));
22660 }
22661 \f
22662 /* Target hook for assembling integer objects. The ARM version needs to
22663 handle word-sized values specially. */
22664 static bool
22665 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22666 {
22667 machine_mode mode;
22668
22669 if (size == UNITS_PER_WORD && aligned_p)
22670 {
22671 fputs ("\t.word\t", asm_out_file);
22672 output_addr_const (asm_out_file, x);
22673
22674 /* Mark symbols as position independent. We only do this in the
22675 .text segment, not in the .data segment. */
22676 if (NEED_GOT_RELOC && flag_pic && making_const_table
22677 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22678 {
22679 /* See legitimize_pic_address for an explanation of the
22680 TARGET_VXWORKS_RTP check. */
22681 /* References to weak symbols cannot be resolved locally:
22682 they may be overridden by a non-weak definition at link
22683 time. */
22684 if (!arm_pic_data_is_text_relative
22685 || (GET_CODE (x) == SYMBOL_REF
22686 && (!SYMBOL_REF_LOCAL_P (x)
22687 || (SYMBOL_REF_DECL (x)
22688 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22689 fputs ("(GOT)", asm_out_file);
22690 else
22691 fputs ("(GOTOFF)", asm_out_file);
22692 }
22693 fputc ('\n', asm_out_file);
22694 return true;
22695 }
22696
22697 mode = GET_MODE (x);
22698
22699 if (arm_vector_mode_supported_p (mode))
22700 {
22701 int i, units;
22702
22703 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22704
22705 units = CONST_VECTOR_NUNITS (x);
22706 size = GET_MODE_UNIT_SIZE (mode);
22707
22708 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22709 for (i = 0; i < units; i++)
22710 {
22711 rtx elt = CONST_VECTOR_ELT (x, i);
22712 assemble_integer
22713 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22714 }
22715 else
22716 for (i = 0; i < units; i++)
22717 {
22718 rtx elt = CONST_VECTOR_ELT (x, i);
22719 assemble_real
22720 (*CONST_DOUBLE_REAL_VALUE (elt),
22721 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22722 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22723 }
22724
22725 return true;
22726 }
22727
22728 return default_assemble_integer (x, size, aligned_p);
22729 }
22730
22731 static void
22732 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22733 {
22734 section *s;
22735
22736 if (!TARGET_AAPCS_BASED)
22737 {
22738 (is_ctor ?
22739 default_named_section_asm_out_constructor
22740 : default_named_section_asm_out_destructor) (symbol, priority);
22741 return;
22742 }
22743
22744 /* Put these in the .init_array section, using a special relocation. */
22745 if (priority != DEFAULT_INIT_PRIORITY)
22746 {
22747 char buf[18];
22748 sprintf (buf, "%s.%.5u",
22749 is_ctor ? ".init_array" : ".fini_array",
22750 priority);
22751 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22752 }
22753 else if (is_ctor)
22754 s = ctors_section;
22755 else
22756 s = dtors_section;
22757
22758 switch_to_section (s);
22759 assemble_align (POINTER_SIZE);
22760 fputs ("\t.word\t", asm_out_file);
22761 output_addr_const (asm_out_file, symbol);
22762 fputs ("(target1)\n", asm_out_file);
22763 }
22764
22765 /* Add a function to the list of static constructors. */
22766
22767 static void
22768 arm_elf_asm_constructor (rtx symbol, int priority)
22769 {
22770 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22771 }
22772
22773 /* Add a function to the list of static destructors. */
22774
22775 static void
22776 arm_elf_asm_destructor (rtx symbol, int priority)
22777 {
22778 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22779 }
22780 \f
22781 /* A finite state machine takes care of noticing whether or not instructions
22782 can be conditionally executed, and thus decreases execution time and code
22783 size by deleting branch instructions. The fsm is controlled by
22784 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22785
22786 /* The states of the fsm controlling condition codes are:
22787 0: normal, do nothing special
22788 1: make ASM_OUTPUT_OPCODE not output this instruction
22789 2: make ASM_OUTPUT_OPCODE not output this instruction
22790 3: make instructions conditional
22791 4: make instructions conditional
22792
22793 State transitions (state->state by whom under condition):
22794 0 -> 1 final_prescan_insn if the `target' is a label
22795 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22796 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22797 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22798 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22799 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22800 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22801 (the target insn is arm_target_insn).
22802
22803 If the jump clobbers the conditions then we use states 2 and 4.
22804
22805 A similar thing can be done with conditional return insns.
22806
22807 XXX In case the `target' is an unconditional branch, this conditionalising
22808 of the instructions always reduces code size, but not always execution
22809 time. But then, I want to reduce the code size to somewhere near what
22810 /bin/cc produces. */
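/* As a rough illustration of the effect, a sequence such as

	beq	.L1
	mov	r0, #1
   .L1:

   can be rewritten by this machinery as the single conditional
   instruction

	movne	r0, #1

   saving both the branch and the label.  */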
22811
22812 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22813 instructions. When a COND_EXEC instruction is seen the subsequent
22814 instructions are scanned so that multiple conditional instructions can be
22815 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22816 specify the length and true/false mask for the IT block. These will be
22817 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
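/* For example, three conditionally executed insns where the first two
   use the block's condition (say EQ) and the third uses the inverse
   give arm_condexec_masklen == 3 and arm_condexec_mask == 0x3, which
   thumb2_asm_output_opcode below prints (roughly) as "itte eq" in
   front of the first instruction.  */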
22818
22819 /* Returns the index of the ARM condition code string in
22820 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22821 COMPARISON should be an rtx like `(eq (...) (...))'. */
22822
22823 enum arm_cond_code
22824 maybe_get_arm_condition_code (rtx comparison)
22825 {
22826 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22827 enum arm_cond_code code;
22828 enum rtx_code comp_code = GET_CODE (comparison);
22829
22830 if (GET_MODE_CLASS (mode) != MODE_CC)
22831 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22832 XEXP (comparison, 1));
22833
22834 switch (mode)
22835 {
22836 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22837 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22838 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22839 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22840 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22841 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22842 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22843 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22844 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22845 case E_CC_DLTUmode: code = ARM_CC;
22846
22847 dominance:
22848 if (comp_code == EQ)
22849 return ARM_INVERSE_CONDITION_CODE (code);
22850 if (comp_code == NE)
22851 return code;
22852 return ARM_NV;
22853
22854 case E_CC_NOOVmode:
22855 switch (comp_code)
22856 {
22857 case NE: return ARM_NE;
22858 case EQ: return ARM_EQ;
22859 case GE: return ARM_PL;
22860 case LT: return ARM_MI;
22861 default: return ARM_NV;
22862 }
22863
22864 case E_CC_Zmode:
22865 switch (comp_code)
22866 {
22867 case NE: return ARM_NE;
22868 case EQ: return ARM_EQ;
22869 default: return ARM_NV;
22870 }
22871
22872 case E_CC_Nmode:
22873 switch (comp_code)
22874 {
22875 case NE: return ARM_MI;
22876 case EQ: return ARM_PL;
22877 default: return ARM_NV;
22878 }
22879
22880 case E_CCFPEmode:
22881 case E_CCFPmode:
22882 /* We can handle all cases except UNEQ and LTGT. */
22883 switch (comp_code)
22884 {
22885 case GE: return ARM_GE;
22886 case GT: return ARM_GT;
22887 case LE: return ARM_LS;
22888 case LT: return ARM_MI;
22889 case NE: return ARM_NE;
22890 case EQ: return ARM_EQ;
22891 case ORDERED: return ARM_VC;
22892 case UNORDERED: return ARM_VS;
22893 case UNLT: return ARM_LT;
22894 case UNLE: return ARM_LE;
22895 case UNGT: return ARM_HI;
22896 case UNGE: return ARM_PL;
22897 /* UNEQ and LTGT do not have a representation. */
22898 case UNEQ: /* Fall through. */
22899 case LTGT: /* Fall through. */
22900 default: return ARM_NV;
22901 }
22902
22903 case E_CC_SWPmode:
22904 switch (comp_code)
22905 {
22906 case NE: return ARM_NE;
22907 case EQ: return ARM_EQ;
22908 case GE: return ARM_LE;
22909 case GT: return ARM_LT;
22910 case LE: return ARM_GE;
22911 case LT: return ARM_GT;
22912 case GEU: return ARM_LS;
22913 case GTU: return ARM_CC;
22914 case LEU: return ARM_CS;
22915 case LTU: return ARM_HI;
22916 default: return ARM_NV;
22917 }
22918
22919 case E_CC_Cmode:
22920 switch (comp_code)
22921 {
22922 case LTU: return ARM_CS;
22923 case GEU: return ARM_CC;
22924 case NE: return ARM_CS;
22925 case EQ: return ARM_CC;
22926 default: return ARM_NV;
22927 }
22928
22929 case E_CC_CZmode:
22930 switch (comp_code)
22931 {
22932 case NE: return ARM_NE;
22933 case EQ: return ARM_EQ;
22934 case GEU: return ARM_CS;
22935 case GTU: return ARM_HI;
22936 case LEU: return ARM_LS;
22937 case LTU: return ARM_CC;
22938 default: return ARM_NV;
22939 }
22940
22941 case E_CC_NCVmode:
22942 switch (comp_code)
22943 {
22944 case GE: return ARM_GE;
22945 case LT: return ARM_LT;
22946 case GEU: return ARM_CS;
22947 case LTU: return ARM_CC;
22948 default: return ARM_NV;
22949 }
22950
22951 case E_CC_Vmode:
22952 switch (comp_code)
22953 {
22954 case NE: return ARM_VS;
22955 case EQ: return ARM_VC;
22956 default: return ARM_NV;
22957 }
22958
22959 case E_CCmode:
22960 switch (comp_code)
22961 {
22962 case NE: return ARM_NE;
22963 case EQ: return ARM_EQ;
22964 case GE: return ARM_GE;
22965 case GT: return ARM_GT;
22966 case LE: return ARM_LE;
22967 case LT: return ARM_LT;
22968 case GEU: return ARM_CS;
22969 case GTU: return ARM_HI;
22970 case LEU: return ARM_LS;
22971 case LTU: return ARM_CC;
22972 default: return ARM_NV;
22973 }
22974
22975 default: gcc_unreachable ();
22976 }
22977 }
22978
22979 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22980 static enum arm_cond_code
22981 get_arm_condition_code (rtx comparison)
22982 {
22983 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22984 gcc_assert (code != ARM_NV);
22985 return code;
22986 }
22987
22988 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22989 code registers when not targeting Thumb1. The VFP condition register
22990 only exists when generating hard-float code. */
22991 static bool
22992 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22993 {
22994 if (!TARGET_32BIT)
22995 return false;
22996
22997 *p1 = CC_REGNUM;
22998 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22999 return true;
23000 }
23001
23002 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23003 instructions. */
23004 void
23005 thumb2_final_prescan_insn (rtx_insn *insn)
23006 {
23007 rtx_insn *first_insn = insn;
23008 rtx body = PATTERN (insn);
23009 rtx predicate;
23010 enum arm_cond_code code;
23011 int n;
23012 int mask;
23013 int max;
23014
23015 /* max_insns_skipped in the tune was already taken into account in the
23016 cost model of the ifcvt pass when generating COND_EXEC insns. At this
23017 stage just emit IT blocks as large as we can; it does not make sense
23018 to split the IT blocks. */
23019 max = MAX_INSN_PER_IT_BLOCK;
23020
23021 /* Remove the previous insn from the count of insns to be output. */
23022 if (arm_condexec_count)
23023 arm_condexec_count--;
23024
23025 /* Nothing to do if we are already inside a conditional block. */
23026 if (arm_condexec_count)
23027 return;
23028
23029 if (GET_CODE (body) != COND_EXEC)
23030 return;
23031
23032 /* Conditional jumps are implemented directly. */
23033 if (JUMP_P (insn))
23034 return;
23035
23036 predicate = COND_EXEC_TEST (body);
23037 arm_current_cc = get_arm_condition_code (predicate);
23038
23039 n = get_attr_ce_count (insn);
23040 arm_condexec_count = 1;
23041 arm_condexec_mask = (1 << n) - 1;
23042 arm_condexec_masklen = n;
23043 /* See if subsequent instructions can be combined into the same block. */
23044 for (;;)
23045 {
23046 insn = next_nonnote_insn (insn);
23047
23048 /* Jumping into the middle of an IT block is illegal, so a label or
23049 barrier terminates the block. */
23050 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23051 break;
23052
23053 body = PATTERN (insn);
23054 /* USE and CLOBBER aren't really insns, so just skip them. */
23055 if (GET_CODE (body) == USE
23056 || GET_CODE (body) == CLOBBER)
23057 continue;
23058
23059 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23060 if (GET_CODE (body) != COND_EXEC)
23061 break;
23062 /* Maximum number of conditionally executed instructions in a block. */
23063 n = get_attr_ce_count (insn);
23064 if (arm_condexec_masklen + n > max)
23065 break;
23066
23067 predicate = COND_EXEC_TEST (body);
23068 code = get_arm_condition_code (predicate);
23069 mask = (1 << n) - 1;
23070 if (arm_current_cc == code)
23071 arm_condexec_mask |= (mask << arm_condexec_masklen);
23072 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23073 break;
23074
23075 arm_condexec_count++;
23076 arm_condexec_masklen += n;
23077
23078 /* A jump must be the last instruction in a conditional block. */
23079 if (JUMP_P (insn))
23080 break;
23081 }
23082 /* Restore recog_data (getting the attributes of other insns can
23083 destroy this array, but final.c assumes that it remains intact
23084 across this call). */
23085 extract_constrain_insn_cached (first_insn);
23086 }
23087
23088 void
23089 arm_final_prescan_insn (rtx_insn *insn)
23090 {
23091 /* BODY will hold the body of INSN. */
23092 rtx body = PATTERN (insn);
23093
23094 /* This will be 1 if trying to repeat the trick, and things need to be
23095 reversed if it appears to fail. */
23096 int reverse = 0;
23097
23098 /* If we start with a return insn, we only succeed if we find another one. */
23099 int seeking_return = 0;
23100 enum rtx_code return_code = UNKNOWN;
23101
23102 /* START_INSN will hold the insn from where we start looking. This is the
23103 first insn after the following code_label if REVERSE is true. */
23104 rtx_insn *start_insn = insn;
23105
23106 /* If in state 4, check if the target branch is reached, in order to
23107 change back to state 0. */
23108 if (arm_ccfsm_state == 4)
23109 {
23110 if (insn == arm_target_insn)
23111 {
23112 arm_target_insn = NULL;
23113 arm_ccfsm_state = 0;
23114 }
23115 return;
23116 }
23117
23118 /* If in state 3, it is possible to repeat the trick, if this insn is an
23119 unconditional branch to a label, and immediately following this branch
23120 is the previous target label which is only used once, and the label this
23121 branch jumps to is not too far off. */
23122 if (arm_ccfsm_state == 3)
23123 {
23124 if (simplejump_p (insn))
23125 {
23126 start_insn = next_nonnote_insn (start_insn);
23127 if (BARRIER_P (start_insn))
23128 {
23129 /* XXX Isn't this always a barrier? */
23130 start_insn = next_nonnote_insn (start_insn);
23131 }
23132 if (LABEL_P (start_insn)
23133 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23134 && LABEL_NUSES (start_insn) == 1)
23135 reverse = TRUE;
23136 else
23137 return;
23138 }
23139 else if (ANY_RETURN_P (body))
23140 {
23141 start_insn = next_nonnote_insn (start_insn);
23142 if (BARRIER_P (start_insn))
23143 start_insn = next_nonnote_insn (start_insn);
23144 if (LABEL_P (start_insn)
23145 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23146 && LABEL_NUSES (start_insn) == 1)
23147 {
23148 reverse = TRUE;
23149 seeking_return = 1;
23150 return_code = GET_CODE (body);
23151 }
23152 else
23153 return;
23154 }
23155 else
23156 return;
23157 }
23158
23159 gcc_assert (!arm_ccfsm_state || reverse);
23160 if (!JUMP_P (insn))
23161 return;
23162
23163 /* This jump might be paralleled with a clobber of the condition codes;
23164 the jump should always come first. */
23165 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23166 body = XVECEXP (body, 0, 0);
23167
23168 if (reverse
23169 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23170 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23171 {
23172 int insns_skipped;
23173 int fail = FALSE, succeed = FALSE;
23174 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23175 int then_not_else = TRUE;
23176 rtx_insn *this_insn = start_insn;
23177 rtx label = 0;
23178
23179 /* Register the insn jumped to. */
23180 if (reverse)
23181 {
23182 if (!seeking_return)
23183 label = XEXP (SET_SRC (body), 0);
23184 }
23185 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23186 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23187 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23188 {
23189 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23190 then_not_else = FALSE;
23191 }
23192 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23193 {
23194 seeking_return = 1;
23195 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23196 }
23197 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23198 {
23199 seeking_return = 1;
23200 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23201 then_not_else = FALSE;
23202 }
23203 else
23204 gcc_unreachable ();
23205
23206 /* See how many insns this branch skips, and what kind of insns. If all
23207 insns are okay, and the label or unconditional branch to the same
23208 label is not too far away, succeed. */
23209 for (insns_skipped = 0;
23210 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23211 {
23212 rtx scanbody;
23213
23214 this_insn = next_nonnote_insn (this_insn);
23215 if (!this_insn)
23216 break;
23217
23218 switch (GET_CODE (this_insn))
23219 {
23220 case CODE_LABEL:
23221 /* Succeed if it is the target label, otherwise fail since
23222 control falls in from somewhere else. */
23223 if (this_insn == label)
23224 {
23225 arm_ccfsm_state = 1;
23226 succeed = TRUE;
23227 }
23228 else
23229 fail = TRUE;
23230 break;
23231
23232 case BARRIER:
23233 /* Succeed if the following insn is the target label.
23234 Otherwise fail.
23235 If return insns are used then the last insn in a function
23236 will be a barrier. */
23237 this_insn = next_nonnote_insn (this_insn);
23238 if (this_insn && this_insn == label)
23239 {
23240 arm_ccfsm_state = 1;
23241 succeed = TRUE;
23242 }
23243 else
23244 fail = TRUE;
23245 break;
23246
23247 case CALL_INSN:
23248 /* The AAPCS says that conditional calls should not be
23249 used since they make interworking inefficient (the
23250 linker can't transform BL<cond> into BLX). That's
23251 only a problem if the machine has BLX. */
23252 if (arm_arch5)
23253 {
23254 fail = TRUE;
23255 break;
23256 }
23257
23258 /* Succeed if the following insn is the target label, or
23259 if the following two insns are a barrier and the
23260 target label. */
23261 this_insn = next_nonnote_insn (this_insn);
23262 if (this_insn && BARRIER_P (this_insn))
23263 this_insn = next_nonnote_insn (this_insn);
23264
23265 if (this_insn && this_insn == label
23266 && insns_skipped < max_insns_skipped)
23267 {
23268 arm_ccfsm_state = 1;
23269 succeed = TRUE;
23270 }
23271 else
23272 fail = TRUE;
23273 break;
23274
23275 case JUMP_INSN:
23276 /* If this is an unconditional branch to the same label, succeed.
23277 If it is to another label, do nothing. If it is conditional,
23278 fail. */
23279 /* XXX Probably, the tests for SET and the PC are
23280 unnecessary. */
23281
23282 scanbody = PATTERN (this_insn);
23283 if (GET_CODE (scanbody) == SET
23284 && GET_CODE (SET_DEST (scanbody)) == PC)
23285 {
23286 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23287 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23288 {
23289 arm_ccfsm_state = 2;
23290 succeed = TRUE;
23291 }
23292 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23293 fail = TRUE;
23294 }
23295 /* Fail if a conditional return is undesirable (e.g. on a
23296 StrongARM), but still allow this if optimizing for size. */
23297 else if (GET_CODE (scanbody) == return_code
23298 && !use_return_insn (TRUE, NULL)
23299 && !optimize_size)
23300 fail = TRUE;
23301 else if (GET_CODE (scanbody) == return_code)
23302 {
23303 arm_ccfsm_state = 2;
23304 succeed = TRUE;
23305 }
23306 else if (GET_CODE (scanbody) == PARALLEL)
23307 {
23308 switch (get_attr_conds (this_insn))
23309 {
23310 case CONDS_NOCOND:
23311 break;
23312 default:
23313 fail = TRUE;
23314 break;
23315 }
23316 }
23317 else
23318 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23319
23320 break;
23321
23322 case INSN:
23323 /* Instructions using or affecting the condition codes make it
23324 fail. */
23325 scanbody = PATTERN (this_insn);
23326 if (!(GET_CODE (scanbody) == SET
23327 || GET_CODE (scanbody) == PARALLEL)
23328 || get_attr_conds (this_insn) != CONDS_NOCOND)
23329 fail = TRUE;
23330 break;
23331
23332 default:
23333 break;
23334 }
23335 }
23336 if (succeed)
23337 {
23338 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23339 arm_target_label = CODE_LABEL_NUMBER (label);
23340 else
23341 {
23342 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23343
23344 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23345 {
23346 this_insn = next_nonnote_insn (this_insn);
23347 gcc_assert (!this_insn
23348 || (!BARRIER_P (this_insn)
23349 && !LABEL_P (this_insn)));
23350 }
23351 if (!this_insn)
23352 {
23353 /* Oh dear! We ran off the end... give up. */
23354 extract_constrain_insn_cached (insn);
23355 arm_ccfsm_state = 0;
23356 arm_target_insn = NULL;
23357 return;
23358 }
23359 arm_target_insn = this_insn;
23360 }
23361
23362 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23363 what it was. */
23364 if (!reverse)
23365 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23366
23367 if (reverse || then_not_else)
23368 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23369 }
23370
23371 /* Restore recog_data (getting the attributes of other insns can
23372 destroy this array, but final.c assumes that it remains intact
23373 across this call). */
23374 extract_constrain_insn_cached (insn);
23375 }
23376 }
23377
23378 /* Output IT instructions. */
23379 void
23380 thumb2_asm_output_opcode (FILE * stream)
23381 {
23382 char buff[5];
23383 int n;
23384
23385 if (arm_condexec_mask)
23386 {
23387 for (n = 0; n < arm_condexec_masklen; n++)
23388 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23389 buff[n] = 0;
23390 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23391 arm_condition_codes[arm_current_cc]);
23392 arm_condexec_mask = 0;
23393 }
23394 }
23395
23396 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23397 UNITS_PER_WORD bytes wide. */
23398 static unsigned int
23399 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23400 {
23401 if (TARGET_32BIT
23402 && regno > PC_REGNUM
23403 && regno != FRAME_POINTER_REGNUM
23404 && regno != ARG_POINTER_REGNUM
23405 && !IS_VFP_REGNUM (regno))
23406 return 1;
23407
23408 return ARM_NUM_REGS (mode);
23409 }
23410
23411 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23412 static bool
23413 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23414 {
23415 if (GET_MODE_CLASS (mode) == MODE_CC)
23416 return (regno == CC_REGNUM
23417 || (TARGET_HARD_FLOAT
23418 && regno == VFPCC_REGNUM));
23419
23420 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23421 return false;
23422
23423 if (TARGET_THUMB1)
23424 /* For the Thumb we only allow values bigger than SImode in
23425 registers 0 - 6, so that there is always a second low
23426 register available to hold the upper part of the value.
23427 We probably ought to ensure that the register is the
23428 start of an even-numbered register pair. */
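/* For example (illustrative): a DImode value may start in r0-r6, so its
   upper half lands in r1-r7, but it may not start in r7 or in a high
   register.  */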
23429 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23430
23431 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23432 {
23433 if (mode == SFmode || mode == SImode)
23434 return VFP_REGNO_OK_FOR_SINGLE (regno);
23435
23436 if (mode == DFmode)
23437 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23438
23439 if (mode == HFmode)
23440 return VFP_REGNO_OK_FOR_SINGLE (regno);
23441
23442 /* VFP registers can hold HImode values. */
23443 if (mode == HImode)
23444 return VFP_REGNO_OK_FOR_SINGLE (regno);
23445
23446 if (TARGET_NEON)
23447 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23448 || (VALID_NEON_QREG_MODE (mode)
23449 && NEON_REGNO_OK_FOR_QUAD (regno))
23450 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23451 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23452 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23453 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23454 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23455
23456 return false;
23457 }
23458
23459 if (TARGET_REALLY_IWMMXT)
23460 {
23461 if (IS_IWMMXT_GR_REGNUM (regno))
23462 return mode == SImode;
23463
23464 if (IS_IWMMXT_REGNUM (regno))
23465 return VALID_IWMMXT_REG_MODE (mode);
23466 }
23467
23468 /* We allow almost any value to be stored in the general registers.
23469 Restrict doubleword quantities to even register pairs in ARM state
23470 so that we can use ldrd. Do not allow very large Neon structure
23471 opaque modes in general registers; they would use too many. */
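/* For example (illustrative): in ARM state with LDRD available, a DImode
   value must start in an even-numbered register (r0, r2, ...) so that
   ldrd/strd can be used, and modes needing more than four core registers
   (anything larger than TImode) are rejected outright.  */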
23472 if (regno <= LAST_ARM_REGNUM)
23473 {
23474 if (ARM_NUM_REGS (mode) > 4)
23475 return false;
23476
23477 if (TARGET_THUMB2)
23478 return true;
23479
23480 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23481 }
23482
23483 if (regno == FRAME_POINTER_REGNUM
23484 || regno == ARG_POINTER_REGNUM)
23485 /* We only allow integers in the fake hard registers. */
23486 return GET_MODE_CLASS (mode) == MODE_INT;
23487
23488 return false;
23489 }
23490
23491 /* Implement TARGET_MODES_TIEABLE_P. */
23492
23493 static bool
23494 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23495 {
23496 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23497 return true;
23498
23499 /* We specifically want to allow elements of "structure" modes to
23500 be tieable to the structure. This more general condition allows
23501 other rarer situations too. */
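/* An illustrative case (an assumption about typical use, not an
   exhaustive list): a V4SImode element of a Neon structure mode such as
   OImode can be tied to the structure value itself, so accessing the
   element does not force a copy through memory.  */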
23502 if (TARGET_NEON
23503 && (VALID_NEON_DREG_MODE (mode1)
23504 || VALID_NEON_QREG_MODE (mode1)
23505 || VALID_NEON_STRUCT_MODE (mode1))
23506 && (VALID_NEON_DREG_MODE (mode2)
23507 || VALID_NEON_QREG_MODE (mode2)
23508 || VALID_NEON_STRUCT_MODE (mode2)))
23509 return true;
23510
23511 return false;
23512 }
23513
23514 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23515 not used in arm mode. */
23516
23517 enum reg_class
23518 arm_regno_class (int regno)
23519 {
23520 if (regno == PC_REGNUM)
23521 return NO_REGS;
23522
23523 if (TARGET_THUMB1)
23524 {
23525 if (regno == STACK_POINTER_REGNUM)
23526 return STACK_REG;
23527 if (regno == CC_REGNUM)
23528 return CC_REG;
23529 if (regno < 8)
23530 return LO_REGS;
23531 return HI_REGS;
23532 }
23533
23534 if (TARGET_THUMB2 && regno < 8)
23535 return LO_REGS;
23536
23537 if ( regno <= LAST_ARM_REGNUM
23538 || regno == FRAME_POINTER_REGNUM
23539 || regno == ARG_POINTER_REGNUM)
23540 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23541
23542 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23543 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23544
23545 if (IS_VFP_REGNUM (regno))
23546 {
23547 if (regno <= D7_VFP_REGNUM)
23548 return VFP_D0_D7_REGS;
23549 else if (regno <= LAST_LO_VFP_REGNUM)
23550 return VFP_LO_REGS;
23551 else
23552 return VFP_HI_REGS;
23553 }
23554
23555 if (IS_IWMMXT_REGNUM (regno))
23556 return IWMMXT_REGS;
23557
23558 if (IS_IWMMXT_GR_REGNUM (regno))
23559 return IWMMXT_GR_REGS;
23560
23561 return NO_REGS;
23562 }
23563
23564 /* Handle a special case when computing the offset
23565 of an argument from the frame pointer. */
23566 int
23567 arm_debugger_arg_offset (int value, rtx addr)
23568 {
23569 rtx_insn *insn;
23570
23571 /* We are only interested if dbxout_parms() failed to compute the offset. */
23572 if (value != 0)
23573 return 0;
23574
23575 /* We can only cope with the case where the address is held in a register. */
23576 if (!REG_P (addr))
23577 return 0;
23578
23579 /* If we are using the frame pointer to point at the argument, then
23580 an offset of 0 is correct. */
23581 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23582 return 0;
23583
23584 /* If we are using the stack pointer to point at the
23585 argument, then an offset of 0 is correct. */
23586 /* ??? Check this is consistent with thumb2 frame layout. */
23587 if ((TARGET_THUMB || !frame_pointer_needed)
23588 && REGNO (addr) == SP_REGNUM)
23589 return 0;
23590
23591 /* Oh dear. The argument is pointed to by a register rather
23592 than being held in a register, or being stored at a known
23593 offset from the frame pointer. Since GDB only understands
23594 those two kinds of argument we must translate the address
23595 held in the register into an offset from the frame pointer.
23596 We do this by searching through the insns for the function
23597 looking to see where this register gets its value. If the
23598 register is initialized from the frame pointer plus an offset
23599 then we are in luck and we can continue, otherwise we give up.
23600
23601 This code is exercised by producing debugging information
23602 for a function with arguments like this:
23603
23604 double func (double a, double b, int c, double d) {return d;}
23605
23606 Without this code the stab for parameter 'd' will be set to
23607 an offset of 0 from the frame pointer, rather than 8. */
23608
23609 /* The if() statement says:
23610
23611 If the insn is a normal instruction
23612 and if the insn is setting the value in a register
23613 and if the register being set is the register holding the address of the argument
23614 and if the address is computed by an addition
23615 that involves adding to a register
23616 which is the frame pointer
23617 a constant integer
23618
23619 then... */
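/* In RTL terms, the loop below looks for an insn of roughly this shape
   (illustrative; register number and offset hypothetical):

     (set (reg:SI Rn)
          (plus:SI (reg:SI HARD_FRAME_POINTER_REGNUM) (const_int 8)))

   and uses the constant as the argument's offset from the frame
   pointer.  */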
23620
23621 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23622 {
23623 if ( NONJUMP_INSN_P (insn)
23624 && GET_CODE (PATTERN (insn)) == SET
23625 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23626 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23627 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23628 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23629 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23630 )
23631 {
23632 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23633
23634 break;
23635 }
23636 }
23637
23638 if (value == 0)
23639 {
23640 debug_rtx (addr);
23641 warning (0, "unable to compute real location of stacked parameter");
23642 value = 8; /* XXX magic hack */
23643 }
23644
23645 return value;
23646 }
23647 \f
23648 /* Implement TARGET_PROMOTED_TYPE. */
23649
23650 static tree
23651 arm_promoted_type (const_tree t)
23652 {
23653 if (SCALAR_FLOAT_TYPE_P (t)
23654 && TYPE_PRECISION (t) == 16
23655 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23656 return float_type_node;
23657 return NULL_TREE;
23658 }
23659
23660 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23661 This simply adds HFmode as a supported mode; even though we don't
23662 implement arithmetic on this type directly, it's supported by
23663 optabs conversions, much the way the double-word arithmetic is
23664 special-cased in the default hook. */
23665
23666 static bool
23667 arm_scalar_mode_supported_p (scalar_mode mode)
23668 {
23669 if (mode == HFmode)
23670 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23671 else if (ALL_FIXED_POINT_MODE_P (mode))
23672 return true;
23673 else
23674 return default_scalar_mode_supported_p (mode);
23675 }
23676
23677 /* Set the value of FLT_EVAL_METHOD.
23678 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23679
23680 0: evaluate all operations and constants, whose semantic type has at
23681 most the range and precision of type float, to the range and
23682 precision of float; evaluate all other operations and constants to
23683 the range and precision of the semantic type;
23684
23685 N, where _FloatN is a supported interchange floating type
23686 evaluate all operations and constants, whose semantic type has at
23687 most the range and precision of _FloatN type, to the range and
23688 precision of the _FloatN type; evaluate all other operations and
23689 constants to the range and precision of the semantic type;
23690
23691 If we have the ARMv8.2-A extensions then we support _Float16 in native
23692 precision, so we should set this to 16. Otherwise, we support the type,
23693 but want to evaluate expressions in float precision, so set this to
23694 0. */
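/* A rough example of the effect (illustrative): given

     _Float16 a, b, c;  a = b * c;

   with the ARMv8.2-A FP16 instructions the product can be computed
   directly in HFmode, whereas without them b and c are first promoted to
   float and the result converted back.  */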
23695
23696 static enum flt_eval_method
23697 arm_excess_precision (enum excess_precision_type type)
23698 {
23699 switch (type)
23700 {
23701 case EXCESS_PRECISION_TYPE_FAST:
23702 case EXCESS_PRECISION_TYPE_STANDARD:
23703 /* We can calculate either in 16-bit range and precision or
23704 32-bit range and precision. Make that decision based on whether
23705 we have native support for the ARMv8.2-A 16-bit floating-point
23706 instructions or not. */
23707 return (TARGET_VFP_FP16INST
23708 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23709 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23710 case EXCESS_PRECISION_TYPE_IMPLICIT:
23711 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23712 default:
23713 gcc_unreachable ();
23714 }
23715 return FLT_EVAL_METHOD_UNPREDICTABLE;
23716 }
23717
23718
23719 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23720 _Float16 if we are using anything other than ieee format for 16-bit
23721 floating point. Otherwise, punt to the default implementation. */
23722 static opt_scalar_float_mode
23723 arm_floatn_mode (int n, bool extended)
23724 {
23725 if (!extended && n == 16)
23726 {
23727 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23728 return HFmode;
23729 return opt_scalar_float_mode ();
23730 }
23731
23732 return default_floatn_mode (n, extended);
23733 }
23734
23735
23736 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23737 not to early-clobber SRC registers in the process.
23738
23739 We assume that the operands described by SRC and DEST represent a
23740 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23741 number of components into which the copy has been decomposed. */
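/* A worked example (hypothetical registers): with COUNT == 2,
   DEST == {d1, d2} and SRC == {d0, d1}, copying in ascending order would
   overwrite d1 before it is read, so the code below emits the moves in
   descending order: d2 <- d1 first, then d1 <- d0.  */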
23742 void
23743 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23744 {
23745 unsigned int i;
23746
23747 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23748 || REGNO (operands[0]) < REGNO (operands[1]))
23749 {
23750 for (i = 0; i < count; i++)
23751 {
23752 operands[2 * i] = dest[i];
23753 operands[2 * i + 1] = src[i];
23754 }
23755 }
23756 else
23757 {
23758 for (i = 0; i < count; i++)
23759 {
23760 operands[2 * i] = dest[count - i - 1];
23761 operands[2 * i + 1] = src[count - i - 1];
23762 }
23763 }
23764 }
23765
23766 /* Split operands into moves from op[1] + op[2] into op[0]. */
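/* For instance (illustrative): combining d0 and d1 into q0, whose low and
   high halves are exactly d0 and d1, needs no moves at all; combining d1
   and d0 into q0 is the reversed case handled below with a single VSWP.  */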
23767
23768 void
23769 neon_split_vcombine (rtx operands[3])
23770 {
23771 unsigned int dest = REGNO (operands[0]);
23772 unsigned int src1 = REGNO (operands[1]);
23773 unsigned int src2 = REGNO (operands[2]);
23774 machine_mode halfmode = GET_MODE (operands[1]);
23775 unsigned int halfregs = REG_NREGS (operands[1]);
23776 rtx destlo, desthi;
23777
23778 if (src1 == dest && src2 == dest + halfregs)
23779 {
23780 /* No-op move. Can't split to nothing; emit something. */
23781 emit_note (NOTE_INSN_DELETED);
23782 return;
23783 }
23784
23785 /* Preserve register attributes for variable tracking. */
23786 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23787 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23788 GET_MODE_SIZE (halfmode));
23789
23790 /* Special case of reversed high/low parts. Use VSWP. */
23791 if (src2 == dest && src1 == dest + halfregs)
23792 {
23793 rtx x = gen_rtx_SET (destlo, operands[1]);
23794 rtx y = gen_rtx_SET (desthi, operands[2]);
23795 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23796 return;
23797 }
23798
23799 if (!reg_overlap_mentioned_p (operands[2], destlo))
23800 {
23801 /* Try to avoid unnecessary moves if part of the result
23802 is in the right place already. */
23803 if (src1 != dest)
23804 emit_move_insn (destlo, operands[1]);
23805 if (src2 != dest + halfregs)
23806 emit_move_insn (desthi, operands[2]);
23807 }
23808 else
23809 {
23810 if (src2 != dest + halfregs)
23811 emit_move_insn (desthi, operands[2]);
23812 if (src1 != dest)
23813 emit_move_insn (destlo, operands[1]);
23814 }
23815 }
23816 \f
23817 /* Return the number (counting from 0) of
23818 the least significant set bit in MASK. */
23819
23820 inline static int
23821 number_of_first_bit_set (unsigned mask)
23822 {
23823 return ctz_hwi (mask);
23824 }
23825
23826 /* Like emit_multi_reg_push, but allowing for a different set of
23827 registers to be described as saved. MASK is the set of registers
23828 to be saved; REAL_REGS is the set of registers to be described as
23829 saved. If REAL_REGS is 0, only describe the stack adjustment. */
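/* Illustrative use (hypothetical masks): MASK == 0x0030 pushes {r4, r5};
   if REAL_REGS == 0x0300, the unwind information instead records r8 and r9
   as the registers saved in those two stack slots, which is how the high
   register pushes in thumb1_expand_prologue are described.  */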
23830
23831 static rtx_insn *
23832 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23833 {
23834 unsigned long regno;
23835 rtx par[10], tmp, reg;
23836 rtx_insn *insn;
23837 int i, j;
23838
23839 /* Build the parallel of the registers actually being stored. */
23840 for (i = 0; mask; ++i, mask &= mask - 1)
23841 {
23842 regno = ctz_hwi (mask);
23843 reg = gen_rtx_REG (SImode, regno);
23844
23845 if (i == 0)
23846 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23847 else
23848 tmp = gen_rtx_USE (VOIDmode, reg);
23849
23850 par[i] = tmp;
23851 }
23852
23853 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23854 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23855 tmp = gen_frame_mem (BLKmode, tmp);
23856 tmp = gen_rtx_SET (tmp, par[0]);
23857 par[0] = tmp;
23858
23859 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23860 insn = emit_insn (tmp);
23861
23862 /* Always build the stack adjustment note for unwind info. */
23863 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23864 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23865 par[0] = tmp;
23866
23867 /* Build the parallel of the registers recorded as saved for unwind. */
23868 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23869 {
23870 regno = ctz_hwi (real_regs);
23871 reg = gen_rtx_REG (SImode, regno);
23872
23873 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23874 tmp = gen_frame_mem (SImode, tmp);
23875 tmp = gen_rtx_SET (tmp, reg);
23876 RTX_FRAME_RELATED_P (tmp) = 1;
23877 par[j + 1] = tmp;
23878 }
23879
23880 if (j == 0)
23881 tmp = par[0];
23882 else
23883 {
23884 RTX_FRAME_RELATED_P (par[0]) = 1;
23885 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23886 }
23887
23888 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23889
23890 return insn;
23891 }
23892
23893 /* Emit code to push or pop registers to or from the stack. F is the
23894 assembly file. MASK is the registers to pop. */
23895 static void
23896 thumb_pop (FILE *f, unsigned long mask)
23897 {
23898 int regno;
23899 int lo_mask = mask & 0xFF;
23900
23901 gcc_assert (mask);
23902
23903 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23904 {
23905 /* Special case.  Do not generate a POP PC statement here; do it in
23906 thumb_exit().  */
23907 thumb_exit (f, -1);
23908 return;
23909 }
23910
23911 fprintf (f, "\tpop\t{");
23912
23913 /* Look at the low registers first. */
23914 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23915 {
23916 if (lo_mask & 1)
23917 {
23918 asm_fprintf (f, "%r", regno);
23919
23920 if ((lo_mask & ~1) != 0)
23921 fprintf (f, ", ");
23922 }
23923 }
23924
23925 if (mask & (1 << PC_REGNUM))
23926 {
23927 /* Catch popping the PC. */
23928 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23929 || IS_CMSE_ENTRY (arm_current_func_type ()))
23930 {
23931 /* The PC is never popped directly; instead
23932 it is popped into r3 and then BX is used. */
23933 fprintf (f, "}\n");
23934
23935 thumb_exit (f, -1);
23936
23937 return;
23938 }
23939 else
23940 {
23941 if (mask & 0xFF)
23942 fprintf (f, ", ");
23943
23944 asm_fprintf (f, "%r", PC_REGNUM);
23945 }
23946 }
23947
23948 fprintf (f, "}\n");
23949 }
23950
23951 /* Generate code to return from a thumb function.
23952 If 'reg_containing_return_addr' is -1, then the return address is
23953 actually on the stack, at the stack pointer. */
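/* A minimal sketch of the output (illustrative): for a void function that
   needs interworking and whose return address is on the stack, the code
   below typically ends up emitting just

       pop  {r1}
       bx   r1

   i.e. the return address is popped into a free argument register and a
   BX through it returns to the caller.  */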
23954 static void
23955 thumb_exit (FILE *f, int reg_containing_return_addr)
23956 {
23957 unsigned regs_available_for_popping;
23958 unsigned regs_to_pop;
23959 int pops_needed;
23960 unsigned available;
23961 unsigned required;
23962 machine_mode mode;
23963 int size;
23964 int restore_a4 = FALSE;
23965
23966 /* Compute the registers we need to pop. */
23967 regs_to_pop = 0;
23968 pops_needed = 0;
23969
23970 if (reg_containing_return_addr == -1)
23971 {
23972 regs_to_pop |= 1 << LR_REGNUM;
23973 ++pops_needed;
23974 }
23975
23976 if (TARGET_BACKTRACE)
23977 {
23978 /* Restore the (ARM) frame pointer and stack pointer. */
23979 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23980 pops_needed += 2;
23981 }
23982
23983 /* If there is nothing to pop then just emit the BX instruction and
23984 return. */
23985 if (pops_needed == 0)
23986 {
23987 if (crtl->calls_eh_return)
23988 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23989
23990 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23991 {
23992 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23993 reg_containing_return_addr);
23994 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23995 }
23996 else
23997 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23998 return;
23999 }
24000 /* Otherwise if we are not supporting interworking and we have not created
24001 a backtrace structure and the function was not entered in ARM mode then
24002 just pop the return address straight into the PC. */
24003 else if (!TARGET_INTERWORK
24004 && !TARGET_BACKTRACE
24005 && !is_called_in_ARM_mode (current_function_decl)
24006 && !crtl->calls_eh_return
24007 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24008 {
24009 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24010 return;
24011 }
24012
24013 /* Find out how many of the (return) argument registers we can corrupt. */
24014 regs_available_for_popping = 0;
24015
24016 /* If returning via __builtin_eh_return, the bottom three registers
24017 all contain information needed for the return. */
24018 if (crtl->calls_eh_return)
24019 size = 12;
24020 else
24021 {
24022 /* Deduce the registers used from the function's
24023 return value.  This is more reliable than examining
24024 df_regs_ever_live_p () because that will be set if the register is
24025 ever used in the function, not just if the register is used
24026 to hold a return value. */
24027
24028 if (crtl->return_rtx != 0)
24029 mode = GET_MODE (crtl->return_rtx);
24030 else
24031 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24032
24033 size = GET_MODE_SIZE (mode);
24034
24035 if (size == 0)
24036 {
24037 /* In a void function we can use any argument register.
24038 In a function that returns a structure on the stack
24039 we can use the second and third argument registers. */
24040 if (mode == VOIDmode)
24041 regs_available_for_popping =
24042 (1 << ARG_REGISTER (1))
24043 | (1 << ARG_REGISTER (2))
24044 | (1 << ARG_REGISTER (3));
24045 else
24046 regs_available_for_popping =
24047 (1 << ARG_REGISTER (2))
24048 | (1 << ARG_REGISTER (3));
24049 }
24050 else if (size <= 4)
24051 regs_available_for_popping =
24052 (1 << ARG_REGISTER (2))
24053 | (1 << ARG_REGISTER (3));
24054 else if (size <= 8)
24055 regs_available_for_popping =
24056 (1 << ARG_REGISTER (3));
24057 }
24058
24059 /* Match registers to be popped with registers into which we pop them. */
24060 for (available = regs_available_for_popping,
24061 required = regs_to_pop;
24062 required != 0 && available != 0;
24063 available &= ~(available & - available),
24064 required &= ~(required & - required))
24065 -- pops_needed;
24066
24067 /* If we have any popping registers left over, remove them. */
24068 if (available > 0)
24069 regs_available_for_popping &= ~available;
24070
24071 /* Otherwise if we need another popping register we can use
24072 the fourth argument register. */
24073 else if (pops_needed)
24074 {
24075 /* If we have not found any free argument registers and
24076 reg a4 contains the return address, we must move it. */
24077 if (regs_available_for_popping == 0
24078 && reg_containing_return_addr == LAST_ARG_REGNUM)
24079 {
24080 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24081 reg_containing_return_addr = LR_REGNUM;
24082 }
24083 else if (size > 12)
24084 {
24085 /* Register a4 is being used to hold part of the return value,
24086 but we have dire need of a free, low register. */
24087 restore_a4 = TRUE;
24088
24089 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24090 }
24091
24092 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24093 {
24094 /* The fourth argument register is available. */
24095 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24096
24097 --pops_needed;
24098 }
24099 }
24100
24101 /* Pop as many registers as we can. */
24102 thumb_pop (f, regs_available_for_popping);
24103
24104 /* Process the registers we popped. */
24105 if (reg_containing_return_addr == -1)
24106 {
24107 /* The return address was popped into the lowest numbered register. */
24108 regs_to_pop &= ~(1 << LR_REGNUM);
24109
24110 reg_containing_return_addr =
24111 number_of_first_bit_set (regs_available_for_popping);
24112
24113 /* Remove this register from the mask of available registers, so that
24114 the return address will not be corrupted by further pops. */
24115 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24116 }
24117
24118 /* If we popped other registers then handle them here. */
24119 if (regs_available_for_popping)
24120 {
24121 int frame_pointer;
24122
24123 /* Work out which register currently contains the frame pointer. */
24124 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24125
24126 /* Move it into the correct place. */
24127 asm_fprintf (f, "\tmov\t%r, %r\n",
24128 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24129
24130 /* (Temporarily) remove it from the mask of popped registers. */
24131 regs_available_for_popping &= ~(1 << frame_pointer);
24132 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24133
24134 if (regs_available_for_popping)
24135 {
24136 int stack_pointer;
24137
24138 /* We popped the stack pointer as well,
24139 find the register that contains it. */
24140 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24141
24142 /* Move it into the stack register. */
24143 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24144
24145 /* At this point we have popped all necessary registers, so
24146 do not worry about restoring regs_available_for_popping
24147 to its correct value:
24148
24149 assert (pops_needed == 0)
24150 assert (regs_available_for_popping == (1 << frame_pointer))
24151 assert (regs_to_pop == (1 << STACK_POINTER)) */
24152 }
24153 else
24154 {
24155 /* Since we have just moved the popped value into the frame
24156 pointer, the popping register is available for reuse, and
24157 we know that we still have the stack pointer left to pop. */
24158 regs_available_for_popping |= (1 << frame_pointer);
24159 }
24160 }
24161
24162 /* If we still have registers left on the stack, but we no longer have
24163 any registers into which we can pop them, then we must move the return
24164 address into the link register and make available the register that
24165 contained it. */
24166 if (regs_available_for_popping == 0 && pops_needed > 0)
24167 {
24168 regs_available_for_popping |= 1 << reg_containing_return_addr;
24169
24170 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24171 reg_containing_return_addr);
24172
24173 reg_containing_return_addr = LR_REGNUM;
24174 }
24175
24176 /* If we have registers left on the stack then pop some more.
24177 We know that at most we will want to pop FP and SP. */
24178 if (pops_needed > 0)
24179 {
24180 int popped_into;
24181 int move_to;
24182
24183 thumb_pop (f, regs_available_for_popping);
24184
24185 /* We have popped either FP or SP.
24186 Move whichever one it is into the correct register. */
24187 popped_into = number_of_first_bit_set (regs_available_for_popping);
24188 move_to = number_of_first_bit_set (regs_to_pop);
24189
24190 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24191 --pops_needed;
24192 }
24193
24194 /* If we still have not popped everything then we must have only
24195 had one register available to us and we are now popping the SP. */
24196 if (pops_needed > 0)
24197 {
24198 int popped_into;
24199
24200 thumb_pop (f, regs_available_for_popping);
24201
24202 popped_into = number_of_first_bit_set (regs_available_for_popping);
24203
24204 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24205 /*
24206 assert (regs_to_pop == (1 << STACK_POINTER))
24207 assert (pops_needed == 1)
24208 */
24209 }
24210
24211 /* If necessary restore the a4 register. */
24212 if (restore_a4)
24213 {
24214 if (reg_containing_return_addr != LR_REGNUM)
24215 {
24216 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24217 reg_containing_return_addr = LR_REGNUM;
24218 }
24219
24220 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24221 }
24222
24223 if (crtl->calls_eh_return)
24224 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24225
24226 /* Return to caller. */
24227 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24228 {
24229 /* This is for the cases where LR is not being used to contain the return
24230 address. It may therefore contain information that we might not want
24231 to leak, hence it must be cleared. The value in R0 will never be a
24232 secret at this point, so it is safe to use it, see the clearing code
24233 in 'cmse_nonsecure_entry_clear_before_return'. */
24234 if (reg_containing_return_addr != LR_REGNUM)
24235 asm_fprintf (f, "\tmov\tlr, r0\n");
24236
24237 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24238 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24239 }
24240 else
24241 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24242 }
24243 \f
24244 /* Scan INSN just before assembler is output for it.
24245 For Thumb-1, we track the status of the condition codes; this
24246 information is used in the cbranchsi4_insn pattern. */
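/* For example (illustrative): after an insn such as "subs r0, r0, #1",
   which sets the flags, a later cbranchsi4 comparing r0 with zero can
   reuse those flags instead of emitting a separate compare, provided
   nothing in between has clobbered r0 or the condition codes.  */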
24247 void
24248 thumb1_final_prescan_insn (rtx_insn *insn)
24249 {
24250 if (flag_print_asm_name)
24251 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24252 INSN_ADDRESSES (INSN_UID (insn)));
24253 /* Don't overwrite the previous setter when we get to a cbranch. */
24254 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24255 {
24256 enum attr_conds conds;
24257
24258 if (cfun->machine->thumb1_cc_insn)
24259 {
24260 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24261 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24262 CC_STATUS_INIT;
24263 }
24264 conds = get_attr_conds (insn);
24265 if (conds == CONDS_SET)
24266 {
24267 rtx set = single_set (insn);
24268 cfun->machine->thumb1_cc_insn = insn;
24269 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24270 cfun->machine->thumb1_cc_op1 = const0_rtx;
24271 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24272 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24273 {
24274 rtx src1 = XEXP (SET_SRC (set), 1);
24275 if (src1 == const0_rtx)
24276 cfun->machine->thumb1_cc_mode = CCmode;
24277 }
24278 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24279 {
24280 /* Record the src register operand instead of dest because
24281 cprop_hardreg pass propagates src. */
24282 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24283 }
24284 }
24285 else if (conds != CONDS_NOCOND)
24286 cfun->machine->thumb1_cc_insn = NULL_RTX;
24287 }
24288
24289 /* Check if unexpected far jump is used. */
24290 if (cfun->machine->lr_save_eliminated
24291 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24292 internal_error("Unexpected thumb1 far jump");
24293 }
24294
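/* Return 1 if all the set bits of VAL (truncated to 32 bits) fit inside
   an 8-bit window shifted left by 0-24 places, i.e. VAL can be formed by
   shifting an 8-bit constant into place; e.g. 0x1fe0 == 0xff << 5.
   Return 0 otherwise, including for VAL == 0.  */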
24295 int
24296 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24297 {
24298 unsigned HOST_WIDE_INT mask = 0xff;
24299 int i;
24300
24301 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24302 if (val == 0) /* XXX */
24303 return 0;
24304
24305 for (i = 0; i < 25; i++)
24306 if ((val & (mask << i)) == val)
24307 return 1;
24308
24309 return 0;
24310 }
24311
24312 /* Returns nonzero if the current function contains,
24313 or might contain a far jump. */
24314 static int
24315 thumb_far_jump_used_p (void)
24316 {
24317 rtx_insn *insn;
24318 bool far_jump = false;
24319 unsigned int func_size = 0;
24320
24321 /* If we have already decided that far jumps may be used,
24322 do not bother checking again, and always return true even if
24323 it turns out that they are not being used. Once we have made
24324 the decision that far jumps are present (and that hence the link
24325 register will be pushed onto the stack) we cannot go back on it. */
24326 if (cfun->machine->far_jump_used)
24327 return 1;
24328
24329 /* If this function is not being called from the prologue/epilogue
24330 generation code then it must be being called from the
24331 INITIAL_ELIMINATION_OFFSET macro. */
24332 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24333 {
24334 /* In this case we know that we are being asked about the elimination
24335 of the arg pointer register. If that register is not being used,
24336 then there are no arguments on the stack, and we do not have to
24337 worry that a far jump might force the prologue to push the link
24338 register, changing the stack offsets. In this case we can just
24339 return false, since the presence of far jumps in the function will
24340 not affect stack offsets.
24341
24342 If the arg pointer is live (or if it was live, but has now been
24343 eliminated and so set to dead) then we do have to test to see if
24344 the function might contain a far jump. This test can lead to some
24345 false negatives, since before reload is completed the length of
24346 branch instructions is not known, so gcc defaults to returning their
24347 longest length, which in turn sets the far jump attribute to true.
24348
24349 A false negative will not result in bad code being generated, but it
24350 will result in a needless push and pop of the link register. We
24351 hope that this does not occur too often.
24352
24353 If we need doubleword stack alignment this could affect the other
24354 elimination offsets so we can't risk getting it wrong. */
24355 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24356 cfun->machine->arg_pointer_live = 1;
24357 else if (!cfun->machine->arg_pointer_live)
24358 return 0;
24359 }
24360
24361 /* We should not change far_jump_used during or after reload, as there is
24362 no chance to change stack frame layout. */
24363 if (reload_in_progress || reload_completed)
24364 return 0;
24365
24366 /* Check to see if the function contains a branch
24367 insn with the far jump attribute set. */
24368 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24369 {
24370 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24371 {
24372 far_jump = true;
24373 }
24374 func_size += get_attr_length (insn);
24375 }
24376
24377 /* The far_jump attribute will always be true for thumb1 before the
24378 shorten_branch pass, so checking it before shorten_branch is not
24379 very useful.
24380
24381 The following heuristic tries to estimate more accurately whether a
24382 far jump may finally be used.  The heuristic is very conservative, as
24383 there is no chance to roll back a decision not to use far jumps.
24384
24385 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24386 2-byte insn is associated with a 4-byte constant pool entry.  Using
24387 function size 2048/3 as the threshold is conservative enough. */
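/* A quick worked example of the threshold (illustrative): a function whose
   insns total 683 bytes gives 683 * 3 = 2049 >= 2048, so it is assumed to
   need far jumps, while a 682-byte function (2046) is not.  */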
24388 if (far_jump)
24389 {
24390 if ((func_size * 3) >= 2048)
24391 {
24392 /* Record the fact that we have decided that
24393 the function does use far jumps. */
24394 cfun->machine->far_jump_used = 1;
24395 return 1;
24396 }
24397 }
24398
24399 return 0;
24400 }
24401
24402 /* Return nonzero if FUNC must be entered in ARM mode. */
24403 static bool
24404 is_called_in_ARM_mode (tree func)
24405 {
24406 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24407
24408 /* Ignore the problem about functions whose address is taken. */
24409 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24410 return true;
24411
24412 #ifdef ARM_PE
24413 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24414 #else
24415 return false;
24416 #endif
24417 }
24418
24419 /* Given the stack offsets and register mask in OFFSETS, decide how
24420 many additional registers to push instead of subtracting a constant
24421 from SP. For epilogues the principle is the same except we use pop.
24422 FOR_PROLOGUE indicates which we're generating. */
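/* A minimal example (illustrative): with a 512-byte frame and one free low
   register, pushing that one extra register leaves 508 bytes, which fits
   the immediate range of a single Thumb-1 "sub sp, #imm" (multiples of 4
   up to 508), saving a constant-pool load.  */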
24423 static int
24424 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24425 {
24426 HOST_WIDE_INT amount;
24427 unsigned long live_regs_mask = offsets->saved_regs_mask;
24428 /* Extract a mask of the ones we can give to the Thumb's push/pop
24429 instruction. */
24430 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24431 /* Then count how many other high registers will need to be pushed. */
24432 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24433 int n_free, reg_base, size;
24434
24435 if (!for_prologue && frame_pointer_needed)
24436 amount = offsets->locals_base - offsets->saved_regs;
24437 else
24438 amount = offsets->outgoing_args - offsets->saved_regs;
24439
24440 /* If the stack frame size is 512 exactly, we can save one load
24441 instruction, which should make this a win even when optimizing
24442 for speed. */
24443 if (!optimize_size && amount != 512)
24444 return 0;
24445
24446 /* Can't do this if there are high registers to push. */
24447 if (high_regs_pushed != 0)
24448 return 0;
24449
24450 /* Shouldn't do it in the prologue if no registers would normally
24451 be pushed at all. In the epilogue, also allow it if we'll have
24452 a pop insn for the PC. */
24453 if (l_mask == 0
24454 && (for_prologue
24455 || TARGET_BACKTRACE
24456 || (live_regs_mask & 1 << LR_REGNUM) == 0
24457 || TARGET_INTERWORK
24458 || crtl->args.pretend_args_size != 0))
24459 return 0;
24460
24461 /* Don't do this if thumb_expand_prologue wants to emit instructions
24462 between the push and the stack frame allocation. */
24463 if (for_prologue
24464 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24465 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24466 return 0;
24467
24468 reg_base = 0;
24469 n_free = 0;
24470 if (!for_prologue)
24471 {
24472 size = arm_size_return_regs ();
24473 reg_base = ARM_NUM_INTS (size);
24474 live_regs_mask >>= reg_base;
24475 }
24476
24477 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24478 && (for_prologue || call_used_regs[reg_base + n_free]))
24479 {
24480 live_regs_mask >>= 1;
24481 n_free++;
24482 }
24483
24484 if (n_free == 0)
24485 return 0;
24486 gcc_assert (amount / 4 * 4 == amount);
24487
24488 if (amount >= 512 && (amount - n_free * 4) < 512)
24489 return (amount - 508) / 4;
24490 if (amount <= n_free * 4)
24491 return amount / 4;
24492 return 0;
24493 }
24494
24495 /* The bits which aren't usefully expanded as rtl. */
24496 const char *
24497 thumb1_unexpanded_epilogue (void)
24498 {
24499 arm_stack_offsets *offsets;
24500 int regno;
24501 unsigned long live_regs_mask = 0;
24502 int high_regs_pushed = 0;
24503 int extra_pop;
24504 int had_to_push_lr;
24505 int size;
24506
24507 if (cfun->machine->return_used_this_function != 0)
24508 return "";
24509
24510 if (IS_NAKED (arm_current_func_type ()))
24511 return "";
24512
24513 offsets = arm_get_frame_offsets ();
24514 live_regs_mask = offsets->saved_regs_mask;
24515 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24516
24517 /* Deduce the registers used from the function's return value.
24518 This is more reliable than examining df_regs_ever_live_p () because that
24519 will be set if the register is ever used in the function, not just if
24520 the register is used to hold a return value. */
24521 size = arm_size_return_regs ();
24522
24523 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24524 if (extra_pop > 0)
24525 {
24526 unsigned long extra_mask = (1 << extra_pop) - 1;
24527 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24528 }
24529
24530 /* The prolog may have pushed some high registers to use as
24531 work registers; e.g. the testsuite file:
24532 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24533 compiles to produce:
24534 push {r4, r5, r6, r7, lr}
24535 mov r7, r9
24536 mov r6, r8
24537 push {r6, r7}
24538 as part of the prolog. We have to undo that pushing here. */
24539
24540 if (high_regs_pushed)
24541 {
24542 unsigned long mask = live_regs_mask & 0xff;
24543 int next_hi_reg;
24544
24545 /* The available low registers depend on the size of the value we are
24546 returning. */
24547 if (size <= 12)
24548 mask |= 1 << 3;
24549 if (size <= 8)
24550 mask |= 1 << 2;
24551
24552 if (mask == 0)
24553 /* Oh dear! We have no low registers into which we can pop
24554 high registers! */
24555 internal_error
24556 ("no low registers available for popping high registers");
24557
24558 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24559 if (live_regs_mask & (1 << next_hi_reg))
24560 break;
24561
24562 while (high_regs_pushed)
24563 {
24564 /* Find lo register(s) into which the high register(s) can
24565 be popped. */
24566 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24567 {
24568 if (mask & (1 << regno))
24569 high_regs_pushed--;
24570 if (high_regs_pushed == 0)
24571 break;
24572 }
24573
24574 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24575
24576 /* Pop the values into the low register(s). */
24577 thumb_pop (asm_out_file, mask);
24578
24579 /* Move the value(s) into the high registers. */
24580 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24581 {
24582 if (mask & (1 << regno))
24583 {
24584 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24585 regno);
24586
24587 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24588 if (live_regs_mask & (1 << next_hi_reg))
24589 break;
24590 }
24591 }
24592 }
24593 live_regs_mask &= ~0x0f00;
24594 }
24595
24596 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24597 live_regs_mask &= 0xff;
24598
24599 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24600 {
24601 /* Pop the return address into the PC. */
24602 if (had_to_push_lr)
24603 live_regs_mask |= 1 << PC_REGNUM;
24604
24605 /* Either no argument registers were pushed or a backtrace
24606 structure was created which includes an adjusted stack
24607 pointer, so just pop everything. */
24608 if (live_regs_mask)
24609 thumb_pop (asm_out_file, live_regs_mask);
24610
24611 /* We have either just popped the return address into the
24612 PC or it was kept in LR for the entire function.
24613 Note that thumb_pop has already called thumb_exit if the
24614 PC was in the list. */
24615 if (!had_to_push_lr)
24616 thumb_exit (asm_out_file, LR_REGNUM);
24617 }
24618 else
24619 {
24620 /* Pop everything but the return address. */
24621 if (live_regs_mask)
24622 thumb_pop (asm_out_file, live_regs_mask);
24623
24624 if (had_to_push_lr)
24625 {
24626 if (size > 12)
24627 {
24628 /* We have no free low regs, so save one. */
24629 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24630 LAST_ARG_REGNUM);
24631 }
24632
24633 /* Get the return address into a temporary register. */
24634 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24635
24636 if (size > 12)
24637 {
24638 /* Move the return address to lr. */
24639 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24640 LAST_ARG_REGNUM);
24641 /* Restore the low register. */
24642 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24643 IP_REGNUM);
24644 regno = LR_REGNUM;
24645 }
24646 else
24647 regno = LAST_ARG_REGNUM;
24648 }
24649 else
24650 regno = LR_REGNUM;
24651
24652 /* Remove the argument registers that were pushed onto the stack. */
24653 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24654 SP_REGNUM, SP_REGNUM,
24655 crtl->args.pretend_args_size);
24656
24657 thumb_exit (asm_out_file, regno);
24658 }
24659
24660 return "";
24661 }
24662
24663 /* Functions to save and restore machine-specific function data. */
24664 static struct machine_function *
24665 arm_init_machine_status (void)
24666 {
24667 struct machine_function *machine;
24668 machine = ggc_cleared_alloc<machine_function> ();
24669
24670 #if ARM_FT_UNKNOWN != 0
24671 machine->func_type = ARM_FT_UNKNOWN;
24672 #endif
24673 return machine;
24674 }
24675
24676 /* Return an RTX indicating where the return address to the
24677 calling function can be found. */
24678 rtx
24679 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24680 {
24681 if (count != 0)
24682 return NULL_RTX;
24683
24684 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24685 }
24686
24687 /* Do anything needed before RTL is emitted for each function. */
24688 void
24689 arm_init_expanders (void)
24690 {
24691 /* Arrange to initialize and mark the machine per-function status. */
24692 init_machine_status = arm_init_machine_status;
24693
24694 /* This is to stop the combine pass optimizing away the alignment
24695 adjustment of va_arg. */
24696 /* ??? It is claimed that this should not be necessary. */
24697 if (cfun)
24698 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24699 }
24700
24701 /* Return TRUE if FUNC is compiled in a different (ARM/Thumb) mode from the current function. */
24702
24703 bool
24704 arm_change_mode_p (tree func)
24705 {
24706 if (TREE_CODE (func) != FUNCTION_DECL)
24707 return false;
24708
24709 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24710
24711 if (!callee_tree)
24712 callee_tree = target_option_default_node;
24713
24714 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24715 int flags = callee_opts->x_target_flags;
24716
24717 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24718 }
24719
24720 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24721 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24722 to point at the base of the local variables after static stack
24723 space for a function has been allocated. */
24724
24725 HOST_WIDE_INT
24726 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24727 {
24728 arm_stack_offsets *offsets;
24729
24730 offsets = arm_get_frame_offsets ();
24731
24732 switch (from)
24733 {
24734 case ARG_POINTER_REGNUM:
24735 switch (to)
24736 {
24737 case STACK_POINTER_REGNUM:
24738 return offsets->outgoing_args - offsets->saved_args;
24739
24740 case FRAME_POINTER_REGNUM:
24741 return offsets->soft_frame - offsets->saved_args;
24742
24743 case ARM_HARD_FRAME_POINTER_REGNUM:
24744 return offsets->saved_regs - offsets->saved_args;
24745
24746 case THUMB_HARD_FRAME_POINTER_REGNUM:
24747 return offsets->locals_base - offsets->saved_args;
24748
24749 default:
24750 gcc_unreachable ();
24751 }
24752 break;
24753
24754 case FRAME_POINTER_REGNUM:
24755 switch (to)
24756 {
24757 case STACK_POINTER_REGNUM:
24758 return offsets->outgoing_args - offsets->soft_frame;
24759
24760 case ARM_HARD_FRAME_POINTER_REGNUM:
24761 return offsets->saved_regs - offsets->soft_frame;
24762
24763 case THUMB_HARD_FRAME_POINTER_REGNUM:
24764 return offsets->locals_base - offsets->soft_frame;
24765
24766 default:
24767 gcc_unreachable ();
24768 }
24769 break;
24770
24771 default:
24772 gcc_unreachable ();
24773 }
24774 }
24775
24776 /* Generate the function's prologue. */
24777
24778 void
24779 thumb1_expand_prologue (void)
24780 {
24781 rtx_insn *insn;
24782
24783 HOST_WIDE_INT amount;
24784 HOST_WIDE_INT size;
24785 arm_stack_offsets *offsets;
24786 unsigned long func_type;
24787 int regno;
24788 unsigned long live_regs_mask;
24789 unsigned long l_mask;
24790 unsigned high_regs_pushed = 0;
24791 bool lr_needs_saving;
24792
24793 func_type = arm_current_func_type ();
24794
24795 /* Naked functions don't have prologues. */
24796 if (IS_NAKED (func_type))
24797 {
24798 if (flag_stack_usage_info)
24799 current_function_static_stack_size = 0;
24800 return;
24801 }
24802
24803 if (IS_INTERRUPT (func_type))
24804 {
24805 error ("interrupt Service Routines cannot be coded in Thumb mode");
24806 return;
24807 }
24808
24809 if (is_called_in_ARM_mode (current_function_decl))
24810 emit_insn (gen_prologue_thumb1_interwork ());
24811
24812 offsets = arm_get_frame_offsets ();
24813 live_regs_mask = offsets->saved_regs_mask;
24814 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24815
24816 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24817 l_mask = live_regs_mask & 0x40ff;
24818 /* Then count how many other high registers will need to be pushed. */
24819 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24820
24821 if (crtl->args.pretend_args_size)
24822 {
24823 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24824
24825 if (cfun->machine->uses_anonymous_args)
24826 {
24827 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24828 unsigned long mask;
24829
24830 mask = 1ul << (LAST_ARG_REGNUM + 1);
24831 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24832
24833 insn = thumb1_emit_multi_reg_push (mask, 0);
24834 }
24835 else
24836 {
24837 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24838 stack_pointer_rtx, x));
24839 }
24840 RTX_FRAME_RELATED_P (insn) = 1;
24841 }
24842
24843 if (TARGET_BACKTRACE)
24844 {
24845 HOST_WIDE_INT offset = 0;
24846 unsigned work_register;
24847 rtx work_reg, x, arm_hfp_rtx;
24848
24849 /* We have been asked to create a stack backtrace structure.
24850 The code looks like this:
24851
24852 0 .align 2
24853 0 func:
24854 0 sub SP, #16 Reserve space for 4 registers.
24855 2 push {R7} Push low registers.
24856 4 add R7, SP, #20 Get the stack pointer before the push.
24857 6 str R7, [SP, #8] Store the stack pointer
24858 (before reserving the space).
24859 8 mov R7, PC Get hold of the start of this code + 12.
24860 10 str R7, [SP, #16] Store it.
24861 12 mov R7, FP Get hold of the current frame pointer.
24862 14 str R7, [SP, #4] Store it.
24863 16 mov R7, LR Get hold of the current return address.
24864 18 str R7, [SP, #12] Store it.
24865 20 add R7, SP, #16 Point at the start of the
24866 backtrace structure.
24867 22 mov FP, R7 Put this value into the frame pointer. */
24868
24869 work_register = thumb_find_work_register (live_regs_mask);
24870 work_reg = gen_rtx_REG (SImode, work_register);
24871 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24872
24873 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24874 stack_pointer_rtx, GEN_INT (-16)));
24875 RTX_FRAME_RELATED_P (insn) = 1;
24876
24877 if (l_mask)
24878 {
24879 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24880 RTX_FRAME_RELATED_P (insn) = 1;
24881 lr_needs_saving = false;
24882
24883 offset = bit_count (l_mask) * UNITS_PER_WORD;
24884 }
24885
24886 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24887 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24888
24889 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24890 x = gen_frame_mem (SImode, x);
24891 emit_move_insn (x, work_reg);
24892
24893 /* Make sure that the instruction fetching the PC is in the right place
24894 to calculate "start of backtrace creation code + 12". */
24895 /* ??? The stores using the common WORK_REG ought to be enough to
24896 prevent the scheduler from doing anything weird. Failing that
24897 we could always move all of the following into an UNSPEC_VOLATILE. */
24898 if (l_mask)
24899 {
24900 x = gen_rtx_REG (SImode, PC_REGNUM);
24901 emit_move_insn (work_reg, x);
24902
24903 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24904 x = gen_frame_mem (SImode, x);
24905 emit_move_insn (x, work_reg);
24906
24907 emit_move_insn (work_reg, arm_hfp_rtx);
24908
24909 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24910 x = gen_frame_mem (SImode, x);
24911 emit_move_insn (x, work_reg);
24912 }
24913 else
24914 {
24915 emit_move_insn (work_reg, arm_hfp_rtx);
24916
24917 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24918 x = gen_frame_mem (SImode, x);
24919 emit_move_insn (x, work_reg);
24920
24921 x = gen_rtx_REG (SImode, PC_REGNUM);
24922 emit_move_insn (work_reg, x);
24923
24924 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24925 x = gen_frame_mem (SImode, x);
24926 emit_move_insn (x, work_reg);
24927 }
24928
24929 x = gen_rtx_REG (SImode, LR_REGNUM);
24930 emit_move_insn (work_reg, x);
24931
24932 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24933 x = gen_frame_mem (SImode, x);
24934 emit_move_insn (x, work_reg);
24935
24936 x = GEN_INT (offset + 12);
24937 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24938
24939 emit_move_insn (arm_hfp_rtx, work_reg);
24940 }
24941 /* Optimization: If we are not pushing any low registers but we are going
24942 to push some high registers then delay our first push. This will just
24943 be a push of LR and we can combine it with the push of the first high
24944 register. */
24945 else if ((l_mask & 0xff) != 0
24946 || (high_regs_pushed == 0 && lr_needs_saving))
24947 {
24948 unsigned long mask = l_mask;
24949 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24950 insn = thumb1_emit_multi_reg_push (mask, mask);
24951 RTX_FRAME_RELATED_P (insn) = 1;
24952 lr_needs_saving = false;
24953 }
24954
24955 if (high_regs_pushed)
24956 {
24957 unsigned pushable_regs;
24958 unsigned next_hi_reg;
24959 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24960 : crtl->args.info.nregs;
24961 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24962
24963 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24964 if (live_regs_mask & (1 << next_hi_reg))
24965 break;
24966
24967 /* Here we need to mask out registers used for passing arguments, even
24968 if they could otherwise be pushed.  Using them to stash the high
24969 registers could clobber argument values that are still live. */
24970 pushable_regs = l_mask & (~arg_regs_mask);
24971 if (lr_needs_saving)
24972 pushable_regs &= ~(1 << LR_REGNUM);
24973
24974 if (pushable_regs == 0)
24975 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24976
24977 while (high_regs_pushed > 0)
24978 {
24979 unsigned long real_regs_mask = 0;
24980 unsigned long push_mask = 0;
24981
24982 for (regno = LR_REGNUM; regno >= 0; regno --)
24983 {
24984 if (pushable_regs & (1 << regno))
24985 {
24986 emit_move_insn (gen_rtx_REG (SImode, regno),
24987 gen_rtx_REG (SImode, next_hi_reg));
24988
24989 high_regs_pushed --;
24990 real_regs_mask |= (1 << next_hi_reg);
24991 push_mask |= (1 << regno);
24992
24993 if (high_regs_pushed)
24994 {
24995 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24996 next_hi_reg --)
24997 if (live_regs_mask & (1 << next_hi_reg))
24998 break;
24999 }
25000 else
25001 break;
25002 }
25003 }
25004
25005 /* If we had to find a work register and we have not yet
25006 saved the LR then add it to the list of regs to push. */
25007 if (lr_needs_saving)
25008 {
25009 push_mask |= 1 << LR_REGNUM;
25010 real_regs_mask |= 1 << LR_REGNUM;
25011 lr_needs_saving = false;
25012 }
25013
25014 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25015 RTX_FRAME_RELATED_P (insn) = 1;
25016 }
25017 }
25018
25019 /* Load the pic register before setting the frame pointer,
25020 so we can use r7 as a temporary work register. */
25021 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25022 arm_load_pic_register (live_regs_mask);
25023
25024 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25025 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25026 stack_pointer_rtx);
25027
25028 size = offsets->outgoing_args - offsets->saved_args;
25029 if (flag_stack_usage_info)
25030 current_function_static_stack_size = size;
25031
25032 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25033 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25034 || flag_stack_clash_protection)
25035 && size)
25036 sorry ("-fstack-check=specific for Thumb-1");
25037
25038 amount = offsets->outgoing_args - offsets->saved_regs;
25039 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25040 if (amount)
25041 {
25042 if (amount < 512)
25043 {
25044 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25045 GEN_INT (- amount)));
25046 RTX_FRAME_RELATED_P (insn) = 1;
25047 }
25048 else
25049 {
25050 rtx reg, dwarf;
25051
25052 /* The stack decrement is too big for an immediate value in a single
25053 insn. In theory we could issue multiple subtracts, but after
25054 three of them it becomes more space efficient to place the full
25055 value in the constant pool and load into a register. (Also the
25056 ARM debugger really likes to see only one stack decrement per
25057 function). So instead we look for a scratch register into which
25058 we can load the decrement, and then we subtract this from the
25059 stack pointer. Unfortunately on the thumb the only available
25060 scratch registers are the argument registers, and we cannot use
25061 these as they may hold arguments to the function. Instead we
25062 attempt to locate a call preserved register which is used by this
25063 function. If we can find one, then we know that it will have
25064 been pushed at the start of the prologue and so we can corrupt
25065 it now. */
25066 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25067 if (live_regs_mask & (1 << regno))
25068 break;
25069
25070 gcc_assert(regno <= LAST_LO_REGNUM);
25071
25072 reg = gen_rtx_REG (SImode, regno);
25073
25074 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25075
25076 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25077 stack_pointer_rtx, reg));
25078
25079 dwarf = gen_rtx_SET (stack_pointer_rtx,
25080 plus_constant (Pmode, stack_pointer_rtx,
25081 -amount));
25082 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25083 RTX_FRAME_RELATED_P (insn) = 1;
25084 }
25085 }
25086
25087 if (frame_pointer_needed)
25088 thumb_set_frame_pointer (offsets);
25089
25090 /* If we are profiling, make sure no instructions are scheduled before
25091 the call to mcount. Similarly if the user has requested no
25092 scheduling in the prolog. Similarly if we want non-call exceptions
25093 using the EABI unwinder, to prevent faulting instructions from being
25094 swapped with a stack adjustment. */
25095 if (crtl->profile || !TARGET_SCHED_PROLOG
25096 || (arm_except_unwind_info (&global_options) == UI_TARGET
25097 && cfun->can_throw_non_call_exceptions))
25098 emit_insn (gen_blockage ());
25099
25100 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25101 if (live_regs_mask & 0xff)
25102 cfun->machine->lr_save_eliminated = 0;
25103 }
25104
25105 /* Clear caller saved registers not used to pass return values and leaked
25106 condition flags before exiting a cmse_nonsecure_entry function. */
25107
25108 void
25109 cmse_nonsecure_entry_clear_before_return (void)
25110 {
25111 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25112 uint32_t padding_bits_to_clear = 0;
25113 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25114 auto_sbitmap to_clear_bitmap (maxregno + 1);
25115 tree result_type;
25116 rtx result_rtl;
25117
25118 bitmap_clear (to_clear_bitmap);
25119 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25120 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25121
25122 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25123 registers. */
25124 if (TARGET_HARD_FLOAT)
25125 {
25126 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25127
25128 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25129
25130 /* Make sure we don't clear the two scratch registers used to clear the
25131 relevant FPSCR bits in output_return_instruction. */
25132 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25133 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25134 emit_use (gen_rtx_REG (SImode, 4));
25135 bitmap_clear_bit (to_clear_bitmap, 4);
25136 }
25137
25138 /* If the user has defined registers to be caller saved, these are no longer
25139 restored by the function before returning and must thus be cleared for
25140 security purposes. */
25141 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25142 {
25143 /* We do not touch registers that can be used to pass arguments as per
25144 the AAPCS, since these should never be made callee-saved by user
25145 options. */
25146 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25147 continue;
25148 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25149 continue;
25150 if (call_used_regs[regno])
25151 bitmap_set_bit (to_clear_bitmap, regno);
25152 }
25153
25154 /* Make sure we do not clear the registers used to return the result. */
25155 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25156 if (!VOID_TYPE_P (result_type))
25157 {
25158 uint64_t to_clear_return_mask;
25159 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25160
25161 /* No need to check that we return in registers, because we don't
25162 support returning on stack yet. */
25163 gcc_assert (REG_P (result_rtl));
25164 to_clear_return_mask
25165 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25166 padding_bits_to_clear_ptr);
25167 if (to_clear_return_mask)
25168 {
25169 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25170 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25171 {
25172 if (to_clear_return_mask & (1ULL << regno))
25173 bitmap_clear_bit (to_clear_bitmap, regno);
25174 }
25175 }
25176 }
25177
25178 if (padding_bits_to_clear != 0)
25179 {
25180 rtx reg_rtx;
25181 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25182
25183 /* Padding bits to clear is not 0, so we know we are dealing with
25184 returning a composite type, which only uses r0. Make sure that
25185 r1-r3 are cleared too; we will use r1 as a scratch register. */
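/* Worked example (illustrative): if padding_bits_to_clear is 0x0000ff00,
   the negated mask is 0xffff00ff; its low half (0x00ff) is moved into r1
   first, the high half (0xffff) is then inserted with a 16-bit
   zero_extract (typically a movt), and finally r0 &= r1 clears exactly
   the padding bits.  */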
25186 bitmap_clear (to_clear_arg_regs_bitmap);
25187 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25188 NUM_ARG_REGS - 1);
25189 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25190
25191 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25192
25193 /* Fill the lower half of the negated padding_bits_to_clear. */
25194 emit_move_insn (reg_rtx,
25195 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25196
25197 /* Also fill the top half of the negated padding_bits_to_clear. */
25198 if (((~padding_bits_to_clear) >> 16) > 0)
25199 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25200 GEN_INT (16),
25201 GEN_INT (16)),
25202 GEN_INT ((~padding_bits_to_clear) >> 16)));
25203
25204 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25205 gen_rtx_REG (SImode, R0_REGNUM),
25206 reg_rtx));
25207 }
25208
25209 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25210 {
25211 if (!bitmap_bit_p (to_clear_bitmap, regno))
25212 continue;
25213
25214 if (IS_VFP_REGNUM (regno))
25215 {
25216 /* If regno is an even vfp register and its successor is also to
25217 be cleared, use vmov. */
25218 if (TARGET_VFP_DOUBLE
25219 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25220 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25221 {
25222 emit_move_insn (gen_rtx_REG (DFmode, regno),
25223 CONST1_RTX (DFmode));
25224 emit_use (gen_rtx_REG (DFmode, regno));
25225 regno++;
25226 }
25227 else
25228 {
25229 emit_move_insn (gen_rtx_REG (SFmode, regno),
25230 CONST1_RTX (SFmode));
25231 emit_use (gen_rtx_REG (SFmode, regno));
25232 }
25233 }
25234 else
25235 {
25236 if (TARGET_THUMB1)
25237 {
25238 if (regno == R0_REGNUM)
25239 emit_move_insn (gen_rtx_REG (SImode, regno),
25240 const0_rtx);
25241 else
25242 /* R0 has either been cleared before (see the code above) or it
25243 holds a return value; either way it is not secret
25244 information. */
25245 emit_move_insn (gen_rtx_REG (SImode, regno),
25246 gen_rtx_REG (SImode, R0_REGNUM));
25247 emit_use (gen_rtx_REG (SImode, regno));
25248 }
25249 else
25250 {
25251 emit_move_insn (gen_rtx_REG (SImode, regno),
25252 gen_rtx_REG (SImode, LR_REGNUM));
25253 emit_use (gen_rtx_REG (SImode, regno));
25254 }
25255 }
25256 }
25257 }
25258
25259 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25260 POP instruction can be generated. LR should be replaced by PC. All
25261 the checks required are already done by USE_RETURN_INSN (). Hence,
25262 all we really need to check here is whether a single register or
25263 multiple registers are to be popped. */
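/* For example (roughly): if only LR was saved, the single-register path
   emits one post-incremented load of PC from the stack (equivalent to
   "pop {pc}"); if r4, r5 and LR were saved, LR is replaced by PC in the
   mask and a "pop {r4, r5, pc}" is produced by arm_emit_multi_reg_pop.  */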
25264 void
25265 thumb2_expand_return (bool simple_return)
25266 {
25267 int i, num_regs;
25268 unsigned long saved_regs_mask;
25269 arm_stack_offsets *offsets;
25270
25271 offsets = arm_get_frame_offsets ();
25272 saved_regs_mask = offsets->saved_regs_mask;
25273
25274 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25275 if (saved_regs_mask & (1 << i))
25276 num_regs++;
25277
25278 if (!simple_return && saved_regs_mask)
25279 {
25280 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25281 functions, or adapt the code to handle it according to the ACLE. This
25282 path should not be reachable for cmse_nonsecure_entry functions, but we
25283 prefer to assert it for now to ensure that future code changes do not
25284 silently change this behavior. */
25285 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25286 if (num_regs == 1)
25287 {
25288 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25289 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25290 rtx addr = gen_rtx_MEM (SImode,
25291 gen_rtx_POST_INC (SImode,
25292 stack_pointer_rtx));
25293 set_mem_alias_set (addr, get_frame_alias_set ());
25294 XVECEXP (par, 0, 0) = ret_rtx;
25295 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25296 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25297 emit_jump_insn (par);
25298 }
25299 else
25300 {
25301 saved_regs_mask &= ~ (1 << LR_REGNUM);
25302 saved_regs_mask |= (1 << PC_REGNUM);
25303 arm_emit_multi_reg_pop (saved_regs_mask);
25304 }
25305 }
25306 else
25307 {
25308 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25309 cmse_nonsecure_entry_clear_before_return ();
25310 emit_jump_insn (simple_return_rtx);
25311 }
25312 }
25313
25314 void
25315 thumb1_expand_epilogue (void)
25316 {
25317 HOST_WIDE_INT amount;
25318 arm_stack_offsets *offsets;
25319 int regno;
25320
25321 /* Naked functions don't have epilogues. */
25322 if (IS_NAKED (arm_current_func_type ()))
25323 return;
25324
25325 offsets = arm_get_frame_offsets ();
25326 amount = offsets->outgoing_args - offsets->saved_regs;
25327
25328 if (frame_pointer_needed)
25329 {
25330 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25331 amount = offsets->locals_base - offsets->saved_regs;
25332 }
25333 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25334
25335 gcc_assert (amount >= 0);
25336 if (amount)
25337 {
25338 emit_insn (gen_blockage ());
25339
25340 if (amount < 512)
25341 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25342 GEN_INT (amount)));
25343 else
25344 {
25345 /* r3 is always free in the epilogue. */
25346 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25347
25348 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25349 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25350 }
25351 }
25352
25353 /* Emit a USE (stack_pointer_rtx), so that
25354 the stack adjustment will not be deleted. */
25355 emit_insn (gen_force_register_use (stack_pointer_rtx));
25356
25357 if (crtl->profile || !TARGET_SCHED_PROLOG)
25358 emit_insn (gen_blockage ());
25359
25360 /* Emit a clobber for each insn that will be restored in the epilogue,
25361 so that flow2 will get register lifetimes correct. */
25362 for (regno = 0; regno < 13; regno++)
25363 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25364 emit_clobber (gen_rtx_REG (SImode, regno));
25365
25366 if (! df_regs_ever_live_p (LR_REGNUM))
25367 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25368
25369 /* Clear all caller-saved regs that are not used to return. */
25370 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25371 cmse_nonsecure_entry_clear_before_return ();
25372 }
25373
25374 /* Epilogue code for APCS frame. */
25375 static void
25376 arm_expand_epilogue_apcs_frame (bool really_return)
25377 {
25378 unsigned long func_type;
25379 unsigned long saved_regs_mask;
25380 int num_regs = 0;
25381 int i;
25382 int floats_from_frame = 0;
25383 arm_stack_offsets *offsets;
25384
25385 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25386 func_type = arm_current_func_type ();
25387
25388 /* Get frame offsets for ARM. */
25389 offsets = arm_get_frame_offsets ();
25390 saved_regs_mask = offsets->saved_regs_mask;
25391
25392 /* Find the offset of the floating-point save area in the frame. */
25393 floats_from_frame
25394 = (offsets->saved_args
25395 + arm_compute_static_chain_stack_bytes ()
25396 - offsets->frame);
25397
25398 /* Compute how many core registers are saved and how far away the floats are. */
25399 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25400 if (saved_regs_mask & (1 << i))
25401 {
25402 num_regs++;
25403 floats_from_frame += 4;
25404 }
25405
25406 if (TARGET_HARD_FLOAT)
25407 {
25408 int start_reg;
25409 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25410
25411 /* The offset is from IP_REGNUM. */
25412 int saved_size = arm_get_vfp_saved_size ();
25413 if (saved_size > 0)
25414 {
25415 rtx_insn *insn;
25416 floats_from_frame += saved_size;
25417 insn = emit_insn (gen_addsi3 (ip_rtx,
25418 hard_frame_pointer_rtx,
25419 GEN_INT (-floats_from_frame)));
25420 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25421 ip_rtx, hard_frame_pointer_rtx);
25422 }
25423
25424 /* Generate VFP register multi-pop. */
25425 start_reg = FIRST_VFP_REGNUM;
25426
25427 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25428 /* Look for a case where a reg does not need restoring. */
25429 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25430 && (!df_regs_ever_live_p (i + 1)
25431 || call_used_regs[i + 1]))
25432 {
25433 if (start_reg != i)
25434 arm_emit_vfp_multi_reg_pop (start_reg,
25435 (i - start_reg) / 2,
25436 gen_rtx_REG (SImode,
25437 IP_REGNUM));
25438 start_reg = i + 2;
25439 }
25440
25441 /* Restore the remaining regs that we have discovered (or possibly
25442 even all of them, if the conditional in the for loop never
25443 fired). */
25444 if (start_reg != i)
25445 arm_emit_vfp_multi_reg_pop (start_reg,
25446 (i - start_reg) / 2,
25447 gen_rtx_REG (SImode, IP_REGNUM));
25448 }
25449
25450 if (TARGET_IWMMXT)
25451 {
25452 /* The frame pointer is guaranteed to be non-double-word aligned, as
25453 it is set to double-word-aligned old_stack_pointer - 4. */
25454 rtx_insn *insn;
25455 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25456
25457 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25458 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25459 {
25460 rtx addr = gen_frame_mem (V2SImode,
25461 plus_constant (Pmode, hard_frame_pointer_rtx,
25462 - lrm_count * 4));
25463 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25464 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25465 gen_rtx_REG (V2SImode, i),
25466 NULL_RTX);
25467 lrm_count += 2;
25468 }
25469 }
25470
25471 /* saved_regs_mask should contain IP, which holds the old stack pointer
25472 at the time the activation record was created. Since SP and IP are adjacent
25473 registers, we can restore the value directly into SP. */
25474 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25475 saved_regs_mask &= ~(1 << IP_REGNUM);
25476 saved_regs_mask |= (1 << SP_REGNUM);
25477
25478 /* There are two registers left in saved_regs_mask - LR and PC. We
25479 only need to restore LR (the return address), but to
25480 save time we can load it directly into PC, unless we need a
25481 special function exit sequence, or we are not really returning. */
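/* E.g. instead of popping the saved LR and then branching to it, the final
   multi-register pop below loads the return address straight into PC
   (a sketch; the exact pop form depends on the mask built here).  */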
25482 if (really_return
25483 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25484 && !crtl->calls_eh_return)
25485 /* Delete LR from the register mask, so that LR on
25486 the stack is loaded into the PC in the register mask. */
25487 saved_regs_mask &= ~(1 << LR_REGNUM);
25488 else
25489 saved_regs_mask &= ~(1 << PC_REGNUM);
25490
25491 num_regs = bit_count (saved_regs_mask);
25492 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25493 {
25494 rtx_insn *insn;
25495 emit_insn (gen_blockage ());
25496 /* Unwind the stack to just below the saved registers. */
25497 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25498 hard_frame_pointer_rtx,
25499 GEN_INT (- 4 * num_regs)));
25500
25501 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25502 stack_pointer_rtx, hard_frame_pointer_rtx);
25503 }
25504
25505 arm_emit_multi_reg_pop (saved_regs_mask);
25506
25507 if (IS_INTERRUPT (func_type))
25508 {
25509 /* Interrupt handlers will have pushed the
25510 IP onto the stack, so restore it now. */
25511 rtx_insn *insn;
25512 rtx addr = gen_rtx_MEM (SImode,
25513 gen_rtx_POST_INC (SImode,
25514 stack_pointer_rtx));
25515 set_mem_alias_set (addr, get_frame_alias_set ());
25516 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25517 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25518 gen_rtx_REG (SImode, IP_REGNUM),
25519 NULL_RTX);
25520 }
25521
25522 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25523 return;
25524
25525 if (crtl->calls_eh_return)
25526 emit_insn (gen_addsi3 (stack_pointer_rtx,
25527 stack_pointer_rtx,
25528 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25529
25530 if (IS_STACKALIGN (func_type))
25531 /* Restore the original stack pointer. Before prologue, the stack was
25532 realigned and the original stack pointer saved in r0. For details,
25533 see comment in arm_expand_prologue. */
25534 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25535
25536 emit_jump_insn (simple_return_rtx);
25537 }
25538
25539 /* Generate RTL to represent an ARM epilogue. REALLY_RETURN is true if the
25540 function is not a sibcall. */
25541 void
25542 arm_expand_epilogue (bool really_return)
25543 {
25544 unsigned long func_type;
25545 unsigned long saved_regs_mask;
25546 int num_regs = 0;
25547 int i;
25548 int amount;
25549 arm_stack_offsets *offsets;
25550
25551 func_type = arm_current_func_type ();
25552
25553 /* Naked functions don't have epilogues. Hence, generate a return pattern and
25554 let output_return_instruction take care of any instruction emission. */
25555 if (IS_NAKED (func_type)
25556 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25557 {
25558 if (really_return)
25559 emit_jump_insn (simple_return_rtx);
25560 return;
25561 }
25562
25563 /* If we are throwing an exception, then we really must be doing a
25564 return, so we can't tail-call. */
25565 gcc_assert (!crtl->calls_eh_return || really_return);
25566
25567 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25568 {
25569 arm_expand_epilogue_apcs_frame (really_return);
25570 return;
25571 }
25572
25573 /* Get frame offsets for ARM. */
25574 offsets = arm_get_frame_offsets ();
25575 saved_regs_mask = offsets->saved_regs_mask;
25576 num_regs = bit_count (saved_regs_mask);
25577
25578 if (frame_pointer_needed)
25579 {
25580 rtx_insn *insn;
25581 /* Restore stack pointer if necessary. */
25582 if (TARGET_ARM)
25583 {
25584 /* In ARM mode, the frame pointer points to the first saved register.
25585 Restore the stack pointer to point at the last saved register. */
25586 amount = offsets->frame - offsets->saved_regs;
25587
25588 /* Force out any pending memory operations that reference stacked data
25589 before stack de-allocation occurs. */
25590 emit_insn (gen_blockage ());
25591 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25592 hard_frame_pointer_rtx,
25593 GEN_INT (amount)));
25594 arm_add_cfa_adjust_cfa_note (insn, amount,
25595 stack_pointer_rtx,
25596 hard_frame_pointer_rtx);
25597
25598 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25599 deleted. */
25600 emit_insn (gen_force_register_use (stack_pointer_rtx));
25601 }
25602 else
25603 {
25604 /* In Thumb-2 mode, the frame pointer points to the last saved
25605 register. */
25606 amount = offsets->locals_base - offsets->saved_regs;
25607 if (amount)
25608 {
25609 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25610 hard_frame_pointer_rtx,
25611 GEN_INT (amount)));
25612 arm_add_cfa_adjust_cfa_note (insn, amount,
25613 hard_frame_pointer_rtx,
25614 hard_frame_pointer_rtx);
25615 }
25616
25617 /* Force out any pending memory operations that reference stacked data
25618 before stack de-allocation occurs. */
25619 emit_insn (gen_blockage ());
25620 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25621 hard_frame_pointer_rtx));
25622 arm_add_cfa_adjust_cfa_note (insn, 0,
25623 stack_pointer_rtx,
25624 hard_frame_pointer_rtx);
25625 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25626 deleted. */
25627 emit_insn (gen_force_register_use (stack_pointer_rtx));
25628 }
25629 }
25630 else
25631 {
25632 /* Pop off outgoing args and local frame to adjust stack pointer to
25633 last saved register. */
25634 amount = offsets->outgoing_args - offsets->saved_regs;
25635 if (amount)
25636 {
25637 rtx_insn *tmp;
25638 /* Force out any pending memory operations that reference stacked data
25639 before stack de-allocation occurs. */
25640 emit_insn (gen_blockage ());
25641 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25642 stack_pointer_rtx,
25643 GEN_INT (amount)));
25644 arm_add_cfa_adjust_cfa_note (tmp, amount,
25645 stack_pointer_rtx, stack_pointer_rtx);
25646 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25647 not deleted. */
25648 emit_insn (gen_force_register_use (stack_pointer_rtx));
25649 }
25650 }
25651
25652 if (TARGET_HARD_FLOAT)
25653 {
25654 /* Generate VFP register multi-pop. */
25655 int end_reg = LAST_VFP_REGNUM + 1;
25656
25657 /* Scan the registers in reverse order. We need to match
25658 any groupings made in the prologue and generate matching
25659 vldm operations. The need to match groups is because,
25660 unlike pop, vldm can only restore consecutive registers. */
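/* Illustrative sketch: if the prologue saved d8-d9 and d11 as two
   separate groups (d10 being dead), a single vldm cannot restore them,
   so two pops are emitted below: first the group above the gap (d11),
   then d8-d9.  */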
25661 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25662 /* Look for a case where a reg does not need restoring. */
25663 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25664 && (!df_regs_ever_live_p (i + 1)
25665 || call_used_regs[i + 1]))
25666 {
25667 /* Restore the regs discovered so far (from reg+2 to
25668 end_reg). */
25669 if (end_reg > i + 2)
25670 arm_emit_vfp_multi_reg_pop (i + 2,
25671 (end_reg - (i + 2)) / 2,
25672 stack_pointer_rtx);
25673 end_reg = i;
25674 }
25675
25676 /* Restore the remaining regs that we have discovered (or possibly
25677 even all of them, if the conditional in the for loop never
25678 fired). */
25679 if (end_reg > i + 2)
25680 arm_emit_vfp_multi_reg_pop (i + 2,
25681 (end_reg - (i + 2)) / 2,
25682 stack_pointer_rtx);
25683 }
25684
25685 if (TARGET_IWMMXT)
25686 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25687 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25688 {
25689 rtx_insn *insn;
25690 rtx addr = gen_rtx_MEM (V2SImode,
25691 gen_rtx_POST_INC (SImode,
25692 stack_pointer_rtx));
25693 set_mem_alias_set (addr, get_frame_alias_set ());
25694 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25695 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25696 gen_rtx_REG (V2SImode, i),
25697 NULL_RTX);
25698 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25699 stack_pointer_rtx, stack_pointer_rtx);
25700 }
25701
25702 if (saved_regs_mask)
25703 {
25704 rtx insn;
25705 bool return_in_pc = false;
25706
25707 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25708 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25709 && !IS_CMSE_ENTRY (func_type)
25710 && !IS_STACKALIGN (func_type)
25711 && really_return
25712 && crtl->args.pretend_args_size == 0
25713 && saved_regs_mask & (1 << LR_REGNUM)
25714 && !crtl->calls_eh_return)
25715 {
25716 saved_regs_mask &= ~(1 << LR_REGNUM);
25717 saved_regs_mask |= (1 << PC_REGNUM);
25718 return_in_pc = true;
25719 }
25720
25721 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25722 {
25723 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25724 if (saved_regs_mask & (1 << i))
25725 {
25726 rtx addr = gen_rtx_MEM (SImode,
25727 gen_rtx_POST_INC (SImode,
25728 stack_pointer_rtx));
25729 set_mem_alias_set (addr, get_frame_alias_set ());
25730
25731 if (i == PC_REGNUM)
25732 {
25733 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25734 XVECEXP (insn, 0, 0) = ret_rtx;
25735 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25736 addr);
25737 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25738 insn = emit_jump_insn (insn);
25739 }
25740 else
25741 {
25742 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25743 addr));
25744 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25745 gen_rtx_REG (SImode, i),
25746 NULL_RTX);
25747 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25748 stack_pointer_rtx,
25749 stack_pointer_rtx);
25750 }
25751 }
25752 }
25753 else
25754 {
25755 if (TARGET_LDRD
25756 && current_tune->prefer_ldrd_strd
25757 && !optimize_function_for_size_p (cfun))
25758 {
25759 if (TARGET_THUMB2)
25760 thumb2_emit_ldrd_pop (saved_regs_mask);
25761 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25762 arm_emit_ldrd_pop (saved_regs_mask);
25763 else
25764 arm_emit_multi_reg_pop (saved_regs_mask);
25765 }
25766 else
25767 arm_emit_multi_reg_pop (saved_regs_mask);
25768 }
25769
25770 if (return_in_pc)
25771 return;
25772 }
25773
25774 amount
25775 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25776 if (amount)
25777 {
25778 int i, j;
25779 rtx dwarf = NULL_RTX;
25780 rtx_insn *tmp =
25781 emit_insn (gen_addsi3 (stack_pointer_rtx,
25782 stack_pointer_rtx,
25783 GEN_INT (amount)));
25784
25785 RTX_FRAME_RELATED_P (tmp) = 1;
25786
25787 if (cfun->machine->uses_anonymous_args)
25788 {
25789 /* Restore pretend args. Refer to arm_expand_prologue for how the
25790 pretend args are saved on the stack. */
25791 int num_regs = crtl->args.pretend_args_size / 4;
25792 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25793 for (j = 0, i = 0; j < num_regs; i++)
25794 if (saved_regs_mask & (1 << i))
25795 {
25796 rtx reg = gen_rtx_REG (SImode, i);
25797 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25798 j++;
25799 }
25800 REG_NOTES (tmp) = dwarf;
25801 }
25802 arm_add_cfa_adjust_cfa_note (tmp, amount,
25803 stack_pointer_rtx, stack_pointer_rtx);
25804 }
25805
25806 /* Clear all caller-saved regs that are not used to return. */
25807 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25808 {
25809 /* CMSE_ENTRY always returns. */
25810 gcc_assert (really_return);
25811 cmse_nonsecure_entry_clear_before_return ();
25812 }
25813
25814 if (!really_return)
25815 return;
25816
25817 if (crtl->calls_eh_return)
25818 emit_insn (gen_addsi3 (stack_pointer_rtx,
25819 stack_pointer_rtx,
25820 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25821
25822 if (IS_STACKALIGN (func_type))
25823 /* Restore the original stack pointer. Before prologue, the stack was
25824 realigned and the original stack pointer saved in r0. For details,
25825 see comment in arm_expand_prologue. */
25826 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25827
25828 emit_jump_insn (simple_return_rtx);
25829 }
25830
25831 /* Implementation of insn prologue_thumb1_interwork. This is the first
25832 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25833
25834 const char *
25835 thumb1_output_interwork (void)
25836 {
25837 const char * name;
25838 FILE *f = asm_out_file;
25839
25840 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25841 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25842 == SYMBOL_REF);
25843 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25844
25845 /* Generate code sequence to switch us into Thumb mode. */
25846 /* The .code 32 directive has already been emitted by
25847 ASM_DECLARE_FUNCTION_NAME. */
25848 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25849 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25850
25851 /* Generate a label, so that the debugger will notice the
25852 change in instruction sets. This label is also used by
25853 the assembler to bypass the ARM code when this function
25854 is called from a Thumb encoded function elsewhere in the
25855 same file. Hence the definition of STUB_NAME here must
25856 agree with the definition in gas/config/tc-arm.c. */
25857
25858 #define STUB_NAME ".real_start_of"
25859
25860 fprintf (f, "\t.code\t16\n");
25861 #ifdef ARM_PE
25862 if (arm_dllexport_name_p (name))
25863 name = arm_strip_name_encoding (name);
25864 #endif
25865 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25866 fprintf (f, "\t.thumb_func\n");
25867 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25868
25869 return "";
25870 }
25871
25872 /* Handle the case of a double word load into a low register from
25873 a computed memory address. The computed address may involve a
25874 register which is overwritten by the load. */
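/* For instance (illustrative), with operands "r0, [r0, r1]" the
   reg-plus-reg case below emits
	add	r1, r0, r1
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]
   so the overwritten base is only consumed after being copied into the
   high half of the destination.  */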
25875 const char *
25876 thumb_load_double_from_address (rtx *operands)
25877 {
25878 rtx addr;
25879 rtx base;
25880 rtx offset;
25881 rtx arg1;
25882 rtx arg2;
25883
25884 gcc_assert (REG_P (operands[0]));
25885 gcc_assert (MEM_P (operands[1]));
25886
25887 /* Get the memory address. */
25888 addr = XEXP (operands[1], 0);
25889
25890 /* Work out how the memory address is computed. */
25891 switch (GET_CODE (addr))
25892 {
25893 case REG:
25894 operands[2] = adjust_address (operands[1], SImode, 4);
25895
25896 if (REGNO (operands[0]) == REGNO (addr))
25897 {
25898 output_asm_insn ("ldr\t%H0, %2", operands);
25899 output_asm_insn ("ldr\t%0, %1", operands);
25900 }
25901 else
25902 {
25903 output_asm_insn ("ldr\t%0, %1", operands);
25904 output_asm_insn ("ldr\t%H0, %2", operands);
25905 }
25906 break;
25907
25908 case CONST:
25909 /* Compute <address> + 4 for the high order load. */
25910 operands[2] = adjust_address (operands[1], SImode, 4);
25911
25912 output_asm_insn ("ldr\t%0, %1", operands);
25913 output_asm_insn ("ldr\t%H0, %2", operands);
25914 break;
25915
25916 case PLUS:
25917 arg1 = XEXP (addr, 0);
25918 arg2 = XEXP (addr, 1);
25919
25920 if (CONSTANT_P (arg1))
25921 base = arg2, offset = arg1;
25922 else
25923 base = arg1, offset = arg2;
25924
25925 gcc_assert (REG_P (base));
25926
25927 /* Catch the case of <address> = <reg> + <reg> */
25928 if (REG_P (offset))
25929 {
25930 int reg_offset = REGNO (offset);
25931 int reg_base = REGNO (base);
25932 int reg_dest = REGNO (operands[0]);
25933
25934 /* Add the base and offset registers together into the
25935 higher destination register. */
25936 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25937 reg_dest + 1, reg_base, reg_offset);
25938
25939 /* Load the lower destination register from the address in
25940 the higher destination register. */
25941 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25942 reg_dest, reg_dest + 1);
25943
25944 /* Load the higher destination register from its own address
25945 plus 4. */
25946 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25947 reg_dest + 1, reg_dest + 1);
25948 }
25949 else
25950 {
25951 /* Compute <address> + 4 for the high order load. */
25952 operands[2] = adjust_address (operands[1], SImode, 4);
25953
25954 /* If the computed address is held in the low order register
25955 then load the high order register first, otherwise always
25956 load the low order register first. */
25957 if (REGNO (operands[0]) == REGNO (base))
25958 {
25959 output_asm_insn ("ldr\t%H0, %2", operands);
25960 output_asm_insn ("ldr\t%0, %1", operands);
25961 }
25962 else
25963 {
25964 output_asm_insn ("ldr\t%0, %1", operands);
25965 output_asm_insn ("ldr\t%H0, %2", operands);
25966 }
25967 }
25968 break;
25969
25970 case LABEL_REF:
25971 /* With no registers to worry about we can just load the value
25972 directly. */
25973 operands[2] = adjust_address (operands[1], SImode, 4);
25974
25975 output_asm_insn ("ldr\t%H0, %2", operands);
25976 output_asm_insn ("ldr\t%0, %1", operands);
25977 break;
25978
25979 default:
25980 gcc_unreachable ();
25981 }
25982
25983 return "";
25984 }
25985
25986 const char *
25987 thumb_output_move_mem_multiple (int n, rtx *operands)
25988 {
25989 switch (n)
25990 {
25991 case 2:
25992 if (REGNO (operands[4]) > REGNO (operands[5]))
25993 std::swap (operands[4], operands[5]);
25994
25995 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25996 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25997 break;
25998
25999 case 3:
26000 if (REGNO (operands[4]) > REGNO (operands[5]))
26001 std::swap (operands[4], operands[5]);
26002 if (REGNO (operands[5]) > REGNO (operands[6]))
26003 std::swap (operands[5], operands[6]);
26004 if (REGNO (operands[4]) > REGNO (operands[5]))
26005 std::swap (operands[4], operands[5]);
26006
26007 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26008 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26009 break;
26010
26011 default:
26012 gcc_unreachable ();
26013 }
26014
26015 return "";
26016 }
26017
26018 /* Output a call-via instruction for thumb state. */
26019 const char *
26020 thumb_call_via_reg (rtx reg)
26021 {
26022 int regno = REGNO (reg);
26023 rtx *labelp;
26024
26025 gcc_assert (regno < LR_REGNUM);
26026
26027 /* If we are in the normal text section we can use a single instance
26028 per compilation unit. If we are doing function sections, then we need
26029 an entry per section, since we can't rely on reachability. */
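/* Sketch of the result: a call through r3 becomes "bl .Ln", where a
   matching ".Ln: bx r3" stub is emitted later (in arm_file_end for the
   text-section case).  */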
26030 if (in_section == text_section)
26031 {
26032 thumb_call_reg_needed = 1;
26033
26034 if (thumb_call_via_label[regno] == NULL)
26035 thumb_call_via_label[regno] = gen_label_rtx ();
26036 labelp = thumb_call_via_label + regno;
26037 }
26038 else
26039 {
26040 if (cfun->machine->call_via[regno] == NULL)
26041 cfun->machine->call_via[regno] = gen_label_rtx ();
26042 labelp = cfun->machine->call_via + regno;
26043 }
26044
26045 output_asm_insn ("bl\t%a0", labelp);
26046 return "";
26047 }
26048
26049 /* Routines for generating rtl. */
26050 void
26051 thumb_expand_movmemqi (rtx *operands)
26052 {
26053 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26054 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26055 HOST_WIDE_INT len = INTVAL (operands[2]);
26056 HOST_WIDE_INT offset = 0;
26057
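/* The copy is done greedily: 12- and 8-byte blocks via ldmia/stmia,
   then a word, a halfword and a byte as needed. E.g. a 23-byte copy
   becomes one 12-byte block, one 8-byte block, one halfword and one
   byte. */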
26058 while (len >= 12)
26059 {
26060 emit_insn (gen_movmem12b (out, in, out, in));
26061 len -= 12;
26062 }
26063
26064 if (len >= 8)
26065 {
26066 emit_insn (gen_movmem8b (out, in, out, in));
26067 len -= 8;
26068 }
26069
26070 if (len >= 4)
26071 {
26072 rtx reg = gen_reg_rtx (SImode);
26073 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26074 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26075 len -= 4;
26076 offset += 4;
26077 }
26078
26079 if (len >= 2)
26080 {
26081 rtx reg = gen_reg_rtx (HImode);
26082 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26083 plus_constant (Pmode, in,
26084 offset))));
26085 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26086 offset)),
26087 reg));
26088 len -= 2;
26089 offset += 2;
26090 }
26091
26092 if (len)
26093 {
26094 rtx reg = gen_reg_rtx (QImode);
26095 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26096 plus_constant (Pmode, in,
26097 offset))));
26098 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26099 offset)),
26100 reg));
26101 }
26102 }
26103
26104 void
26105 thumb_reload_out_hi (rtx *operands)
26106 {
26107 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26108 }
26109
26110 /* Return the length of a function name prefix
26111 that starts with the character 'c'. */
26112 static int
26113 arm_get_strip_length (int c)
26114 {
26115 switch (c)
26116 {
26117 ARM_NAME_ENCODING_LENGTHS
26118 default: return 0;
26119 }
26120 }
26121
26122 /* Return a pointer to a function's name with any
26123 and all prefix encodings stripped from it. */
26124 const char *
26125 arm_strip_name_encoding (const char *name)
26126 {
26127 int skip;
26128
26129 while ((skip = arm_get_strip_length (* name)))
26130 name += skip;
26131
26132 return name;
26133 }
26134
26135 /* If there is a '*' anywhere in the name's prefix, then
26136 emit the stripped name verbatim, otherwise prepend an
26137 underscore if leading underscores are being used. */
26138 void
26139 arm_asm_output_labelref (FILE *stream, const char *name)
26140 {
26141 int skip;
26142 int verbatim = 0;
26143
26144 while ((skip = arm_get_strip_length (* name)))
26145 {
26146 verbatim |= (*name == '*');
26147 name += skip;
26148 }
26149
26150 if (verbatim)
26151 fputs (name, stream);
26152 else
26153 asm_fprintf (stream, "%U%s", name);
26154 }
26155
26156 /* This function is used to emit an EABI tag and its associated value.
26157 We emit the numerical value of the tag in case the assembler does not
26158 support textual tags (e.g. gas prior to 2.20). If requested we include
26159 the tag name in a comment so that anyone reading the assembler output
26160 will know which tag is being set.
26161
26162 This function is not static because arm-c.c needs it too. */
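/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   emits (assuming '@' is the assembler comment character and verbose
   asm is enabled):
	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal  */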
26163
26164 void
26165 arm_emit_eabi_attribute (const char *name, int num, int val)
26166 {
26167 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26168 if (flag_verbose_asm || flag_debug_asm)
26169 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26170 asm_fprintf (asm_out_file, "\n");
26171 }
26172
26173 /* This function is used to print CPU tuning information as comment
26174 in assembler file. Pointers are not printed for now. */
26175
26176 void
26177 arm_print_tune_info (void)
26178 {
26179 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26180 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26181 current_tune->constant_limit);
26182 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26183 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26184 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26185 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26186 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26187 "prefetch.l1_cache_size:\t%d\n",
26188 current_tune->prefetch.l1_cache_size);
26189 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26190 "prefetch.l1_cache_line_size:\t%d\n",
26191 current_tune->prefetch.l1_cache_line_size);
26192 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26193 "prefer_constant_pool:\t%d\n",
26194 (int) current_tune->prefer_constant_pool);
26195 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26196 "branch_cost:\t(s:speed, p:predictable)\n");
26197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26198 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26199 current_tune->branch_cost (false, false));
26200 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26201 current_tune->branch_cost (false, true));
26202 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26203 current_tune->branch_cost (true, false));
26204 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26205 current_tune->branch_cost (true, true));
26206 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26207 "prefer_ldrd_strd:\t%d\n",
26208 (int) current_tune->prefer_ldrd_strd);
26209 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26210 "logical_op_non_short_circuit:\t[%d,%d]\n",
26211 (int) current_tune->logical_op_non_short_circuit_thumb,
26212 (int) current_tune->logical_op_non_short_circuit_arm);
26213 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26214 "prefer_neon_for_64bits:\t%d\n",
26215 (int) current_tune->prefer_neon_for_64bits);
26216 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26217 "disparage_flag_setting_t16_encodings:\t%d\n",
26218 (int) current_tune->disparage_flag_setting_t16_encodings);
26219 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26220 "string_ops_prefer_neon:\t%d\n",
26221 (int) current_tune->string_ops_prefer_neon);
26222 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26223 "max_insns_inline_memset:\t%d\n",
26224 current_tune->max_insns_inline_memset);
26225 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26226 current_tune->fusible_ops);
26227 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26228 (int) current_tune->sched_autopref);
26229 }
26230
26231 /* Print .arch and .arch_extension directives corresponding to the
26232 current architecture configuration. */
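/* E.g. for -march=armv8-a+crc this would print ".arch armv8-a" followed
   by ".arch_extension crc" (a sketch; the exact extension list depends
   on the target ISA bits).  */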
26233 static void
26234 arm_print_asm_arch_directives ()
26235 {
26236 const arch_option *arch
26237 = arm_parse_arch_option_name (all_architectures, "-march",
26238 arm_active_target.arch_name);
26239 auto_sbitmap opt_bits (isa_num_bits);
26240
26241 gcc_assert (arch);
26242
26243 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26244 if (!arch->common.extensions)
26245 return;
26246
26247 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26248 opt->name != NULL;
26249 opt++)
26250 {
26251 if (!opt->remove)
26252 {
26253 arm_initialize_isa (opt_bits, opt->isa_bits);
26254
26255 /* If every feature bit of this option is set in the target
26256 ISA specification, print out the option name. However,
26257 don't print anything if all the bits are part of the
26258 FPU specification. */
26259 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26260 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26261 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26262 }
26263 }
26264 }
26265
26266 static void
26267 arm_file_start (void)
26268 {
26269 int val;
26270
26271 if (TARGET_BPABI)
26272 {
26273 /* We don't have a specified CPU. Use the architecture to
26274 generate the tags.
26275
26276 Note: it might be better to do this unconditionally; then the
26277 assembler would not need to know about all new CPU names as
26278 they are added. */
26279 if (!arm_active_target.core_name)
26280 {
26281 /* armv7ve doesn't support any extensions. */
26282 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26283 {
26284 /* Keep backward compatibility for assemblers
26285 which don't support armv7ve. */
26286 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26287 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26288 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26289 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26290 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26291 }
26292 else
26293 arm_print_asm_arch_directives ();
26294 }
26295 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26296 asm_fprintf (asm_out_file, "\t.arch %s\n",
26297 arm_active_target.core_name + 8);
26298 else
26299 {
26300 const char* truncated_name
26301 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26302 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26303 }
26304
26305 if (print_tune_info)
26306 arm_print_tune_info ();
26307
26308 if (! TARGET_SOFT_FLOAT)
26309 {
26310 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26311 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26312
26313 if (TARGET_HARD_FLOAT_ABI)
26314 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26315 }
26316
26317 /* Some of these attributes only apply when the corresponding features
26318 are used. However we don't have any easy way of figuring this out.
26319 Conservatively record the setting that would have been used. */
26320
26321 if (flag_rounding_math)
26322 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26323
26324 if (!flag_unsafe_math_optimizations)
26325 {
26326 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26327 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26328 }
26329 if (flag_signaling_nans)
26330 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26331
26332 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26333 flag_finite_math_only ? 1 : 3);
26334
26335 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26336 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26337 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26338 flag_short_enums ? 1 : 2);
26339
26340 /* Tag_ABI_optimization_goals. */
26341 if (optimize_size)
26342 val = 4;
26343 else if (optimize >= 2)
26344 val = 2;
26345 else if (optimize)
26346 val = 1;
26347 else
26348 val = 6;
26349 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26350
26351 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26352 unaligned_access);
26353
26354 if (arm_fp16_format)
26355 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26356 (int) arm_fp16_format);
26357
26358 if (arm_lang_output_object_attributes_hook)
26359 arm_lang_output_object_attributes_hook ();
26360 }
26361
26362 default_file_start ();
26363 }
26364
26365 static void
26366 arm_file_end (void)
26367 {
26368 int regno;
26369
26370 if (NEED_INDICATE_EXEC_STACK)
26371 /* Add .note.GNU-stack. */
26372 file_end_indicate_exec_stack ();
26373
26374 if (! thumb_call_reg_needed)
26375 return;
26376
26377 switch_to_section (text_section);
26378 asm_fprintf (asm_out_file, "\t.code 16\n");
26379 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26380
26381 for (regno = 0; regno < LR_REGNUM; regno++)
26382 {
26383 rtx label = thumb_call_via_label[regno];
26384
26385 if (label != 0)
26386 {
26387 targetm.asm_out.internal_label (asm_out_file, "L",
26388 CODE_LABEL_NUMBER (label));
26389 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26390 }
26391 }
26392 }
26393
26394 #ifndef ARM_PE
26395 /* Symbols in the text segment can be accessed without indirecting via the
26396 constant pool; it may take an extra binary operation, but this is still
26397 faster than indirecting via memory. Don't do this when not optimizing,
26398 since we won't be calculating all of the offsets necessary to do this
26399 simplification. */
26400
26401 static void
26402 arm_encode_section_info (tree decl, rtx rtl, int first)
26403 {
26404 if (optimize > 0 && TREE_CONSTANT (decl))
26405 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26406
26407 default_encode_section_info (decl, rtl, first);
26408 }
26409 #endif /* !ARM_PE */
26410
26411 static void
26412 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26413 {
26414 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26415 && !strcmp (prefix, "L"))
26416 {
26417 arm_ccfsm_state = 0;
26418 arm_target_insn = NULL;
26419 }
26420 default_internal_label (stream, prefix, labelno);
26421 }
26422
26423 /* Output code to add DELTA to the first argument, and then jump
26424 to FUNCTION. Used for C++ multiple inheritance. */
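/* A sketch of the output for a Thumb-1-only, non-PIC target with
   delta == 8 and a non-aggregate return (so "this" is in r0):
	push	{r3}
	ldr	r3, .Lthunk
	mov	r12, r3
	adds	r0, r0, #8
	pop	{r3}
	bx	r12
   .Lthunk:
	.word	<function>
   The exact sequence varies with PIC, the size of DELTA and the target
   variant.  */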
26425
26426 static void
26427 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26428 HOST_WIDE_INT, tree function)
26429 {
26430 static int thunk_label = 0;
26431 char label[256];
26432 char labelpc[256];
26433 int mi_delta = delta;
26434 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26435 int shift = 0;
26436 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26437 ? 1 : 0);
26438 if (mi_delta < 0)
26439 mi_delta = - mi_delta;
26440
26441 final_start_function (emit_barrier (), file, 1);
26442
26443 if (TARGET_THUMB1)
26444 {
26445 int labelno = thunk_label++;
26446 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26447 /* Thunks are entered in ARM mode when available. */
26448 if (TARGET_THUMB1_ONLY)
26449 {
26450 /* push r3 so we can use it as a temporary. */
26451 /* TODO: Omit this save if r3 is not used. */
26452 fputs ("\tpush {r3}\n", file);
26453 fputs ("\tldr\tr3, ", file);
26454 }
26455 else
26456 {
26457 fputs ("\tldr\tr12, ", file);
26458 }
26459 assemble_name (file, label);
26460 fputc ('\n', file);
26461 if (flag_pic)
26462 {
26463 /* If we are generating PIC, the ldr instruction below loads
26464 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26465 the address of the add + 8, so we have:
26466
26467 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26468 = target + 1.
26469
26470 Note that we have "+ 1" because some versions of GNU ld
26471 don't set the low bit of the result for R_ARM_REL32
26472 relocations against thumb function symbols.
26473 On ARMv6M this is +4, not +8. */
26474 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26475 assemble_name (file, labelpc);
26476 fputs (":\n", file);
26477 if (TARGET_THUMB1_ONLY)
26478 {
26479 /* This is 2 insns after the start of the thunk, so we know it
26480 is 4-byte aligned. */
26481 fputs ("\tadd\tr3, pc, r3\n", file);
26482 fputs ("\tmov r12, r3\n", file);
26483 }
26484 else
26485 fputs ("\tadd\tr12, pc, r12\n", file);
26486 }
26487 else if (TARGET_THUMB1_ONLY)
26488 fputs ("\tmov r12, r3\n", file);
26489 }
26490 if (TARGET_THUMB1_ONLY)
26491 {
26492 if (mi_delta > 255)
26493 {
26494 fputs ("\tldr\tr3, ", file);
26495 assemble_name (file, label);
26496 fputs ("+4\n", file);
26497 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26498 mi_op, this_regno, this_regno);
26499 }
26500 else if (mi_delta != 0)
26501 {
26502 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26503 when one of the operands is an immediate. */
26504 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26505 mi_op, this_regno, this_regno,
26506 mi_delta);
26507 }
26508 }
26509 else
26510 {
26511 /* TODO: Use movw/movt for large constants when available. */
26512 while (mi_delta != 0)
26513 {
26514 if ((mi_delta & (3 << shift)) == 0)
26515 shift += 2;
26516 else
26517 {
26518 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26519 mi_op, this_regno, this_regno,
26520 mi_delta & (0xff << shift));
26521 mi_delta &= ~(0xff << shift);
26522 shift += 8;
26523 }
26524 }
26525 }
26526 if (TARGET_THUMB1)
26527 {
26528 if (TARGET_THUMB1_ONLY)
26529 fputs ("\tpop\t{r3}\n", file);
26530
26531 fprintf (file, "\tbx\tr12\n");
26532 ASM_OUTPUT_ALIGN (file, 2);
26533 assemble_name (file, label);
26534 fputs (":\n", file);
26535 if (flag_pic)
26536 {
26537 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26538 rtx tem = XEXP (DECL_RTL (function), 0);
26539 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26540 pipeline offset is four rather than eight. Adjust the offset
26541 accordingly. */
26542 tem = plus_constant (GET_MODE (tem), tem,
26543 TARGET_THUMB1_ONLY ? -3 : -7);
26544 tem = gen_rtx_MINUS (GET_MODE (tem),
26545 tem,
26546 gen_rtx_SYMBOL_REF (Pmode,
26547 ggc_strdup (labelpc)));
26548 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26549 }
26550 else
26551 /* Output ".word .LTHUNKn". */
26552 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26553
26554 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26555 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
26556 }
26557 else
26558 {
26559 fputs ("\tb\t", file);
26560 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26561 if (NEED_PLT_RELOC)
26562 fputs ("(PLT)", file);
26563 fputc ('\n', file);
26564 }
26565
26566 final_end_function ();
26567 }
26568
26569 /* MI thunk handling for TARGET_32BIT. */
26570
26571 static void
26572 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26573 HOST_WIDE_INT vcall_offset, tree function)
26574 {
26575 /* On ARM, this_regno is R0 or R1 depending on
26576 whether the function returns an aggregate or not.
26577 */
26578 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26579 function)
26580 ? R1_REGNUM : R0_REGNUM);
26581
26582 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26583 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26584 reload_completed = 1;
26585 emit_note (NOTE_INSN_PROLOGUE_END);
26586
26587 /* Add DELTA to THIS_RTX. */
26588 if (delta != 0)
26589 arm_split_constant (PLUS, Pmode, NULL_RTX,
26590 delta, this_rtx, this_rtx, false);
26591
26592 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26593 if (vcall_offset != 0)
26594 {
26595 /* Load *THIS_RTX. */
26596 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26597 /* Compute *THIS_RTX + VCALL_OFFSET. */
26598 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26599 false);
26600 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26601 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26602 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26603 }
26604
26605 /* Generate a tail call to the target function. */
26606 if (!TREE_USED (function))
26607 {
26608 assemble_external (function);
26609 TREE_USED (function) = 1;
26610 }
26611 rtx funexp = XEXP (DECL_RTL (function), 0);
26612 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26613 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26614 SIBLING_CALL_P (insn) = 1;
26615
26616 insn = get_insns ();
26617 shorten_branches (insn);
26618 final_start_function (insn, file, 1);
26619 final (insn, file, 1);
26620 final_end_function ();
26621
26622 /* Stop pretending this is a post-reload pass. */
26623 reload_completed = 0;
26624 }
26625
26626 /* Output code to add DELTA to the first argument, and then jump
26627 to FUNCTION. Used for C++ multiple inheritance. */
26628
26629 static void
26630 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26631 HOST_WIDE_INT vcall_offset, tree function)
26632 {
26633 if (TARGET_32BIT)
26634 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26635 else
26636 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26637 }
26638
26639 int
26640 arm_emit_vector_const (FILE *file, rtx x)
26641 {
26642 int i;
26643 const char * pattern;
26644
26645 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26646
26647 switch (GET_MODE (x))
26648 {
26649 case E_V2SImode: pattern = "%08x"; break;
26650 case E_V4HImode: pattern = "%04x"; break;
26651 case E_V8QImode: pattern = "%02x"; break;
26652 default: gcc_unreachable ();
26653 }
26654
26655 fprintf (file, "0x");
26656 for (i = CONST_VECTOR_NUNITS (x); i--;)
26657 {
26658 rtx element;
26659
26660 element = CONST_VECTOR_ELT (x, i);
26661 fprintf (file, pattern, INTVAL (element));
26662 }
26663
26664 return 1;
26665 }
26666
26667 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26668 HFmode constant pool entries are actually loaded with ldr. */
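/* E.g. the HFmode constant 1.0 (bit pattern 0x3c00) is emitted on a
   little-endian target as the bytes 00 3c 00 00, i.e. the 2-byte value
   followed by 2 bytes of zero padding.  */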
26669 void
26670 arm_emit_fp16_const (rtx c)
26671 {
26672 long bits;
26673
26674 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26675 if (WORDS_BIG_ENDIAN)
26676 assemble_zeros (2);
26677 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26678 if (!WORDS_BIG_ENDIAN)
26679 assemble_zeros (2);
26680 }
26681
26682 const char *
26683 arm_output_load_gr (rtx *operands)
26684 {
26685 rtx reg;
26686 rtx offset;
26687 rtx wcgr;
26688 rtx sum;
26689
26690 if (!MEM_P (operands [1])
26691 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26692 || !REG_P (reg = XEXP (sum, 0))
26693 || !CONST_INT_P (offset = XEXP (sum, 1))
26694 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26695 return "wldrw%?\t%0, %1";
26696
26697 /* Fix up an out-of-range load of a GR register. */
26698 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26699 wcgr = operands[0];
26700 operands[0] = reg;
26701 output_asm_insn ("ldr%?\t%0, %1", operands);
26702
26703 operands[0] = wcgr;
26704 operands[1] = reg;
26705 output_asm_insn ("tmcr%?\t%0, %1", operands);
26706 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26707
26708 return "";
26709 }
26710
26711 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26712
26713 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26714 named arg and all anonymous args onto the stack.
26715 XXX I know the prologue shouldn't be pushing registers, but it is faster
26716 that way. */
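/* A sketch: for "int f (int a, ...)" under AAPCS, the named argument
   consumes one core register, so nregs == 1 and *pretend_size becomes
   (4 - 1) * 4 = 12, which makes the prologue push r1-r3 next to any
   stacked arguments.  */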
26717
26718 static void
26719 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26720 machine_mode mode,
26721 tree type,
26722 int *pretend_size,
26723 int second_time ATTRIBUTE_UNUSED)
26724 {
26725 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26726 int nregs;
26727
26728 cfun->machine->uses_anonymous_args = 1;
26729 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26730 {
26731 nregs = pcum->aapcs_ncrn;
26732 if (nregs & 1)
26733 {
26734 int res = arm_needs_doubleword_align (mode, type);
26735 if (res < 0 && warn_psabi)
26736 inform (input_location, "parameter passing for argument of "
26737 "type %qT changed in GCC 7.1", type);
26738 else if (res > 0)
26739 nregs++;
26740 }
26741 }
26742 else
26743 nregs = pcum->nregs;
26744
26745 if (nregs < NUM_ARG_REGS)
26746 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26747 }
26748
26749 /* We can't rely on the caller doing the proper promotion when
26750 using APCS or ATPCS. */
26751
26752 static bool
26753 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26754 {
26755 return !TARGET_AAPCS_BASED;
26756 }
26757
26758 static machine_mode
26759 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26760 machine_mode mode,
26761 int *punsignedp ATTRIBUTE_UNUSED,
26762 const_tree fntype ATTRIBUTE_UNUSED,
26763 int for_return ATTRIBUTE_UNUSED)
26764 {
26765 if (GET_MODE_CLASS (mode) == MODE_INT
26766 && GET_MODE_SIZE (mode) < 4)
26767 return SImode;
26768
26769 return mode;
26770 }
26771
26772
26773 static bool
26774 arm_default_short_enums (void)
26775 {
26776 return ARM_DEFAULT_SHORT_ENUMS;
26777 }
26778
26779
26780 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26781
26782 static bool
26783 arm_align_anon_bitfield (void)
26784 {
26785 return TARGET_AAPCS_BASED;
26786 }
26787
26788
26789 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26790
26791 static tree
26792 arm_cxx_guard_type (void)
26793 {
26794 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26795 }
26796
26797
26798 /* The EABI says test the least significant bit of a guard variable. */
26799
26800 static bool
26801 arm_cxx_guard_mask_bit (void)
26802 {
26803 return TARGET_AAPCS_BASED;
26804 }
26805
26806
26807 /* The EABI specifies that all array cookies are 8 bytes long. */
26808
26809 static tree
26810 arm_get_cookie_size (tree type)
26811 {
26812 tree size;
26813
26814 if (!TARGET_AAPCS_BASED)
26815 return default_cxx_get_cookie_size (type);
26816
26817 size = build_int_cst (sizetype, 8);
26818 return size;
26819 }
26820
26821
26822 /* The EABI says that array cookies should also contain the element size. */
26823
26824 static bool
26825 arm_cookie_has_size (void)
26826 {
26827 return TARGET_AAPCS_BASED;
26828 }
26829
26830
26831 /* The EABI says constructors and destructors should return a pointer to
26832 the object constructed/destroyed. */
26833
26834 static bool
26835 arm_cxx_cdtor_returns_this (void)
26836 {
26837 return TARGET_AAPCS_BASED;
26838 }
26839
26840 /* The EABI says that an inline function may never be the key
26841 method. */
26842
26843 static bool
26844 arm_cxx_key_method_may_be_inline (void)
26845 {
26846 return !TARGET_AAPCS_BASED;
26847 }
26848
26849 static void
26850 arm_cxx_determine_class_data_visibility (tree decl)
26851 {
26852 if (!TARGET_AAPCS_BASED
26853 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26854 return;
26855
26856 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26857 is exported. However, on systems without dynamic vague linkage,
26858 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26859 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26860 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26861 else
26862 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26863 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26864 }
26865
26866 static bool
26867 arm_cxx_class_data_always_comdat (void)
26868 {
26869 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26870 vague linkage if the class has no key function. */
26871 return !TARGET_AAPCS_BASED;
26872 }
26873
26874
26875 /* The EABI says __aeabi_atexit should be used to register static
26876 destructors. */
26877
26878 static bool
26879 arm_cxx_use_aeabi_atexit (void)
26880 {
26881 return TARGET_AAPCS_BASED;
26882 }
26883
26884
26885 void
26886 arm_set_return_address (rtx source, rtx scratch)
26887 {
26888 arm_stack_offsets *offsets;
26889 HOST_WIDE_INT delta;
26890 rtx addr, mem;
26891 unsigned long saved_regs;
26892
26893 offsets = arm_get_frame_offsets ();
26894 saved_regs = offsets->saved_regs_mask;
26895
26896 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26897 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26898 else
26899 {
26900 if (frame_pointer_needed)
26901 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26902 else
26903 {
26904 /* LR will be the first saved register. */
26905 delta = offsets->outgoing_args - (offsets->frame + 4);
26906
26907
26908 if (delta >= 4096)
26909 {
26910 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26911 GEN_INT (delta & ~4095)));
26912 addr = scratch;
26913 delta &= 4095;
26914 }
26915 else
26916 addr = stack_pointer_rtx;
26917
26918 addr = plus_constant (Pmode, addr, delta);
26919 }
26920
26921 /* The store needs to be marked to prevent DSE from deleting
26922 it as dead if it is based on fp. */
26923 mem = gen_frame_mem (Pmode, addr);
26924 MEM_VOLATILE_P (mem) = true;
26925 emit_move_insn (mem, source);
26926 }
26927 }
26928
26929
26930 void
26931 thumb_set_return_address (rtx source, rtx scratch)
26932 {
26933 arm_stack_offsets *offsets;
26934 HOST_WIDE_INT delta;
26935 HOST_WIDE_INT limit;
26936 int reg;
26937 rtx addr, mem;
26938 unsigned long mask;
26939
26940 emit_use (source);
26941
26942 offsets = arm_get_frame_offsets ();
26943 mask = offsets->saved_regs_mask;
26944 if (mask & (1 << LR_REGNUM))
26945 {
26946 limit = 1024;
26947 /* Find the saved regs. */
26948 if (frame_pointer_needed)
26949 {
26950 delta = offsets->soft_frame - offsets->saved_args;
26951 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26952 if (TARGET_THUMB1)
26953 limit = 128;
26954 }
26955 else
26956 {
26957 delta = offsets->outgoing_args - offsets->saved_args;
26958 reg = SP_REGNUM;
26959 }
26960 /* Allow for the stack frame. */
26961 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26962 delta -= 16;
26963 /* The link register is always the first saved register. */
26964 delta -= 4;
26965
26966 /* Construct the address. */
26967 addr = gen_rtx_REG (SImode, reg);
26968 if (delta > limit)
26969 {
26970 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26971 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26972 addr = scratch;
26973 }
26974 else
26975 addr = plus_constant (Pmode, addr, delta);
26976
26977 /* The store needs to be marked to prevent DSE from deleting
26978 it as dead if it is based on fp. */
26979 mem = gen_frame_mem (Pmode, addr);
26980 MEM_VOLATILE_P (mem) = true;
26981 emit_move_insn (mem, source);
26982 }
26983 else
26984 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26985 }
26986
26987 /* Implements target hook vector_mode_supported_p. */
26988 bool
26989 arm_vector_mode_supported_p (machine_mode mode)
26990 {
26991 /* Neon also supports V2SImode, etc. listed in the clause below. */
26992 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26993 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26994 || mode == V2DImode || mode == V8HFmode))
26995 return true;
26996
26997 if ((TARGET_NEON || TARGET_IWMMXT)
26998 && ((mode == V2SImode)
26999 || (mode == V4HImode)
27000 || (mode == V8QImode)))
27001 return true;
27002
27003 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27004 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27005 || mode == V2HAmode))
27006 return true;
27007
27008 return false;
27009 }
27010
27011 /* Implements target hook array_mode_supported_p. */
27012
27013 static bool
27014 arm_array_mode_supported_p (machine_mode mode,
27015 unsigned HOST_WIDE_INT nelems)
27016 {
27017 if (TARGET_NEON
27018 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27019 && (nelems >= 2 && nelems <= 4))
27020 return true;
27021
27022 return false;
27023 }
27024
27025 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27026 registers when autovectorizing for Neon, at least until multiple vector
27027 widths are supported properly by the middle-end. */
27028
27029 static machine_mode
27030 arm_preferred_simd_mode (scalar_mode mode)
27031 {
27032 if (TARGET_NEON)
27033 switch (mode)
27034 {
27035 case E_SFmode:
27036 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27037 case E_SImode:
27038 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27039 case E_HImode:
27040 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27041 case E_QImode:
27042 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27043 case E_DImode:
27044 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27045 return V2DImode;
27046 break;
27047
27048 default:;
27049 }
27050
27051 if (TARGET_REALLY_IWMMXT)
27052 switch (mode)
27053 {
27054 case E_SImode:
27055 return V2SImode;
27056 case E_HImode:
27057 return V4HImode;
27058 case E_QImode:
27059 return V8QImode;
27060
27061 default:;
27062 }
27063
27064 return word_mode;
27065 }
27066
27067 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27068
27069 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27070 using r0-r4 for function arguments and r7 for the stack frame, and not have
27071 enough left over to do doubleword arithmetic. For Thumb-2 all the
27072 potentially problematic instructions accept high registers so this is not
27073 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27074 that require many low registers. */
27075 static bool
27076 arm_class_likely_spilled_p (reg_class_t rclass)
27077 {
27078 if ((TARGET_THUMB1 && rclass == LO_REGS)
27079 || rclass == CC_REG)
27080 return true;
27081
27082 return false;
27083 }
27084
27085 /* Implements target hook small_register_classes_for_mode_p. */
27086 bool
27087 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27088 {
27089 return TARGET_THUMB1;
27090 }
27091
27092 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27093 ARM insns and therefore guarantee that the shift count is modulo 256.
27094 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27095 guarantee no particular behavior for out-of-range counts. */
27096
27097 static unsigned HOST_WIDE_INT
27098 arm_shift_truncation_mask (machine_mode mode)
27099 {
27100 return mode == SImode ? 255 : 0;
27101 }
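
/* Illustrative example: a variable SImode shift such as `lsl r0, r1, r2`
   only consumes the least significant byte of r2, so a shift count of 257
   behaves like a shift by 1 and the mask above is 255.  No such guarantee
   exists for the DImode helpers, hence the mask of 0.  (A sketch only; the
   authoritative statement is the comment above.)  */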
27102
27103
27104 /* Map internal gcc register numbers to DWARF2 register numbers. */
27105
27106 unsigned int
27107 arm_dbx_register_number (unsigned int regno)
27108 {
27109 if (regno < 16)
27110 return regno;
27111
27112 if (IS_VFP_REGNUM (regno))
27113 {
27114 /* See comment in arm_dwarf_register_span. */
27115 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27116 return 64 + regno - FIRST_VFP_REGNUM;
27117 else
27118 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27119 }
27120
27121 if (IS_IWMMXT_GR_REGNUM (regno))
27122 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27123
27124 if (IS_IWMMXT_REGNUM (regno))
27125 return 112 + regno - FIRST_IWMMXT_REGNUM;
27126
27127 return DWARF_FRAME_REGISTERS;
27128 }
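
/* Illustrative mapping (following the code above and the usual EABI DWARF
   numbering): core registers map to themselves (r0 -> 0 ... pc -> 15), s0
   maps to 64 and s1 to 65 in the legacy single-precision range, while a
   high double such as d16 falls into the 256-287 range (d16 -> 272).  */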
27129
27130 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27131 GCC models them as 64 32-bit registers, so we need to describe this to
27132 the DWARF generation code. Other registers can use the default. */
27133 static rtx
27134 arm_dwarf_register_span (rtx rtl)
27135 {
27136 machine_mode mode;
27137 unsigned regno;
27138 rtx parts[16];
27139 int nregs;
27140 int i;
27141
27142 regno = REGNO (rtl);
27143 if (!IS_VFP_REGNUM (regno))
27144 return NULL_RTX;
27145
27146 /* XXX FIXME: The EABI defines two VFP register ranges:
27147 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27148 256-287: D0-D31
27149 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27150 corresponding D register. Until GDB supports this, we shall use the
27151 legacy encodings. We also use these encodings for D0-D15 for
27152 compatibility with older debuggers. */
27153 mode = GET_MODE (rtl);
27154 if (GET_MODE_SIZE (mode) < 8)
27155 return NULL_RTX;
27156
27157 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27158 {
27159 nregs = GET_MODE_SIZE (mode) / 4;
27160 for (i = 0; i < nregs; i += 2)
27161 if (TARGET_BIG_END)
27162 {
27163 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27164 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27165 }
27166 else
27167 {
27168 parts[i] = gen_rtx_REG (SImode, regno + i);
27169 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27170 }
27171 }
27172 else
27173 {
27174 nregs = GET_MODE_SIZE (mode) / 8;
27175 for (i = 0; i < nregs; i++)
27176 parts[i] = gen_rtx_REG (DImode, regno + i);
27177 }
27178
27179 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27180 }
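
/* Illustrative example (a sketch of what the code above produces): a DFmode
   value in d0 is described to DWARF as a PARALLEL of its two SImode halves
   s0 and s1; on big-endian targets the two halves are listed in the opposite
   order.  A value in d16, which has no single-precision view, is described
   as a one-element PARALLEL containing the DImode register itself.  */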
27181
27182 #if ARM_UNWIND_INFO
27183 /* Emit unwind directives for a store-multiple instruction or stack pointer
27184 push during alignment.
27185 These should only ever be generated by the function prologue code, so
27186 expect them to have a particular form.
27187 The store-multiple instruction sometimes pushes pc as the last register,
27188 although it should not be tracked in the unwind information; for -Os it
27189 sometimes also pushes some dummy registers before the first register that
27190 needs to be tracked in the unwind information. Such dummy registers exist
27191 only to avoid a separate stack adjustment, and will not be restored in the
27192 epilogue. */
27193
27194 static void
27195 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27196 {
27197 int i;
27198 HOST_WIDE_INT offset;
27199 HOST_WIDE_INT nregs;
27200 int reg_size;
27201 unsigned reg;
27202 unsigned lastreg;
27203 unsigned padfirst = 0, padlast = 0;
27204 rtx e;
27205
27206 e = XVECEXP (p, 0, 0);
27207 gcc_assert (GET_CODE (e) == SET);
27208
27209 /* First insn will adjust the stack pointer. */
27210 gcc_assert (GET_CODE (e) == SET
27211 && REG_P (SET_DEST (e))
27212 && REGNO (SET_DEST (e)) == SP_REGNUM
27213 && GET_CODE (SET_SRC (e)) == PLUS);
27214
27215 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27216 nregs = XVECLEN (p, 0) - 1;
27217 gcc_assert (nregs);
27218
27219 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27220 if (reg < 16)
27221 {
27222 /* For -Os dummy registers can be pushed at the beginning to
27223 avoid separate stack pointer adjustment. */
27224 e = XVECEXP (p, 0, 1);
27225 e = XEXP (SET_DEST (e), 0);
27226 if (GET_CODE (e) == PLUS)
27227 padfirst = INTVAL (XEXP (e, 1));
27228 gcc_assert (padfirst == 0 || optimize_size);
27229 /* The function prologue may also push pc, but does not annotate it as it
27230 is never restored. We turn this into a stack pointer adjustment. */
27231 e = XVECEXP (p, 0, nregs);
27232 e = XEXP (SET_DEST (e), 0);
27233 if (GET_CODE (e) == PLUS)
27234 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27235 else
27236 padlast = offset - 4;
27237 gcc_assert (padlast == 0 || padlast == 4);
27238 if (padlast == 4)
27239 fprintf (asm_out_file, "\t.pad #4\n");
27240 reg_size = 4;
27241 fprintf (asm_out_file, "\t.save {");
27242 }
27243 else if (IS_VFP_REGNUM (reg))
27244 {
27245 reg_size = 8;
27246 fprintf (asm_out_file, "\t.vsave {");
27247 }
27248 else
27249 /* Unknown register type. */
27250 gcc_unreachable ();
27251
27252 /* If the stack increment doesn't match the size of the saved registers,
27253 something has gone horribly wrong. */
27254 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27255
27256 offset = padfirst;
27257 lastreg = 0;
27258 /* The remaining insns will describe the stores. */
27259 for (i = 1; i <= nregs; i++)
27260 {
27261 /* Expect (set (mem <addr>) (reg)).
27262 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27263 e = XVECEXP (p, 0, i);
27264 gcc_assert (GET_CODE (e) == SET
27265 && MEM_P (SET_DEST (e))
27266 && REG_P (SET_SRC (e)));
27267
27268 reg = REGNO (SET_SRC (e));
27269 gcc_assert (reg >= lastreg);
27270
27271 if (i != 1)
27272 fprintf (asm_out_file, ", ");
27273 /* We can't use %r for vfp because we need to use the
27274 double precision register names. */
27275 if (IS_VFP_REGNUM (reg))
27276 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27277 else
27278 asm_fprintf (asm_out_file, "%r", reg);
27279
27280 if (flag_checking)
27281 {
27282 /* Check that the addresses are consecutive. */
27283 e = XEXP (SET_DEST (e), 0);
27284 if (GET_CODE (e) == PLUS)
27285 gcc_assert (REG_P (XEXP (e, 0))
27286 && REGNO (XEXP (e, 0)) == SP_REGNUM
27287 && CONST_INT_P (XEXP (e, 1))
27288 && offset == INTVAL (XEXP (e, 1)));
27289 else
27290 gcc_assert (i == 1
27291 && REG_P (e)
27292 && REGNO (e) == SP_REGNUM);
27293 offset += reg_size;
27294 }
27295 }
27296 fprintf (asm_out_file, "}\n");
27297 if (padfirst)
27298 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27299 }
27300
27301 /* Emit unwind directives for a SET. */
27302
27303 static void
27304 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27305 {
27306 rtx e0;
27307 rtx e1;
27308 unsigned reg;
27309
27310 e0 = XEXP (p, 0);
27311 e1 = XEXP (p, 1);
27312 switch (GET_CODE (e0))
27313 {
27314 case MEM:
27315 /* Pushing a single register. */
27316 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27317 || !REG_P (XEXP (XEXP (e0, 0), 0))
27318 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27319 abort ();
27320
27321 asm_fprintf (asm_out_file, "\t.save ");
27322 if (IS_VFP_REGNUM (REGNO (e1)))
27323 asm_fprintf(asm_out_file, "{d%d}\n",
27324 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27325 else
27326 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27327 break;
27328
27329 case REG:
27330 if (REGNO (e0) == SP_REGNUM)
27331 {
27332 /* A stack increment. */
27333 if (GET_CODE (e1) != PLUS
27334 || !REG_P (XEXP (e1, 0))
27335 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27336 || !CONST_INT_P (XEXP (e1, 1)))
27337 abort ();
27338
27339 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27340 -INTVAL (XEXP (e1, 1)));
27341 }
27342 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27343 {
27344 HOST_WIDE_INT offset;
27345
27346 if (GET_CODE (e1) == PLUS)
27347 {
27348 if (!REG_P (XEXP (e1, 0))
27349 || !CONST_INT_P (XEXP (e1, 1)))
27350 abort ();
27351 reg = REGNO (XEXP (e1, 0));
27352 offset = INTVAL (XEXP (e1, 1));
27353 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27354 HARD_FRAME_POINTER_REGNUM, reg,
27355 offset);
27356 }
27357 else if (REG_P (e1))
27358 {
27359 reg = REGNO (e1);
27360 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27361 HARD_FRAME_POINTER_REGNUM, reg);
27362 }
27363 else
27364 abort ();
27365 }
27366 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27367 {
27368 /* Move from sp to reg. */
27369 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27370 }
27371 else if (GET_CODE (e1) == PLUS
27372 && REG_P (XEXP (e1, 0))
27373 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27374 && CONST_INT_P (XEXP (e1, 1)))
27375 {
27376 /* Set reg to offset from sp. */
27377 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27378 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27379 }
27380 else
27381 abort ();
27382 break;
27383
27384 default:
27385 abort ();
27386 }
27387 }
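
/* Illustrative examples of the directives emitted above (sketches only,
   assuming typical prologue RTL; exact registers depend on the target and
   frame layout):
     single-register push  `str r4, [sp, #-4]!`          ->  .save {r4}
     stack adjustment      `sub sp, sp, #16`             ->  .pad #16
     frame pointer setup   `add r7, sp, #8` (Thumb fp)   ->  .setfp r7, sp, #8
*/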
27388
27389
27390 /* Emit unwind directives for the given insn. */
27391
27392 static void
27393 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27394 {
27395 rtx note, pat;
27396 bool handled_one = false;
27397
27398 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27399 return;
27400
27401 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27402 && (TREE_NOTHROW (current_function_decl)
27403 || crtl->all_throwers_are_sibcalls))
27404 return;
27405
27406 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27407 return;
27408
27409 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27410 {
27411 switch (REG_NOTE_KIND (note))
27412 {
27413 case REG_FRAME_RELATED_EXPR:
27414 pat = XEXP (note, 0);
27415 goto found;
27416
27417 case REG_CFA_REGISTER:
27418 pat = XEXP (note, 0);
27419 if (pat == NULL)
27420 {
27421 pat = PATTERN (insn);
27422 if (GET_CODE (pat) == PARALLEL)
27423 pat = XVECEXP (pat, 0, 0);
27424 }
27425
27426 /* Only emitted for IS_STACKALIGN re-alignment. */
27427 {
27428 rtx dest, src;
27429 unsigned reg;
27430
27431 src = SET_SRC (pat);
27432 dest = SET_DEST (pat);
27433
27434 gcc_assert (src == stack_pointer_rtx);
27435 reg = REGNO (dest);
27436 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27437 reg + 0x90, reg);
27438 }
27439 handled_one = true;
27440 break;
27441
27442 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27443 to get correct dwarf information for shrink-wrap. We should not
27444 emit unwind information for it because these insns are used either for
27445 pretend arguments or as notes to adjust sp and restore registers from the
27446 stack. */
27447 case REG_CFA_DEF_CFA:
27448 case REG_CFA_ADJUST_CFA:
27449 case REG_CFA_RESTORE:
27450 return;
27451
27452 case REG_CFA_EXPRESSION:
27453 case REG_CFA_OFFSET:
27454 /* ??? Only handling here what we actually emit. */
27455 gcc_unreachable ();
27456
27457 default:
27458 break;
27459 }
27460 }
27461 if (handled_one)
27462 return;
27463 pat = PATTERN (insn);
27464 found:
27465
27466 switch (GET_CODE (pat))
27467 {
27468 case SET:
27469 arm_unwind_emit_set (asm_out_file, pat);
27470 break;
27471
27472 case SEQUENCE:
27473 /* Store multiple. */
27474 arm_unwind_emit_sequence (asm_out_file, pat);
27475 break;
27476
27477 default:
27478 abort();
27479 }
27480 }
27481
27482
27483 /* Output a reference from a function exception table to the type_info
27484 object X. The EABI specifies that the symbol should be relocated by
27485 an R_ARM_TARGET2 relocation. */
27486
27487 static bool
27488 arm_output_ttype (rtx x)
27489 {
27490 fputs ("\t.word\t", asm_out_file);
27491 output_addr_const (asm_out_file, x);
27492 /* Use special relocations for symbol references. */
27493 if (!CONST_INT_P (x))
27494 fputs ("(TARGET2)", asm_out_file);
27495 fputc ('\n', asm_out_file);
27496
27497 return TRUE;
27498 }
27499
27500 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27501
27502 static void
27503 arm_asm_emit_except_personality (rtx personality)
27504 {
27505 fputs ("\t.personality\t", asm_out_file);
27506 output_addr_const (asm_out_file, personality);
27507 fputc ('\n', asm_out_file);
27508 }
27509 #endif /* ARM_UNWIND_INFO */
27510
27511 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27512
27513 static void
27514 arm_asm_init_sections (void)
27515 {
27516 #if ARM_UNWIND_INFO
27517 exception_section = get_unnamed_section (0, output_section_asm_op,
27518 "\t.handlerdata");
27519 #endif /* ARM_UNWIND_INFO */
27520
27521 #ifdef OBJECT_FORMAT_ELF
27522 if (target_pure_code)
27523 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27524 #endif
27525 }
27526
27527 /* Output unwind directives for the start/end of a function. */
27528
27529 void
27530 arm_output_fn_unwind (FILE * f, bool prologue)
27531 {
27532 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27533 return;
27534
27535 if (prologue)
27536 fputs ("\t.fnstart\n", f);
27537 else
27538 {
27539 /* If this function will never be unwound, then mark it as such.
27540 The same condition is used in arm_unwind_emit to suppress
27541 the frame annotations. */
27542 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27543 && (TREE_NOTHROW (current_function_decl)
27544 || crtl->all_throwers_are_sibcalls))
27545 fputs("\t.cantunwind\n", f);
27546
27547 fputs ("\t.fnend\n", f);
27548 }
27549 }
27550
27551 static bool
27552 arm_emit_tls_decoration (FILE *fp, rtx x)
27553 {
27554 enum tls_reloc reloc;
27555 rtx val;
27556
27557 val = XVECEXP (x, 0, 0);
27558 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27559
27560 output_addr_const (fp, val);
27561
27562 switch (reloc)
27563 {
27564 case TLS_GD32:
27565 fputs ("(tlsgd)", fp);
27566 break;
27567 case TLS_LDM32:
27568 fputs ("(tlsldm)", fp);
27569 break;
27570 case TLS_LDO32:
27571 fputs ("(tlsldo)", fp);
27572 break;
27573 case TLS_IE32:
27574 fputs ("(gottpoff)", fp);
27575 break;
27576 case TLS_LE32:
27577 fputs ("(tpoff)", fp);
27578 break;
27579 case TLS_DESCSEQ:
27580 fputs ("(tlsdesc)", fp);
27581 break;
27582 default:
27583 gcc_unreachable ();
27584 }
27585
27586 switch (reloc)
27587 {
27588 case TLS_GD32:
27589 case TLS_LDM32:
27590 case TLS_IE32:
27591 case TLS_DESCSEQ:
27592 fputs (" + (. - ", fp);
27593 output_addr_const (fp, XVECEXP (x, 0, 2));
27594 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27595 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27596 output_addr_const (fp, XVECEXP (x, 0, 3));
27597 fputc (')', fp);
27598 break;
27599 default:
27600 break;
27601 }
27602
27603 return TRUE;
27604 }
27605
27606 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27607
27608 static void
27609 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27610 {
27611 gcc_assert (size == 4);
27612 fputs ("\t.word\t", file);
27613 output_addr_const (file, x);
27614 fputs ("(tlsldo)", file);
27615 }
27616
27617 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27618
27619 static bool
27620 arm_output_addr_const_extra (FILE *fp, rtx x)
27621 {
27622 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27623 return arm_emit_tls_decoration (fp, x);
27624 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27625 {
27626 char label[256];
27627 int labelno = INTVAL (XVECEXP (x, 0, 0));
27628
27629 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27630 assemble_name_raw (fp, label);
27631
27632 return TRUE;
27633 }
27634 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27635 {
27636 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27637 if (GOT_PCREL)
27638 fputs ("+.", fp);
27639 fputs ("-(", fp);
27640 output_addr_const (fp, XVECEXP (x, 0, 0));
27641 fputc (')', fp);
27642 return TRUE;
27643 }
27644 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27645 {
27646 output_addr_const (fp, XVECEXP (x, 0, 0));
27647 if (GOT_PCREL)
27648 fputs ("+.", fp);
27649 fputs ("-(", fp);
27650 output_addr_const (fp, XVECEXP (x, 0, 1));
27651 fputc (')', fp);
27652 return TRUE;
27653 }
27654 else if (GET_CODE (x) == CONST_VECTOR)
27655 return arm_emit_vector_const (fp, x);
27656
27657 return FALSE;
27658 }
27659
27660 /* Output assembly for a shift instruction.
27661 SET_FLAGS determines how the instruction modifies the condition codes.
27662 0 - Do not set condition codes.
27663 1 - Set condition codes.
27664 2 - Use smallest instruction. */
27665 const char *
27666 arm_output_shift(rtx * operands, int set_flags)
27667 {
27668 char pattern[100];
27669 static const char flag_chars[3] = {'?', '.', '!'};
27670 const char *shift;
27671 HOST_WIDE_INT val;
27672 char c;
27673
27674 c = flag_chars[set_flags];
27675 shift = shift_op(operands[3], &val);
27676 if (shift)
27677 {
27678 if (val != -1)
27679 operands[2] = GEN_INT(val);
27680 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27681 }
27682 else
27683 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27684
27685 output_asm_insn (pattern, operands);
27686 return "";
27687 }
27688
27689 /* Output assembly for a WMMX immediate shift instruction. */
27690 const char *
27691 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27692 {
27693 int shift = INTVAL (operands[2]);
27694 char templ[50];
27695 machine_mode opmode = GET_MODE (operands[0]);
27696
27697 gcc_assert (shift >= 0);
27698
27699 /* Handle a shift value that exceeds the element size: > 63 for the D
27700 qualifier, > 31 for the W qualifier, or > 15 for the H qualifier. */
27701 if (((opmode == V4HImode) && (shift > 15))
27702 || ((opmode == V2SImode) && (shift > 31))
27703 || ((opmode == DImode) && (shift > 63)))
27704 {
27705 if (wror_or_wsra)
27706 {
27707 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27708 output_asm_insn (templ, operands);
27709 if (opmode == DImode)
27710 {
27711 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27712 output_asm_insn (templ, operands);
27713 }
27714 }
27715 else
27716 {
27717 /* The destination register will contain all zeros. */
27718 sprintf (templ, "wzero\t%%0");
27719 output_asm_insn (templ, operands);
27720 }
27721 return "";
27722 }
27723
27724 if ((opmode == DImode) && (shift > 32))
27725 {
27726 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27727 output_asm_insn (templ, operands);
27728 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27729 output_asm_insn (templ, operands);
27730 }
27731 else
27732 {
27733 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27734 output_asm_insn (templ, operands);
27735 }
27736 return "";
27737 }
27738
27739 /* Output assembly for a WMMX tinsr instruction. */
27740 const char *
27741 arm_output_iwmmxt_tinsr (rtx *operands)
27742 {
27743 int mask = INTVAL (operands[3]);
27744 int i;
27745 char templ[50];
27746 int units = mode_nunits[GET_MODE (operands[0])];
27747 gcc_assert ((mask & (mask - 1)) == 0);
27748 for (i = 0; i < units; ++i)
27749 {
27750 if ((mask & 0x01) == 1)
27751 {
27752 break;
27753 }
27754 mask >>= 1;
27755 }
27756 gcc_assert (i < units);
27757 {
27758 switch (GET_MODE (operands[0]))
27759 {
27760 case E_V8QImode:
27761 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27762 break;
27763 case E_V4HImode:
27764 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27765 break;
27766 case E_V2SImode:
27767 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27768 break;
27769 default:
27770 gcc_unreachable ();
27771 break;
27772 }
27773 output_asm_insn (templ, operands);
27774 }
27775 return "";
27776 }
27777
27778 /* Output a Thumb-1 casesi dispatch sequence. */
27779 const char *
27780 thumb1_output_casesi (rtx *operands)
27781 {
27782 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27783
27784 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27785
27786 switch (GET_MODE(diff_vec))
27787 {
27788 case E_QImode:
27789 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27790 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27791 case E_HImode:
27792 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27793 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27794 case E_SImode:
27795 return "bl\t%___gnu_thumb1_case_si";
27796 default:
27797 gcc_unreachable ();
27798 }
27799 }
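
/* Illustrative note: the `bl` above calls a helper from libgcc's
   lib1funcs.S; for example an unsigned QImode table dispatch becomes
   `bl __gnu_thumb1_case_uqi`, and the helper uses the return address in lr
   to locate and index the byte-sized jump table that the compiler places
   immediately after the call.  (A sketch of the mechanism, not a literal
   listing.)  */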
27800
27801 /* Output a Thumb-2 casesi instruction. */
27802 const char *
27803 thumb2_output_casesi (rtx *operands)
27804 {
27805 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27806
27807 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27808
27809 output_asm_insn ("cmp\t%0, %1", operands);
27810 output_asm_insn ("bhi\t%l3", operands);
27811 switch (GET_MODE(diff_vec))
27812 {
27813 case E_QImode:
27814 return "tbb\t[%|pc, %0]";
27815 case E_HImode:
27816 return "tbh\t[%|pc, %0, lsl #1]";
27817 case E_SImode:
27818 if (flag_pic)
27819 {
27820 output_asm_insn ("adr\t%4, %l2", operands);
27821 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27822 output_asm_insn ("add\t%4, %4, %5", operands);
27823 return "bx\t%4";
27824 }
27825 else
27826 {
27827 output_asm_insn ("adr\t%4, %l2", operands);
27828 return "ldr\t%|pc, [%4, %0, lsl #2]";
27829 }
27830 default:
27831 gcc_unreachable ();
27832 }
27833 }
27834
27835 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27836 per-core tuning structs. */
27837 static int
27838 arm_issue_rate (void)
27839 {
27840 return current_tune->issue_rate;
27841 }
27842
27843 /* Return how many instructions the scheduler should look ahead to choose
27844 the best one. */
27845 static int
27846 arm_first_cycle_multipass_dfa_lookahead (void)
27847 {
27848 int issue_rate = arm_issue_rate ();
27849
27850 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27851 }
27852
27853 /* Enable modeling of L2 auto-prefetcher. */
27854 static int
27855 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27856 {
27857 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27858 }
27859
27860 const char *
27861 arm_mangle_type (const_tree type)
27862 {
27863 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27864 has to be mangled as if it is in the "std" namespace. */
27865 if (TARGET_AAPCS_BASED
27866 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27867 return "St9__va_list";
27868
27869 /* Half-precision float. */
27870 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27871 return "Dh";
27872
27873 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27874 builtin type. */
27875 if (TYPE_NAME (type) != NULL)
27876 return arm_mangle_builtin_type (type);
27877
27878 /* Use the default mangling. */
27879 return NULL;
27880 }
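
/* Illustrative example (standard Itanium mangling, assuming an AAPCS
   target): the half-precision type __fp16 mangles as Dh, so a function
   `void f (__fp16)` would mangle as `_Z1fDh`, and a function taking a
   va_list uses `St9__va_list` in its mangled name as returned above.  */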
27881
27882 /* Order of allocation of core registers for Thumb: this allocation is
27883 written over the corresponding initial entries of the array
27884 initialized with REG_ALLOC_ORDER. We allocate all low registers
27885 first. Saving and restoring a low register is usually cheaper than
27886 using a call-clobbered high register. */
27887
27888 static const int thumb_core_reg_alloc_order[] =
27889 {
27890 3, 2, 1, 0, 4, 5, 6, 7,
27891 12, 14, 8, 9, 10, 11
27892 };
27893
27894 /* Adjust register allocation order when compiling for Thumb. */
27895
27896 void
27897 arm_order_regs_for_local_alloc (void)
27898 {
27899 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27900 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27901 if (TARGET_THUMB)
27902 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27903 sizeof (thumb_core_reg_alloc_order));
27904 }
27905
27906 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27907
27908 bool
27909 arm_frame_pointer_required (void)
27910 {
27911 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27912 return true;
27913
27914 /* If the function receives nonlocal gotos, it needs to save the frame
27915 pointer in the nonlocal_goto_save_area object. */
27916 if (cfun->has_nonlocal_label)
27917 return true;
27918
27919 /* The frame pointer is required for non-leaf APCS frames. */
27920 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27921 return true;
27922
27923 /* If we are probing the stack in the prologue, we will have a faulting
27924 instruction prior to the stack adjustment and this requires a frame
27925 pointer if we want to catch the exception using the EABI unwinder. */
27926 if (!IS_INTERRUPT (arm_current_func_type ())
27927 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27928 || flag_stack_clash_protection)
27929 && arm_except_unwind_info (&global_options) == UI_TARGET
27930 && cfun->can_throw_non_call_exceptions)
27931 {
27932 HOST_WIDE_INT size = get_frame_size ();
27933
27934 /* That's irrelevant if there is no stack adjustment. */
27935 if (size <= 0)
27936 return false;
27937
27938 /* That's relevant only if there is a stack probe. */
27939 if (crtl->is_leaf && !cfun->calls_alloca)
27940 {
27941 /* We don't have the final size of the frame so adjust. */
27942 size += 32 * UNITS_PER_WORD;
27943 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27944 return true;
27945 }
27946 else
27947 return true;
27948 }
27949
27950 return false;
27951 }
27952
27953 /* Thumb-1 is the only target that cannot support conditional execution,
27954 so return true if the target is not Thumb-1. */
27955 static bool
27956 arm_have_conditional_execution (void)
27957 {
27958 return !TARGET_THUMB1;
27959 }
27960
27961 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27962 static HOST_WIDE_INT
27963 arm_vector_alignment (const_tree type)
27964 {
27965 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27966
27967 if (TARGET_AAPCS_BASED)
27968 align = MIN (align, 64);
27969
27970 return align;
27971 }
27972
27973 static unsigned int
27974 arm_autovectorize_vector_sizes (void)
27975 {
27976 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27977 }
27978
27979 static bool
27980 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27981 {
27982 /* Vectors which aren't in packed structures will not be less aligned than
27983 the natural alignment of their element type, so this is safe. */
27984 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27985 return !is_packed;
27986
27987 return default_builtin_vector_alignment_reachable (type, is_packed);
27988 }
27989
27990 static bool
27991 arm_builtin_support_vector_misalignment (machine_mode mode,
27992 const_tree type, int misalignment,
27993 bool is_packed)
27994 {
27995 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27996 {
27997 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27998
27999 if (is_packed)
28000 return align == 1;
28001
28002 /* If the misalignment is unknown, we should be able to handle the access
28003 so long as it is not to a member of a packed data structure. */
28004 if (misalignment == -1)
28005 return true;
28006
28007 /* Return true if the misalignment is a multiple of the natural alignment
28008 of the vector's element type. This is probably always going to be
28009 true in practice, since we've already established that this isn't a
28010 packed access. */
28011 return ((misalignment % align) == 0);
28012 }
28013
28014 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28015 is_packed);
28016 }
28017
28018 static void
28019 arm_conditional_register_usage (void)
28020 {
28021 int regno;
28022
28023 if (TARGET_THUMB1 && optimize_size)
28024 {
28025 /* When optimizing for size on Thumb-1, it's better not
28026 to use the HI regs, because of the overhead of
28027 stacking them. */
28028 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28029 fixed_regs[regno] = call_used_regs[regno] = 1;
28030 }
28031
28032 /* The link register can be clobbered by any branch insn,
28033 but we have no way to track that at present, so mark
28034 it as unavailable. */
28035 if (TARGET_THUMB1)
28036 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28037
28038 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28039 {
28040 /* VFPv3 registers are disabled when earlier VFP
28041 versions are selected due to the definition of
28042 LAST_VFP_REGNUM. */
28043 for (regno = FIRST_VFP_REGNUM;
28044 regno <= LAST_VFP_REGNUM; ++ regno)
28045 {
28046 fixed_regs[regno] = 0;
28047 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28048 || regno >= FIRST_VFP_REGNUM + 32;
28049 }
28050 }
28051
28052 if (TARGET_REALLY_IWMMXT)
28053 {
28054 regno = FIRST_IWMMXT_GR_REGNUM;
28055 /* The 2002/10/09 revision of the XScale ABI has wCG0
28056 and wCG1 as call-preserved registers. The 2002/11/21
28057 revision changed this so that all wCG registers are
28058 scratch registers. */
28059 for (regno = FIRST_IWMMXT_GR_REGNUM;
28060 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28061 fixed_regs[regno] = 0;
28062 /* The XScale ABI has wR0 - wR9 as scratch registers,
28063 the rest as call-preserved registers. */
28064 for (regno = FIRST_IWMMXT_REGNUM;
28065 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28066 {
28067 fixed_regs[regno] = 0;
28068 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28069 }
28070 }
28071
28072 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28073 {
28074 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28075 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28076 }
28077 else if (TARGET_APCS_STACK)
28078 {
28079 fixed_regs[10] = 1;
28080 call_used_regs[10] = 1;
28081 }
28082 /* -mcaller-super-interworking reserves r11 for calls to
28083 _interwork_r11_call_via_rN(). Making the register global
28084 is an easy way of ensuring that it remains valid for all
28085 calls. */
28086 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28087 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28088 {
28089 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28090 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28091 if (TARGET_CALLER_INTERWORKING)
28092 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28093 }
28094 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28095 }
28096
28097 static reg_class_t
28098 arm_preferred_rename_class (reg_class_t rclass)
28099 {
28100 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28101 using GENERAL_REGS. During the register rename pass we therefore prefer
28102 LO_REGS, so that code size can be reduced. */
28103 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28104 return LO_REGS;
28105 else
28106 return NO_REGS;
28107 }
28108
28109 /* Compute the attribute "length" of insn "*push_multi".
28110 So this function MUST be kept in sync with that insn pattern. */
28111 int
28112 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28113 {
28114 int i, regno, hi_reg;
28115 int num_saves = XVECLEN (parallel_op, 0);
28116
28117 /* ARM mode. */
28118 if (TARGET_ARM)
28119 return 4;
28120 /* Thumb1 mode. */
28121 if (TARGET_THUMB1)
28122 return 2;
28123
28124 /* Thumb2 mode. */
28125 regno = REGNO (first_op);
28126 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
28127 list fits in the 8-bit register field. Normally this means all registers
28128 in the list must be LO_REGS, that is R0-R7. If any HI_REGS are used, then
28129 a 32-bit encoding is required. The one exception is PUSH, where LR (a
28130 HI_REGS register) can still be used with the 16-bit encoding. */
28131 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28132 for (i = 1; i < num_saves && !hi_reg; i++)
28133 {
28134 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28135 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28136 }
28137
28138 if (!hi_reg)
28139 return 2;
28140 return 4;
28141 }
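
/* Illustrative examples for the length computation above (Thumb-2):
   `push {r0-r7, lr}` uses the 16-bit encoding (length 2), while
   `push {r4, r8}` needs the 32-bit encoding (length 4) because r8 is a
   high register other than LR.  */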
28142
28143 /* Compute the attribute "length" of insn. Currently, this function is used
28144 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28145 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28146 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28147 true if OPERANDS contains an insn that explicitly updates the base register. */
28148
28149 int
28150 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28151 {
28152 /* ARM mode. */
28153 if (TARGET_ARM)
28154 return 4;
28155 /* Thumb1 mode. */
28156 if (TARGET_THUMB1)
28157 return 2;
28158
28159 rtx parallel_op = operands[0];
28160 /* Start at the index of the last element of the PARALLEL. */
28161 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28162 /* Start with the number of the base register. */
28163 unsigned regno = REGNO (operands[1]);
28164 /* Skip the return and write-back patterns;
28165 only the register pop patterns are needed for the analysis below. */
28166 unsigned first_indx = 0;
28167 first_indx += return_pc ? 1 : 0;
28168 first_indx += write_back_p ? 1 : 0;
28169
28170 /* A pop operation can be done with either LDM or POP. If the base register
28171 is SP and write-back is used, then the LDM is an alias of POP. */
28172 bool pop_p = (regno == SP_REGNUM && write_back_p);
28173 bool ldm_p = !pop_p;
28174
28175 /* Check base register for LDM. */
28176 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28177 return 4;
28178
28179 /* Check each register in the list. */
28180 for (; indx >= first_indx; indx--)
28181 {
28182 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28183 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28184 comment in arm_attr_length_push_multi. */
28185 if (REGNO_REG_CLASS (regno) == HI_REGS
28186 && (regno != PC_REGNUM || ldm_p))
28187 return 4;
28188 }
28189
28190 return 2;
28191 }
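
/* Illustrative examples for the length computation above (Thumb-2):
   `pop {r4-r7, pc}` uses the 16-bit encoding (length 2), while an
   equivalent LDM with a high base register, or a register list containing
   a high register other than PC, needs the 32-bit encoding (length 4).  */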
28192
28193 /* Compute the number of instructions emitted by output_move_double. */
28194 int
28195 arm_count_output_move_double_insns (rtx *operands)
28196 {
28197 int count;
28198 rtx ops[2];
28199 /* output_move_double may modify the operands array, so call it
28200 here on a copy of the array. */
28201 ops[0] = operands[0];
28202 ops[1] = operands[1];
28203 output_move_double (ops, false, &count);
28204 return count;
28205 }
28206
28207 int
28208 vfp3_const_double_for_fract_bits (rtx operand)
28209 {
28210 REAL_VALUE_TYPE r0;
28211
28212 if (!CONST_DOUBLE_P (operand))
28213 return 0;
28214
28215 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28216 if (exact_real_inverse (DFmode, &r0)
28217 && !REAL_VALUE_NEGATIVE (r0))
28218 {
28219 if (exact_real_truncate (DFmode, &r0))
28220 {
28221 HOST_WIDE_INT value = real_to_integer (&r0);
28222 value = value & 0xffffffff;
28223 if ((value != 0) && ( (value & (value - 1)) == 0))
28224 {
28225 int ret = exact_log2 (value);
28226 gcc_assert (IN_RANGE (ret, 0, 31));
28227 return ret;
28228 }
28229 }
28230 }
28231 return 0;
28232 }
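
/* Illustrative example: for the constant 0.125 the exact inverse is 8.0,
   which is 2^3, so the function above returns 3, matching a fixed-point
   conversion with 3 fraction bits.  A constant whose inverse is not an
   exact power of two yields 0.  */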
28233
28234 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28235 log2 is in [1, 32], return that log2. Otherwise return -1.
28236 This is used in the patterns for vcvt.s32.f32 floating-point to
28237 fixed-point conversions. */
28238
28239 int
28240 vfp3_const_double_for_bits (rtx x)
28241 {
28242 const REAL_VALUE_TYPE *r;
28243
28244 if (!CONST_DOUBLE_P (x))
28245 return -1;
28246
28247 r = CONST_DOUBLE_REAL_VALUE (x);
28248
28249 if (REAL_VALUE_NEGATIVE (*r)
28250 || REAL_VALUE_ISNAN (*r)
28251 || REAL_VALUE_ISINF (*r)
28252 || !real_isinteger (r, SFmode))
28253 return -1;
28254
28255 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28256
28257 /* The exact_log2 above will have returned -1 if this is
28258 not an exact log2. */
28259 if (!IN_RANGE (hwint, 1, 32))
28260 return -1;
28261
28262 return hwint;
28263 }
28264
28265 \f
28266 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28267
28268 static void
28269 arm_pre_atomic_barrier (enum memmodel model)
28270 {
28271 if (need_atomic_barrier_p (model, true))
28272 emit_insn (gen_memory_barrier ());
28273 }
28274
28275 static void
28276 arm_post_atomic_barrier (enum memmodel model)
28277 {
28278 if (need_atomic_barrier_p (model, false))
28279 emit_insn (gen_memory_barrier ());
28280 }
28281
28282 /* Emit the load-exclusive and store-exclusive instructions.
28283 Use acquire and release versions if necessary. */
28284
28285 static void
28286 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28287 {
28288 rtx (*gen) (rtx, rtx);
28289
28290 if (acq)
28291 {
28292 switch (mode)
28293 {
28294 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28295 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28296 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28297 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28298 default:
28299 gcc_unreachable ();
28300 }
28301 }
28302 else
28303 {
28304 switch (mode)
28305 {
28306 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28307 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28308 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28309 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28310 default:
28311 gcc_unreachable ();
28312 }
28313 }
28314
28315 emit_insn (gen (rval, mem));
28316 }
28317
28318 static void
28319 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28320 rtx mem, bool rel)
28321 {
28322 rtx (*gen) (rtx, rtx, rtx);
28323
28324 if (rel)
28325 {
28326 switch (mode)
28327 {
28328 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28329 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28330 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28331 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28332 default:
28333 gcc_unreachable ();
28334 }
28335 }
28336 else
28337 {
28338 switch (mode)
28339 {
28340 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28341 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28342 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28343 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28344 default:
28345 gcc_unreachable ();
28346 }
28347 }
28348
28349 emit_insn (gen (bval, rval, mem));
28350 }
28351
28352 /* Mark the previous jump instruction as unlikely. */
28353
28354 static void
28355 emit_unlikely_jump (rtx insn)
28356 {
28357 rtx_insn *jump = emit_jump_insn (insn);
28358 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28359 }
28360
28361 /* Expand a compare and swap pattern. */
28362
28363 void
28364 arm_expand_compare_and_swap (rtx operands[])
28365 {
28366 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28367 machine_mode mode;
28368 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28369
28370 bval = operands[0];
28371 rval = operands[1];
28372 mem = operands[2];
28373 oldval = operands[3];
28374 newval = operands[4];
28375 is_weak = operands[5];
28376 mod_s = operands[6];
28377 mod_f = operands[7];
28378 mode = GET_MODE (mem);
28379
28380 /* Normally the succ memory model must be stronger than fail, but in the
28381 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28382 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28383
28384 if (TARGET_HAVE_LDACQ
28385 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28386 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28387 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28388
28389 switch (mode)
28390 {
28391 case E_QImode:
28392 case E_HImode:
28393 /* For narrow modes, we're going to perform the comparison in SImode,
28394 so do the zero-extension now. */
28395 rval = gen_reg_rtx (SImode);
28396 oldval = convert_modes (SImode, mode, oldval, true);
28397 /* FALLTHRU */
28398
28399 case E_SImode:
28400 /* Force the value into a register if needed. We waited until after
28401 the zero-extension above to do this properly. */
28402 if (!arm_add_operand (oldval, SImode))
28403 oldval = force_reg (SImode, oldval);
28404 break;
28405
28406 case E_DImode:
28407 if (!cmpdi_operand (oldval, mode))
28408 oldval = force_reg (mode, oldval);
28409 break;
28410
28411 default:
28412 gcc_unreachable ();
28413 }
28414
28415 if (TARGET_THUMB1)
28416 {
28417 switch (mode)
28418 {
28419 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28420 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28421 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28422 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28423 default:
28424 gcc_unreachable ();
28425 }
28426 }
28427 else
28428 {
28429 switch (mode)
28430 {
28431 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28432 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28433 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28434 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28435 default:
28436 gcc_unreachable ();
28437 }
28438 }
28439
28440 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28441 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28442
28443 if (mode == QImode || mode == HImode)
28444 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28445
28446 /* In all cases, we arrange for success to be signaled by Z set.
28447 This arrangement allows for the boolean result to be used directly
28448 in a subsequent branch, post optimization. For Thumb-1 targets, the
28449 boolean negation of the result is also stored in bval because the Thumb-1
28450 backend lacks dependency tracking for the CC flag, since flag-setting is
28451 not represented at the RTL level. */
28452 if (TARGET_THUMB1)
28453 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28454 else
28455 {
28456 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28457 emit_insn (gen_rtx_SET (bval, x));
28458 }
28459 }
28460
28461 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28462 another memory store between the load-exclusive and store-exclusive can
28463 reset the monitor from Exclusive to Open state. This means we must wait
28464 until after reload to split the pattern, lest we get a register spill in
28465 the middle of the atomic sequence. Success of the compare and swap is
28466 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28467 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28468 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
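
/* A sketch of the kind of sequence the split produces for a strong SImode
   compare-and-swap on an LDREX/STREX target (illustrative only; the real
   output depends on the memory model, target and register allocation):

       1:  ldrex   r0, [r2]        @ load current value
           cmp     r0, r3          @ compare with oldval
           bne     2f              @ mismatch: leave with Z clear
           strex   r1, r4, [r2]    @ try to store newval
           cmp     r1, #0          @ Z set on success
           bne     1b              @ lost reservation: retry
       2:
*/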
28469
28470 void
28471 arm_split_compare_and_swap (rtx operands[])
28472 {
28473 rtx rval, mem, oldval, newval, neg_bval;
28474 machine_mode mode;
28475 enum memmodel mod_s, mod_f;
28476 bool is_weak;
28477 rtx_code_label *label1, *label2;
28478 rtx x, cond;
28479
28480 rval = operands[1];
28481 mem = operands[2];
28482 oldval = operands[3];
28483 newval = operands[4];
28484 is_weak = (operands[5] != const0_rtx);
28485 mod_s = memmodel_from_int (INTVAL (operands[6]));
28486 mod_f = memmodel_from_int (INTVAL (operands[7]));
28487 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28488 mode = GET_MODE (mem);
28489
28490 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28491
28492 bool use_acquire = TARGET_HAVE_LDACQ
28493 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28494 || is_mm_release (mod_s));
28495
28496 bool use_release = TARGET_HAVE_LDACQ
28497 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28498 || is_mm_acquire (mod_s));
28499
28500 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28501 a full barrier is emitted after the store-release. */
28502 if (is_armv8_sync)
28503 use_acquire = false;
28504
28505 /* Checks whether a barrier is needed and emits one accordingly. */
28506 if (!(use_acquire || use_release))
28507 arm_pre_atomic_barrier (mod_s);
28508
28509 label1 = NULL;
28510 if (!is_weak)
28511 {
28512 label1 = gen_label_rtx ();
28513 emit_label (label1);
28514 }
28515 label2 = gen_label_rtx ();
28516
28517 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28518
28519 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28520 as required to communicate with arm_expand_compare_and_swap. */
28521 if (TARGET_32BIT)
28522 {
28523 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28524 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28525 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28526 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28527 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28528 }
28529 else
28530 {
28531 emit_move_insn (neg_bval, const1_rtx);
28532 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28533 if (thumb1_cmpneg_operand (oldval, SImode))
28534 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28535 label2, cond));
28536 else
28537 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28538 }
28539
28540 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28541
28542 /* Weak or strong, we want EQ to be true for success, so that we
28543 match the flags that we got from the compare above. */
28544 if (TARGET_32BIT)
28545 {
28546 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28547 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28548 emit_insn (gen_rtx_SET (cond, x));
28549 }
28550
28551 if (!is_weak)
28552 {
28553 /* Z is set to boolean value of !neg_bval, as required to communicate
28554 with arm_expand_compare_and_swap. */
28555 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28556 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28557 }
28558
28559 if (!is_mm_relaxed (mod_f))
28560 emit_label (label2);
28561
28562 /* Checks whether a barrier is needed and emits one accordingly. */
28563 if (is_armv8_sync
28564 || !(use_acquire || use_release))
28565 arm_post_atomic_barrier (mod_s);
28566
28567 if (is_mm_relaxed (mod_f))
28568 emit_label (label2);
28569 }
28570
28571 /* Split an atomic operation pattern. Operation is given by CODE and is one
28572 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28573 operation). Operation is performed on the content at MEM and on VALUE
28574 following the memory model MODEL_RTX. The content at MEM before and after
28575 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28576 success of the operation is returned in COND. Using a scratch register or
28577 an operand register for these determines what result is returned for that
28578 pattern. */
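
/* As an illustration (a sketch, not literal output), an atomic add on an
   LDREX/STREX target is split into a retry loop of roughly this shape:

       1:  ldrex   r0, [r3]        @ old_out
           add     r1, r0, r2      @ new_out = old_out + value
           strex   ip, r1, [r3]    @ ip is the success/failure flag
           cmp     ip, #0
           bne     1b
*/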
28579
28580 void
28581 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28582 rtx value, rtx model_rtx, rtx cond)
28583 {
28584 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28585 machine_mode mode = GET_MODE (mem);
28586 machine_mode wmode = (mode == DImode ? DImode : SImode);
28587 rtx_code_label *label;
28588 bool all_low_regs, bind_old_new;
28589 rtx x;
28590
28591 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28592
28593 bool use_acquire = TARGET_HAVE_LDACQ
28594 && !(is_mm_relaxed (model) || is_mm_consume (model)
28595 || is_mm_release (model));
28596
28597 bool use_release = TARGET_HAVE_LDACQ
28598 && !(is_mm_relaxed (model) || is_mm_consume (model)
28599 || is_mm_acquire (model));
28600
28601 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28602 a full barrier is emitted after the store-release. */
28603 if (is_armv8_sync)
28604 use_acquire = false;
28605
28606 /* Checks whether a barrier is needed and emits one accordingly. */
28607 if (!(use_acquire || use_release))
28608 arm_pre_atomic_barrier (model);
28609
28610 label = gen_label_rtx ();
28611 emit_label (label);
28612
28613 if (new_out)
28614 new_out = gen_lowpart (wmode, new_out);
28615 if (old_out)
28616 old_out = gen_lowpart (wmode, old_out);
28617 else
28618 old_out = new_out;
28619 value = simplify_gen_subreg (wmode, value, mode, 0);
28620
28621 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28622
28623 /* Does the operation require the destination and the first operand to use
28624 the same register? This is decided by the register constraints of the
28625 relevant insn patterns in thumb1.md. */
28626 gcc_assert (!new_out || REG_P (new_out));
28627 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28628 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28629 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28630 bind_old_new =
28631 (TARGET_THUMB1
28632 && code != SET
28633 && code != MINUS
28634 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28635
28636 /* We want to return the old value while putting the result of the operation
28637 in the same register as the old value so copy the old value over to the
28638 destination register and use that register for the operation. */
28639 if (old_out && bind_old_new)
28640 {
28641 emit_move_insn (new_out, old_out);
28642 old_out = new_out;
28643 }
28644
28645 switch (code)
28646 {
28647 case SET:
28648 new_out = value;
28649 break;
28650
28651 case NOT:
28652 x = gen_rtx_AND (wmode, old_out, value);
28653 emit_insn (gen_rtx_SET (new_out, x));
28654 x = gen_rtx_NOT (wmode, new_out);
28655 emit_insn (gen_rtx_SET (new_out, x));
28656 break;
28657
28658 case MINUS:
28659 if (CONST_INT_P (value))
28660 {
28661 value = GEN_INT (-INTVAL (value));
28662 code = PLUS;
28663 }
28664 /* FALLTHRU */
28665
28666 case PLUS:
28667 if (mode == DImode)
28668 {
28669 /* DImode plus/minus need to clobber flags. */
28670 /* The adddi3 and subdi3 patterns are incorrectly written so that
28671 they require matching operands, even when we could easily support
28672 three operands. Thankfully, this can be fixed up post-splitting,
28673 as the individual add+adc patterns do accept three operands and
28674 post-reload cprop can make these moves go away. */
28675 emit_move_insn (new_out, old_out);
28676 if (code == PLUS)
28677 x = gen_adddi3 (new_out, new_out, value);
28678 else
28679 x = gen_subdi3 (new_out, new_out, value);
28680 emit_insn (x);
28681 break;
28682 }
28683 /* FALLTHRU */
28684
28685 default:
28686 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28687 emit_insn (gen_rtx_SET (new_out, x));
28688 break;
28689 }
28690
28691 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28692 use_release);
28693
28694 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28695 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28696
28697 /* Checks whether a barrier is needed and emits one accordingly. */
28698 if (is_armv8_sync
28699 || !(use_acquire || use_release))
28700 arm_post_atomic_barrier (model);
28701 }
28702 \f
28703 #define MAX_VECT_LEN 16
28704
28705 struct expand_vec_perm_d
28706 {
28707 rtx target, op0, op1;
28708 auto_vec_perm_indices perm;
28709 machine_mode vmode;
28710 bool one_vector_p;
28711 bool testing_p;
28712 };
28713
28714 /* Generate a variable permutation. */
28715
28716 static void
28717 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28718 {
28719 machine_mode vmode = GET_MODE (target);
28720 bool one_vector_p = rtx_equal_p (op0, op1);
28721
28722 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28723 gcc_checking_assert (GET_MODE (op0) == vmode);
28724 gcc_checking_assert (GET_MODE (op1) == vmode);
28725 gcc_checking_assert (GET_MODE (sel) == vmode);
28726 gcc_checking_assert (TARGET_NEON);
28727
28728 if (one_vector_p)
28729 {
28730 if (vmode == V8QImode)
28731 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28732 else
28733 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28734 }
28735 else
28736 {
28737 rtx pair;
28738
28739 if (vmode == V8QImode)
28740 {
28741 pair = gen_reg_rtx (V16QImode);
28742 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28743 pair = gen_lowpart (TImode, pair);
28744 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28745 }
28746 else
28747 {
28748 pair = gen_reg_rtx (OImode);
28749 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28750 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28751 }
28752 }
28753 }
28754
28755 void
28756 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28757 {
28758 machine_mode vmode = GET_MODE (target);
28759 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28760 bool one_vector_p = rtx_equal_p (op0, op1);
28761 rtx rmask[MAX_VECT_LEN], mask;
28762
28763 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28764 numbering of elements for big-endian, we must reverse the order. */
28765 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28766
28767 /* The VTBL instruction does not use a modulo index, so we must take care
28768 of that ourselves. */
28769 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28770 for (i = 0; i < nelt; ++i)
28771 rmask[i] = mask;
28772 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28773 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28774
28775 arm_expand_vec_perm_1 (target, op0, op1, sel);
28776 }
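/* For example, with V8QImode and a single input vector (one_vector_p),
   nelt is 8, so the mask is 7 and each selector byte is reduced to
   "sel & 7" before the VTBL, giving the modulo-nelt behaviour that
   VEC_PERM_EXPR requires.  */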
28777
28778 /* Map lane ordering between architectural lane order, and GCC lane order,
28779 taking into account ABI. See comment above output_move_neon for details. */
28780
28781 static int
28782 neon_endian_lane_map (machine_mode mode, int lane)
28783 {
28784 if (BYTES_BIG_ENDIAN)
28785 {
28786 int nelems = GET_MODE_NUNITS (mode);
28787 /* Reverse lane order. */
28788 lane = (nelems - 1 - lane);
28789 /* Reverse D register order, to match ABI. */
28790 if (GET_MODE_SIZE (mode) == 16)
28791 lane = lane ^ (nelems / 2);
28792 }
28793 return lane;
28794 }
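/* Worked example: for V4SImode on a big-endian target (nelems == 4,
   GET_MODE_SIZE == 16) the mapping above is 0->1, 1->0, 2->3, 3->2;
   the lane order is reversed and the two D registers are then swapped
   back to match the ABI ordering.  */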
28795
28796 /* Some permutations index into pairs of vectors; this is a helper function
28797 to map indexes into those pairs of vectors. */
28798
28799 static int
28800 neon_pair_endian_lane_map (machine_mode mode, int lane)
28801 {
28802 int nelem = GET_MODE_NUNITS (mode);
28803 if (BYTES_BIG_ENDIAN)
28804 lane =
28805 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28806 return lane;
28807 }
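/* Continuing the V4SImode big-endian example, an index into the second
   vector such as 5 maps to neon_endian_lane_map (mode, 1) + 4 == 4:
   the lane within the vector is remapped while the bit selecting which
   vector of the pair is used (bit 2 here) is preserved.  */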
28808
28809 /* Generate or test for an insn that supports a constant permutation. */
28810
28811 /* Recognize patterns for the VUZP insns. */
28812
28813 static bool
28814 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28815 {
28816 unsigned int i, odd, mask, nelt = d->perm.length ();
28817 rtx out0, out1, in0, in1;
28818 rtx (*gen)(rtx, rtx, rtx, rtx);
28819 int first_elem;
28820 int swap_nelt;
28821
28822 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28823 return false;
28824
28825 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28826 big-endian pattern on 64-bit vectors, so we correct for that. */
28827 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28828 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28829
28830 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28831
28832 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28833 odd = 0;
28834 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28835 odd = 1;
28836 else
28837 return false;
28838 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28839
28840 for (i = 0; i < nelt; i++)
28841 {
28842 unsigned elt =
28843 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28844 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28845 return false;
28846 }
28847
28848 /* Success! */
28849 if (d->testing_p)
28850 return true;
28851
28852 switch (d->vmode)
28853 {
28854 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28855 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28856 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28857 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28858 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28859 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28860 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28861 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28862 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28863 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28864 default:
28865 gcc_unreachable ();
28866 }
28867
28868 in0 = d->op0;
28869 in1 = d->op1;
28870 if (swap_nelt != 0)
28871 std::swap (in0, in1);
28872
28873 out0 = d->target;
28874 out1 = gen_reg_rtx (d->vmode);
28875 if (odd)
28876 std::swap (out0, out1);
28877
28878 emit_insn (gen (out0, in0, in1, out1));
28879 return true;
28880 }
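/* Worked example (little-endian): for V4SImode inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}, the selector {0,2,4,6} matches the even case (odd == 0)
   and is expanded to a VUZP whose first output, {a0,a2,b0,b2}, becomes
   the target; {1,3,5,7} matches the odd case and uses the second output
   instead.  */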
28881
28882 /* Recognize patterns for the VZIP insns. */
28883
28884 static bool
28885 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28886 {
28887 unsigned int i, high, mask, nelt = d->perm.length ();
28888 rtx out0, out1, in0, in1;
28889 rtx (*gen)(rtx, rtx, rtx, rtx);
28890 int first_elem;
28891 bool is_swapped;
28892
28893 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28894 return false;
28895
28896 is_swapped = BYTES_BIG_ENDIAN;
28897
28898 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28899
28900 high = nelt / 2;
28901 if (first_elem == neon_endian_lane_map (d->vmode, high))
28902 ;
28903 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28904 high = 0;
28905 else
28906 return false;
28907 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28908
28909 for (i = 0; i < nelt / 2; i++)
28910 {
28911 unsigned elt =
28912 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28913 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28914 != elt)
28915 return false;
28916 elt =
28917 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28918 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28919 != elt)
28920 return false;
28921 }
28922
28923 /* Success! */
28924 if (d->testing_p)
28925 return true;
28926
28927 switch (d->vmode)
28928 {
28929 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28930 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28931 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28932 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28933 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28934 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28935 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28936 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28937 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28938 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28939 default:
28940 gcc_unreachable ();
28941 }
28942
28943 in0 = d->op0;
28944 in1 = d->op1;
28945 if (is_swapped)
28946 std::swap (in0, in1);
28947
28948 out0 = d->target;
28949 out1 = gen_reg_rtx (d->vmode);
28950 if (high)
28951 std::swap (out0, out1);
28952
28953 emit_insn (gen (out0, in0, in1, out1));
28954 return true;
28955 }
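/* Worked example (little-endian): for V4SImode inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}, the selector {0,4,1,5} matches the low half (high == 0)
   and expands to a VZIP whose first output is {a0,b0,a1,b1}; the
   selector {2,6,3,7} selects the high half and uses the second output.  */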
28956
28957 /* Recognize patterns for the VREV insns. */
28958
28959 static bool
28960 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28961 {
28962 unsigned int i, j, diff, nelt = d->perm.length ();
28963 rtx (*gen)(rtx, rtx);
28964
28965 if (!d->one_vector_p)
28966 return false;
28967
28968 diff = d->perm[0];
28969 switch (diff)
28970 {
28971 case 7:
28972 switch (d->vmode)
28973 {
28974 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28975 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28976 default:
28977 return false;
28978 }
28979 break;
28980 case 3:
28981 switch (d->vmode)
28982 {
28983 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28984 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28985 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28986 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28987 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28988 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28989 default:
28990 return false;
28991 }
28992 break;
28993 case 1:
28994 switch (d->vmode)
28995 {
28996 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28997 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28998 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28999 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29000 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29001 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29002 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29003 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29004 default:
29005 return false;
29006 }
29007 break;
29008 default:
29009 return false;
29010 }
29011
29012 for (i = 0; i < nelt ; i += diff + 1)
29013 for (j = 0; j <= diff; j += 1)
29014 {
29015 /* This is guaranteed to be true, as the value of diff
29016 is 7, 3 or 1 and we should have enough elements in the
29017 queue to generate this. Getting a vector mask with a
29018 diff value other than these implies that something is
29019 wrong by the time we get here. */
29020 gcc_assert (i + j < nelt);
29021 if (d->perm[i + j] != i + diff - j)
29022 return false;
29023 }
29024
29025 /* Success! */
29026 if (d->testing_p)
29027 return true;
29028
29029 emit_insn (gen (d->target, d->op0));
29030 return true;
29031 }
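/* Worked example: for V4SImode the selector {1,0,3,2} has diff == 1 and
   satisfies the check perm[i + j] == i + diff - j, so it is expanded as
   a VREV64 that swaps the two 32-bit lanes within each 64-bit chunk.  */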
29032
29033 /* Recognize patterns for the VTRN insns. */
29034
29035 static bool
29036 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29037 {
29038 unsigned int i, odd, mask, nelt = d->perm.length ();
29039 rtx out0, out1, in0, in1;
29040 rtx (*gen)(rtx, rtx, rtx, rtx);
29041
29042 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29043 return false;
29044
29045 /* Note that these are little-endian tests. Adjust for big-endian later. */
29046 if (d->perm[0] == 0)
29047 odd = 0;
29048 else if (d->perm[0] == 1)
29049 odd = 1;
29050 else
29051 return false;
29052 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29053
29054 for (i = 0; i < nelt; i += 2)
29055 {
29056 if (d->perm[i] != i + odd)
29057 return false;
29058 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29059 return false;
29060 }
29061
29062 /* Success! */
29063 if (d->testing_p)
29064 return true;
29065
29066 switch (d->vmode)
29067 {
29068 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29069 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29070 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29071 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29072 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29073 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29074 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29075 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29076 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29077 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29078 default:
29079 gcc_unreachable ();
29080 }
29081
29082 in0 = d->op0;
29083 in1 = d->op1;
29084 if (BYTES_BIG_ENDIAN)
29085 {
29086 std::swap (in0, in1);
29087 odd = !odd;
29088 }
29089
29090 out0 = d->target;
29091 out1 = gen_reg_rtx (d->vmode);
29092 if (odd)
29093 std::swap (out0, out1);
29094
29095 emit_insn (gen (out0, in0, in1, out1));
29096 return true;
29097 }
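/* Worked example (little-endian): for V4SImode inputs {a0,a1,a2,a3} and
   {b0,b1,b2,b3}, the selector {0,4,2,6} matches the even case (odd == 0)
   and expands to a VTRN whose first output is {a0,b0,a2,b2}; {1,5,3,7}
   matches the odd case and uses the second output.  */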
29098
29099 /* Recognize patterns for the VEXT insns. */
29100
29101 static bool
29102 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29103 {
29104 unsigned int i, nelt = d->perm.length ();
29105 rtx (*gen) (rtx, rtx, rtx, rtx);
29106 rtx offset;
29107
29108 unsigned int location;
29109
29110 unsigned int next = d->perm[0] + 1;
29111
29112 /* TODO: Handle GCC's numbering of elements for big-endian. */
29113 if (BYTES_BIG_ENDIAN)
29114 return false;
29115
29116 /* Check if the extracted indexes are increasing by one. */
29117 for (i = 1; i < nelt; next++, i++)
29118 {
29119 /* If we hit the most significant element of the 2nd vector in
29120 the previous iteration, no need to test further. */
29121 if (next == 2 * nelt)
29122 return false;
29123
29124 /* If we are operating on only one vector, it could be a
29125 rotation. If there are only two elements of size < 64, let
29126 arm_evpc_neon_vrev catch it. */
29127 if (d->one_vector_p && (next == nelt))
29128 {
29129 if ((nelt == 2) && (d->vmode != V2DImode))
29130 return false;
29131 else
29132 next = 0;
29133 }
29134
29135 if (d->perm[i] != next)
29136 return false;
29137 }
29138
29139 location = d->perm[0];
29140
29141 switch (d->vmode)
29142 {
29143 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29144 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29145 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29146 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29147 case E_V2SImode: gen = gen_neon_vextv2si; break;
29148 case E_V4SImode: gen = gen_neon_vextv4si; break;
29149 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29150 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29151 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29152 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29153 case E_V2DImode: gen = gen_neon_vextv2di; break;
29154 default:
29155 return false;
29156 }
29157
29158 /* Success! */
29159 if (d->testing_p)
29160 return true;
29161
29162 offset = GEN_INT (location);
29163 emit_insn (gen (d->target, d->op0, d->op1, offset));
29164 return true;
29165 }
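/* Worked example: for V8QImode the selector {3,4,5,6,7,8,9,10} is a run
   of consecutive indexes starting at 3, so it is expanded as a VEXT with
   offset 3, taking the top five bytes of the first vector followed by
   the first three bytes of the second.  */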
29166
29167 /* The NEON VTBL instruction is a fully variable permutation that's even
29168 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29169 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29170 can do slightly better by expanding this as a constant where we don't
29171 have to apply a mask. */
29172
29173 static bool
29174 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29175 {
29176 rtx rperm[MAX_VECT_LEN], sel;
29177 machine_mode vmode = d->vmode;
29178 unsigned int i, nelt = d->perm.length ();
29179
29180 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29181 numbering of elements for big-endian, we must reverse the order. */
29182 if (BYTES_BIG_ENDIAN)
29183 return false;
29184
29185 if (d->testing_p)
29186 return true;
29187
29188 /* Generic code will try constant permutation twice: once with the
29189 original mode and again with the elements lowered to QImode.
29190 So wait, and don't do the selector expansion ourselves. */
29191 if (vmode != V8QImode && vmode != V16QImode)
29192 return false;
29193
29194 for (i = 0; i < nelt; ++i)
29195 rperm[i] = GEN_INT (d->perm[i]);
29196 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29197 sel = force_reg (vmode, sel);
29198
29199 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29200 return true;
29201 }
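/* For example, a single-vector V8QImode selector such as
   {0,7,1,6,2,5,3,4}, which none of the structured matchers above accept,
   is handled here by loading the constant selector into a register and
   emitting a single VTBL1; no extra AND masking is needed because the
   constant indexes are already in range.  */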
29202
29203 static bool
29204 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29205 {
29206 /* Check if the input mask matches vext before reordering the
29207 operands. */
29208 if (TARGET_NEON)
29209 if (arm_evpc_neon_vext (d))
29210 return true;
29211
29212 /* The pattern matching functions above are written to look for a small
29213 number to begin the sequence (0, 1, N/2). If we begin with an index
29214 from the second operand, we can swap the operands. */
29215 unsigned int nelt = d->perm.length ();
29216 if (d->perm[0] >= nelt)
29217 {
29218 for (unsigned int i = 0; i < nelt; ++i)
29219 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29220
29221 std::swap (d->op0, d->op1);
29222 }
29223
29224 if (TARGET_NEON)
29225 {
29226 if (arm_evpc_neon_vuzp (d))
29227 return true;
29228 if (arm_evpc_neon_vzip (d))
29229 return true;
29230 if (arm_evpc_neon_vrev (d))
29231 return true;
29232 if (arm_evpc_neon_vtrn (d))
29233 return true;
29234 return arm_evpc_neon_vtbl (d);
29235 }
29236 return false;
29237 }
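/* For example, a two-operand V4SImode selector {5,6,7,0} begins with an
   index into the second operand; the loop above rewrites it to {1,2,3,4}
   and swaps op0/op1, so the matchers only ever have to recognize
   sequences whose first index lies in the first operand.  */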
29238
29239 /* Expand a vec_perm_const pattern. */
29240
29241 bool
29242 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29243 {
29244 struct expand_vec_perm_d d;
29245 int i, nelt, which;
29246
29247 d.target = target;
29248 d.op0 = op0;
29249 d.op1 = op1;
29250
29251 d.vmode = GET_MODE (target);
29252 gcc_assert (VECTOR_MODE_P (d.vmode));
29253 d.testing_p = false;
29254
29255 nelt = GET_MODE_NUNITS (d.vmode);
29256 d.perm.reserve (nelt);
29257 for (i = which = 0; i < nelt; ++i)
29258 {
29259 rtx e = XVECEXP (sel, 0, i);
29260 int ei = INTVAL (e) & (2 * nelt - 1);
29261 which |= (ei < nelt ? 1 : 2);
29262 d.perm.quick_push (ei);
29263 }
29264
29265 switch (which)
29266 {
29267 default:
29268 gcc_unreachable();
29269
29270 case 3:
29271 d.one_vector_p = false;
29272 if (!rtx_equal_p (op0, op1))
29273 break;
29274
29275 /* The elements of PERM do not suggest that only the first operand
29276 is used, but both operands are identical. Allow easier matching
29277 of the permutation by folding the permutation into the single
29278 input vector. */
29279 /* FALLTHRU */
29280 case 2:
29281 for (i = 0; i < nelt; ++i)
29282 d.perm[i] &= nelt - 1;
29283 d.op0 = op1;
29284 d.one_vector_p = true;
29285 break;
29286
29287 case 1:
29288 d.op1 = op0;
29289 d.one_vector_p = true;
29290 break;
29291 }
29292
29293 return arm_expand_vec_perm_const_1 (&d);
29294 }
29295
29296 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29297
29298 static bool
29299 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29300 {
29301 struct expand_vec_perm_d d;
29302 unsigned int i, nelt, which;
29303 bool ret;
29304
29305 d.vmode = vmode;
29306 d.testing_p = true;
29307 d.perm.safe_splice (sel);
29308
29309 /* Categorize the set of elements in the selector. */
29310 nelt = GET_MODE_NUNITS (d.vmode);
29311 for (i = which = 0; i < nelt; ++i)
29312 {
29313 unsigned int e = d.perm[i];
29314 gcc_assert (e < 2 * nelt);
29315 which |= (e < nelt ? 1 : 2);
29316 }
29317
29318 /* If all elements are from the second vector, fold them to the first. */
29319 if (which == 2)
29320 for (i = 0; i < nelt; ++i)
29321 d.perm[i] -= nelt;
29322
29323 /* Check whether the mask can be applied to the vector type. */
29324 d.one_vector_p = (which != 3);
29325
29326 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29327 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29328 if (!d.one_vector_p)
29329 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29330
29331 start_sequence ();
29332 ret = arm_expand_vec_perm_const_1 (&d);
29333 end_sequence ();
29334
29335 return ret;
29336 }
29337
29338 bool
29339 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29340 {
29341 /* If we are soft float and either have LDRD or the mode fits in a
29342 single word, then all auto-increment forms are OK. */
29343 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29344 return true;
29345
29346 switch (code)
29347 {
29348 /* Post-increment and pre-decrement are supported for all
29349 instruction forms, except that pre-decrement is unavailable for vector forms. */
29350 case ARM_POST_INC:
29351 case ARM_PRE_DEC:
29352 if (VECTOR_MODE_P (mode))
29353 {
29354 if (code != ARM_PRE_DEC)
29355 return true;
29356 else
29357 return false;
29358 }
29359
29360 return true;
29361
29362 case ARM_POST_DEC:
29363 case ARM_PRE_INC:
29364 /* Without LDRD, and with a mode size greater than the
29365 word size, there is no point in auto-incrementing
29366 because ldm and stm will not have these forms. */
29367 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29368 return false;
29369
29370 /* Vector and floating point modes do not support
29371 these auto increment forms. */
29372 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29373 return false;
29374
29375 return true;
29376
29377 default:
29378 return false;
29379
29380 }
29381
29382 return false;
29383 }
29384
29385 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29386 on ARM, since we know that shifts by negative amounts are no-ops.
29387 Additionally, the default expansion code is not available or suitable
29388 for post-reload insn splits (this can occur when the register allocator
29389 chooses not to do a shift in NEON).
29390
29391 This function is used in both initial expand and post-reload splits, and
29392 handles all kinds of 64-bit shifts.
29393
29394 Input requirements:
29395 - It is safe for the input and output to be the same register, but
29396 early-clobber rules apply for the shift amount and scratch registers.
29397 - Shift by register requires both scratch registers. In all other cases
29398 the scratch registers may be NULL.
29399 - Ashiftrt by a register also clobbers the CC register. */
29400 void
29401 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29402 rtx amount, rtx scratch1, rtx scratch2)
29403 {
29404 rtx out_high = gen_highpart (SImode, out);
29405 rtx out_low = gen_lowpart (SImode, out);
29406 rtx in_high = gen_highpart (SImode, in);
29407 rtx in_low = gen_lowpart (SImode, in);
29408
29409 /* Terminology:
29410 in = the register pair containing the input value.
29411 out = the destination register pair.
29412 up = the high- or low-part of each pair.
29413 down = the opposite part to "up".
29414 In a shift, we can consider bits to shift from "up"-stream to
29415 "down"-stream, so in a left-shift "up" is the low-part and "down"
29416 is the high-part of each register pair. */
29417
29418 rtx out_up = code == ASHIFT ? out_low : out_high;
29419 rtx out_down = code == ASHIFT ? out_high : out_low;
29420 rtx in_up = code == ASHIFT ? in_low : in_high;
29421 rtx in_down = code == ASHIFT ? in_high : in_low;
29422
29423 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29424 gcc_assert (out
29425 && (REG_P (out) || GET_CODE (out) == SUBREG)
29426 && GET_MODE (out) == DImode);
29427 gcc_assert (in
29428 && (REG_P (in) || GET_CODE (in) == SUBREG)
29429 && GET_MODE (in) == DImode);
29430 gcc_assert (amount
29431 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29432 && GET_MODE (amount) == SImode)
29433 || CONST_INT_P (amount)));
29434 gcc_assert (scratch1 == NULL
29435 || (GET_CODE (scratch1) == SCRATCH)
29436 || (GET_MODE (scratch1) == SImode
29437 && REG_P (scratch1)));
29438 gcc_assert (scratch2 == NULL
29439 || (GET_CODE (scratch2) == SCRATCH)
29440 || (GET_MODE (scratch2) == SImode
29441 && REG_P (scratch2)));
29442 gcc_assert (!REG_P (out) || !REG_P (amount)
29443 || !HARD_REGISTER_P (out)
29444 || (REGNO (out) != REGNO (amount)
29445 && REGNO (out) + 1 != REGNO (amount)));
29446
29447 /* Macros to make following code more readable. */
29448 #define SUB_32(DEST,SRC) \
29449 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29450 #define RSB_32(DEST,SRC) \
29451 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29452 #define SUB_S_32(DEST,SRC) \
29453 gen_addsi3_compare0 ((DEST), (SRC), \
29454 GEN_INT (-32))
29455 #define SET(DEST,SRC) \
29456 gen_rtx_SET ((DEST), (SRC))
29457 #define SHIFT(CODE,SRC,AMOUNT) \
29458 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29459 #define LSHIFT(CODE,SRC,AMOUNT) \
29460 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29461 SImode, (SRC), (AMOUNT))
29462 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29463 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29464 SImode, (SRC), (AMOUNT))
29465 #define ORR(A,B) \
29466 gen_rtx_IOR (SImode, (A), (B))
29467 #define BRANCH(COND,LABEL) \
29468 gen_arm_cond_branch ((LABEL), \
29469 gen_rtx_ ## COND (CCmode, cc_reg, \
29470 const0_rtx), \
29471 cc_reg)
29472
29473 /* Shifts by register and shifts by constant are handled separately. */
29474 if (CONST_INT_P (amount))
29475 {
29476 /* We have a shift-by-constant. */
29477
29478 /* First, handle out-of-range shift amounts.
29479 In both cases we try to match the result that an ARM shift-by-register
29480 instruction would give. This helps reduce execution
29481 differences between optimization levels, but it won't stop other
29482 parts of the compiler doing different things. This is "undefined
29483 behavior", in any case. */
29484 if (INTVAL (amount) <= 0)
29485 emit_insn (gen_movdi (out, in));
29486 else if (INTVAL (amount) >= 64)
29487 {
29488 if (code == ASHIFTRT)
29489 {
29490 rtx const31_rtx = GEN_INT (31);
29491 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29492 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29493 }
29494 else
29495 emit_insn (gen_movdi (out, const0_rtx));
29496 }
29497
29498 /* Now handle valid shifts. */
29499 else if (INTVAL (amount) < 32)
29500 {
29501 /* Shifts by a constant less than 32. */
29502 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29503
29504 /* Clearing the out register in DImode first avoids lots
29505 of spilling and results in less stack usage.
29506 Later this redundant insn is completely removed.
29507 Do that only if "in" and "out" are different registers. */
29508 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29509 emit_insn (SET (out, const0_rtx));
29510 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29511 emit_insn (SET (out_down,
29512 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29513 out_down)));
29514 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29515 }
29516 else
29517 {
29518 /* Shifts by a constant greater than 31. */
29519 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29520
29521 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29522 emit_insn (SET (out, const0_rtx));
29523 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29524 if (code == ASHIFTRT)
29525 emit_insn (gen_ashrsi3 (out_up, in_up,
29526 GEN_INT (31)));
29527 else
29528 emit_insn (SET (out_up, const0_rtx));
29529 }
29530 }
29531 else
29532 {
29533 /* We have a shift-by-register. */
29534 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29535
29536 /* This alternative requires the scratch registers. */
29537 gcc_assert (scratch1 && REG_P (scratch1));
29538 gcc_assert (scratch2 && REG_P (scratch2));
29539
29540 /* We will need the values "amount-32" and "32-amount" later.
29541 Swapping them around now allows the later code to be more general. */
29542 switch (code)
29543 {
29544 case ASHIFT:
29545 emit_insn (SUB_32 (scratch1, amount));
29546 emit_insn (RSB_32 (scratch2, amount));
29547 break;
29548 case ASHIFTRT:
29549 emit_insn (RSB_32 (scratch1, amount));
29550 /* Also set CC = amount > 32. */
29551 emit_insn (SUB_S_32 (scratch2, amount));
29552 break;
29553 case LSHIFTRT:
29554 emit_insn (RSB_32 (scratch1, amount));
29555 emit_insn (SUB_32 (scratch2, amount));
29556 break;
29557 default:
29558 gcc_unreachable ();
29559 }
29560
29561 /* Emit code like this:
29562
29563 arithmetic-left:
29564 out_down = in_down << amount;
29565 out_down = (in_up << (amount - 32)) | out_down;
29566 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29567 out_up = in_up << amount;
29568
29569 arithmetic-right:
29570 out_down = in_down >> amount;
29571 out_down = (in_up << (32 - amount)) | out_down;
29572 if (amount < 32)
29573 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29574 out_up = in_up << amount;
29575
29576 logical-right:
29577 out_down = in_down >> amount;
29578 out_down = (in_up << (32 - amount)) | out_down;
29579 if (amount < 32)
29580 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29581 out_up = in_up << amount;
29582
29583 The ARM and Thumb2 variants are the same but implemented slightly
29584 differently. If this were only called during expand we could just
29585 use the Thumb2 case and let combine do the right thing, but this
29586 can also be called from post-reload splitters. */
29587
29588 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29589
29590 if (!TARGET_THUMB2)
29591 {
29592 /* Emit code for ARM mode. */
29593 emit_insn (SET (out_down,
29594 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29595 if (code == ASHIFTRT)
29596 {
29597 rtx_code_label *done_label = gen_label_rtx ();
29598 emit_jump_insn (BRANCH (LT, done_label));
29599 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29600 out_down)));
29601 emit_label (done_label);
29602 }
29603 else
29604 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29605 out_down)));
29606 }
29607 else
29608 {
29609 /* Emit code for Thumb2 mode.
29610 Thumb2 can't do shift and or in one insn. */
29611 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29612 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29613
29614 if (code == ASHIFTRT)
29615 {
29616 rtx_code_label *done_label = gen_label_rtx ();
29617 emit_jump_insn (BRANCH (LT, done_label));
29618 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29619 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29620 emit_label (done_label);
29621 }
29622 else
29623 {
29624 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29625 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29626 }
29627 }
29628
29629 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29630 }
29631
29632 #undef SUB_32
29633 #undef RSB_32
29634 #undef SUB_S_32
29635 #undef SET
29636 #undef SHIFT
29637 #undef LSHIFT
29638 #undef REV_LSHIFT
29639 #undef ORR
29640 #undef BRANCH
29641 }
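/* As a plain C sketch of the shift-by-constant decomposition above
   (illustrative only; the function emits RTL), a 64-bit logical right
   shift by a constant K with 0 < K < 32 is:

     out_low  = (in_low >> K) | (in_high << (32 - K));
     out_high = in_high >> K;

   and for 32 <= K < 64:

     out_low  = in_high >> (K - 32);
     out_high = 0;            (in_high >> 31 for an arithmetic shift)  */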
29642
29643 /* Returns true if the pattern is a valid symbolic address, which is either a
29644 symbol_ref or (symbol_ref + addend).
29645
29646 According to the ARM ELF ABI, the initial addend of REL-type relocations
29647 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29648 literal field of the instruction as a 16-bit signed value in the range
29649 -32768 <= A < 32768. */
29650
29651 bool
29652 arm_valid_symbolic_address_p (rtx addr)
29653 {
29654 rtx xop0, xop1 = NULL_RTX;
29655 rtx tmp = addr;
29656
29657 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29658 return true;
29659
29660 /* (const (plus: symbol_ref const_int)) */
29661 if (GET_CODE (addr) == CONST)
29662 tmp = XEXP (addr, 0);
29663
29664 if (GET_CODE (tmp) == PLUS)
29665 {
29666 xop0 = XEXP (tmp, 0);
29667 xop1 = XEXP (tmp, 1);
29668
29669 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29670 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29671 }
29672
29673 return false;
29674 }
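/* For example, (const (plus (symbol_ref "sym") (const_int 32767))) is
   accepted, while an addend of 32768 or -32769 is rejected because it
   cannot be encoded as a 16-bit signed REL addend.  */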
29675
29676 /* Return true if *COMPARISON is a valid comparison operation, and
29677 put the operands into a form that is valid for it. */
29678 bool
29679 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29680 {
29681 enum rtx_code code = GET_CODE (*comparison);
29682 int code_int;
29683 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29684 ? GET_MODE (*op2) : GET_MODE (*op1);
29685
29686 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29687
29688 if (code == UNEQ || code == LTGT)
29689 return false;
29690
29691 code_int = (int)code;
29692 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29693 PUT_CODE (*comparison, (enum rtx_code)code_int);
29694
29695 switch (mode)
29696 {
29697 case E_SImode:
29698 if (!arm_add_operand (*op1, mode))
29699 *op1 = force_reg (mode, *op1);
29700 if (!arm_add_operand (*op2, mode))
29701 *op2 = force_reg (mode, *op2);
29702 return true;
29703
29704 case E_DImode:
29705 if (!cmpdi_operand (*op1, mode))
29706 *op1 = force_reg (mode, *op1);
29707 if (!cmpdi_operand (*op2, mode))
29708 *op2 = force_reg (mode, *op2);
29709 return true;
29710
29711 case E_HFmode:
29712 if (!TARGET_VFP_FP16INST)
29713 break;
29714 /* FP16 comparisons are done in SF mode. */
29715 mode = SFmode;
29716 *op1 = convert_to_mode (mode, *op1, 1);
29717 *op2 = convert_to_mode (mode, *op2, 1);
29718 /* Fall through. */
29719 case E_SFmode:
29720 case E_DFmode:
29721 if (!vfp_compare_operand (*op1, mode))
29722 *op1 = force_reg (mode, *op1);
29723 if (!vfp_compare_operand (*op2, mode))
29724 *op2 = force_reg (mode, *op2);
29725 return true;
29726 default:
29727 break;
29728 }
29729
29730 return false;
29731
29732 }
29733
29734 /* Maximum number of instructions to set a block of memory. */
29735 static int
29736 arm_block_set_max_insns (void)
29737 {
29738 if (optimize_function_for_size_p (cfun))
29739 return 4;
29740 else
29741 return current_tune->max_insns_inline_memset;
29742 }
29743
29744 /* Return TRUE if it's profitable to set a block of memory for the
29745 non-vectorized case. VAL is the value to set the memory
29746 with. LENGTH is the number of bytes to set. ALIGN is the
29747 alignment of the destination memory in bytes. UNALIGNED_P
29748 is TRUE if we can only set the memory with instructions
29749 meeting alignment requirements. USE_STRD_P is TRUE if we
29750 can use strd to set the memory. */
29751 static bool
29752 arm_block_set_non_vect_profit_p (rtx val,
29753 unsigned HOST_WIDE_INT length,
29754 unsigned HOST_WIDE_INT align,
29755 bool unaligned_p, bool use_strd_p)
29756 {
29757 int num = 0;
29758 /* For leftovers of 0-7 bytes, this table gives the minimum number of
29759 strb/strh/str instructions needed to set them. */
29760 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29761
29762 if (unaligned_p)
29763 {
29764 num = arm_const_inline_cost (SET, val);
29765 num += length / align + length % align;
29766 }
29767 else if (use_strd_p)
29768 {
29769 num = arm_const_double_inline_cost (val);
29770 num += (length >> 3) + leftover[length & 7];
29771 }
29772 else
29773 {
29774 num = arm_const_inline_cost (SET, val);
29775 num += (length >> 2) + leftover[length & 3];
29776 }
29777
29778 /* We may be able to combine the last STRH/STRB pair into a single STR
29779 by shifting one byte back. */
29780 if (unaligned_access && length > 3 && (length & 3) == 3)
29781 num--;
29782
29783 return (num <= arm_block_set_max_insns ());
29784 }
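/* Worked example: for a 15-byte word-aligned memset without strd, the
   estimate is the cost of loading the constant, plus (15 >> 2) == 3 word
   stores, plus leftover[15 & 3] == 2 trailing stores, minus one when
   unaligned access lets the trailing STRH/STRB pair be merged into a
   single STR; the block is expanded inline only if that total does not
   exceed arm_block_set_max_insns ().  */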
29785
29786 /* Return TRUE if it's profitable to set a block of memory for the
29787 vectorized case. LENGTH is the number of bytes to set.
29788 ALIGN is the alignment of destination memory in bytes.
29789 MODE is the vector mode used to set the memory. */
29790 static bool
29791 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29792 unsigned HOST_WIDE_INT align,
29793 machine_mode mode)
29794 {
29795 int num;
29796 bool unaligned_p = ((align & 3) != 0);
29797 unsigned int nelt = GET_MODE_NUNITS (mode);
29798
29799 /* Instruction loading constant value. */
29800 num = 1;
29801 /* Instructions storing the memory. */
29802 num += (length + nelt - 1) / nelt;
29803 /* Instructions adjusting the address expression. We only need to
29804 adjust the address expression if it's 4-byte aligned and the leftover
29805 bytes can only be stored by a misaligned store instruction. */
29806 if (!unaligned_p && (length & 3) != 0)
29807 num++;
29808
29809 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29810 if (!unaligned_p && mode == V16QImode)
29811 num--;
29812
29813 return (num <= arm_block_set_max_insns ());
29814 }
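/* Worked example: a 17-byte word-aligned memset in V16QImode costs one
   instruction to load the constant vector, two stores (ceil (17 / 16)),
   plus one address adjustment because 17 is not a multiple of 4, minus
   one for the initial vst1-style store: three instructions in total.  */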
29815
29816 /* Set a block of memory using vectorization instructions for the
29817 unaligned case. We fill the first LENGTH bytes of the memory
29818 area starting from DSTBASE with byte constant VALUE. ALIGN is
29819 the alignment requirement of memory. Return TRUE if succeeded. */
29820 static bool
29821 arm_block_set_unaligned_vect (rtx dstbase,
29822 unsigned HOST_WIDE_INT length,
29823 unsigned HOST_WIDE_INT value,
29824 unsigned HOST_WIDE_INT align)
29825 {
29826 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29827 rtx dst, mem;
29828 rtx val_elt, val_vec, reg;
29829 rtx rval[MAX_VECT_LEN];
29830 rtx (*gen_func) (rtx, rtx);
29831 machine_mode mode;
29832 unsigned HOST_WIDE_INT v = value;
29833 unsigned int offset = 0;
29834 gcc_assert ((align & 0x3) != 0);
29835 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29836 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29837 if (length >= nelt_v16)
29838 {
29839 mode = V16QImode;
29840 gen_func = gen_movmisalignv16qi;
29841 }
29842 else
29843 {
29844 mode = V8QImode;
29845 gen_func = gen_movmisalignv8qi;
29846 }
29847 nelt_mode = GET_MODE_NUNITS (mode);
29848 gcc_assert (length >= nelt_mode);
29849 /* Skip if it isn't profitable. */
29850 if (!arm_block_set_vect_profit_p (length, align, mode))
29851 return false;
29852
29853 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29854 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29855
29856 v = sext_hwi (v, BITS_PER_WORD);
29857 val_elt = GEN_INT (v);
29858 for (j = 0; j < nelt_mode; j++)
29859 rval[j] = val_elt;
29860
29861 reg = gen_reg_rtx (mode);
29862 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29863 /* Emit instruction loading the constant value. */
29864 emit_move_insn (reg, val_vec);
29865
29866 /* Handle nelt_mode bytes in a vector. */
29867 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29868 {
29869 emit_insn ((*gen_func) (mem, reg));
29870 if (i + 2 * nelt_mode <= length)
29871 {
29872 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29873 offset += nelt_mode;
29874 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29875 }
29876 }
29877
29878 /* If at least nelt_v8 bytes are left over, we must be in
29879 V16QImode. */
29880 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29881
29882 /* Handle (8, 16) bytes leftover. */
29883 if (i + nelt_v8 < length)
29884 {
29885 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29886 offset += length - i;
29887 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29888
29889 /* We are shifting bytes back, set the alignment accordingly. */
29890 if ((length & 1) != 0 && align >= 2)
29891 set_mem_align (mem, BITS_PER_UNIT);
29892
29893 emit_insn (gen_movmisalignv16qi (mem, reg));
29894 }
29895 /* Handle (0, 8] bytes leftover. */
29896 else if (i < length && i + nelt_v8 >= length)
29897 {
29898 if (mode == V16QImode)
29899 reg = gen_lowpart (V8QImode, reg);
29900
29901 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29902 + (nelt_mode - nelt_v8))));
29903 offset += (length - i) + (nelt_mode - nelt_v8);
29904 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29905
29906 /* We are shifting bytes back, set the alignment accordingly. */
29907 if ((length & 1) != 0 && align >= 2)
29908 set_mem_align (mem, BITS_PER_UNIT);
29909
29910 emit_insn (gen_movmisalignv8qi (mem, reg));
29911 }
29912
29913 return true;
29914 }
29915
29916 /* Set a block of memory using vectorization instructions for the
29917 aligned case. We fill the first LENGTH bytes of the memory area
29918 starting from DSTBASE with byte constant VALUE. ALIGN is the
29919 alignment requirement of memory. Return TRUE if succeeded. */
29920 static bool
29921 arm_block_set_aligned_vect (rtx dstbase,
29922 unsigned HOST_WIDE_INT length,
29923 unsigned HOST_WIDE_INT value,
29924 unsigned HOST_WIDE_INT align)
29925 {
29926 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29927 rtx dst, addr, mem;
29928 rtx val_elt, val_vec, reg;
29929 rtx rval[MAX_VECT_LEN];
29930 machine_mode mode;
29931 unsigned HOST_WIDE_INT v = value;
29932 unsigned int offset = 0;
29933
29934 gcc_assert ((align & 0x3) == 0);
29935 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29936 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29937 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29938 mode = V16QImode;
29939 else
29940 mode = V8QImode;
29941
29942 nelt_mode = GET_MODE_NUNITS (mode);
29943 gcc_assert (length >= nelt_mode);
29944 /* Skip if it isn't profitable. */
29945 if (!arm_block_set_vect_profit_p (length, align, mode))
29946 return false;
29947
29948 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29949
29950 v = sext_hwi (v, BITS_PER_WORD);
29951 val_elt = GEN_INT (v);
29952 for (j = 0; j < nelt_mode; j++)
29953 rval[j] = val_elt;
29954
29955 reg = gen_reg_rtx (mode);
29956 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29957 /* Emit instruction loading the constant value. */
29958 emit_move_insn (reg, val_vec);
29959
29960 i = 0;
29961 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29962 if (mode == V16QImode)
29963 {
29964 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29965 emit_insn (gen_movmisalignv16qi (mem, reg));
29966 i += nelt_mode;
29967 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29968 if (i + nelt_v8 < length && i + nelt_v16 > length)
29969 {
29970 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29971 offset += length - nelt_mode;
29972 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29973 /* We are shifting bytes back, set the alignment accordingly. */
29974 if ((length & 0x3) == 0)
29975 set_mem_align (mem, BITS_PER_UNIT * 4);
29976 else if ((length & 0x1) == 0)
29977 set_mem_align (mem, BITS_PER_UNIT * 2);
29978 else
29979 set_mem_align (mem, BITS_PER_UNIT);
29980
29981 emit_insn (gen_movmisalignv16qi (mem, reg));
29982 return true;
29983 }
29984 /* Fall through for bytes leftover. */
29985 mode = V8QImode;
29986 nelt_mode = GET_MODE_NUNITS (mode);
29987 reg = gen_lowpart (V8QImode, reg);
29988 }
29989
29990 /* Handle 8 bytes in a vector. */
29991 for (; (i + nelt_mode <= length); i += nelt_mode)
29992 {
29993 addr = plus_constant (Pmode, dst, i);
29994 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29995 emit_move_insn (mem, reg);
29996 }
29997
29998 /* Handle single word leftover by shifting 4 bytes back. We can
29999 use aligned access for this case. */
30000 if (i + UNITS_PER_WORD == length)
30001 {
30002 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30003 offset += i - UNITS_PER_WORD;
30004 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30005 /* We are shifting 4 bytes back, set the alignment accordingly. */
30006 if (align > UNITS_PER_WORD)
30007 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30008
30009 emit_move_insn (mem, reg);
30010 }
30011 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30012 We have to use unaligned access for this case. */
30013 else if (i < length)
30014 {
30015 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30016 offset += length - nelt_mode;
30017 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30018 /* We are shifting bytes back, set the alignment accordingly. */
30019 if ((length & 1) == 0)
30020 set_mem_align (mem, BITS_PER_UNIT * 2);
30021 else
30022 set_mem_align (mem, BITS_PER_UNIT);
30023
30024 emit_insn (gen_movmisalignv8qi (mem, reg));
30025 }
30026
30027 return true;
30028 }
30029
30030 /* Set a block of memory using plain strh/strb instructions, only
30031 using instructions allowed by ALIGN on the processor. We fill the
30032 first LENGTH bytes of the memory area starting from DSTBASE
30033 with byte constant VALUE. ALIGN is the alignment requirement
30034 of memory. */
30035 static bool
30036 arm_block_set_unaligned_non_vect (rtx dstbase,
30037 unsigned HOST_WIDE_INT length,
30038 unsigned HOST_WIDE_INT value,
30039 unsigned HOST_WIDE_INT align)
30040 {
30041 unsigned int i;
30042 rtx dst, addr, mem;
30043 rtx val_exp, val_reg, reg;
30044 machine_mode mode;
30045 HOST_WIDE_INT v = value;
30046
30047 gcc_assert (align == 1 || align == 2);
30048
30049 if (align == 2)
30050 v |= (value << BITS_PER_UNIT);
30051
30052 v = sext_hwi (v, BITS_PER_WORD);
30053 val_exp = GEN_INT (v);
30054 /* Skip if it isn't profitable. */
30055 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30056 align, true, false))
30057 return false;
30058
30059 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30060 mode = (align == 2 ? HImode : QImode);
30061 val_reg = force_reg (SImode, val_exp);
30062 reg = gen_lowpart (mode, val_reg);
30063
30064 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30065 {
30066 addr = plus_constant (Pmode, dst, i);
30067 mem = adjust_automodify_address (dstbase, mode, addr, i);
30068 emit_move_insn (mem, reg);
30069 }
30070
30071 /* Handle single byte leftover. */
30072 if (i + 1 == length)
30073 {
30074 reg = gen_lowpart (QImode, val_reg);
30075 addr = plus_constant (Pmode, dst, i);
30076 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30077 emit_move_insn (mem, reg);
30078 i++;
30079 }
30080
30081 gcc_assert (i == length);
30082 return true;
30083 }
30084
30085 /* Set a block of memory using plain strd/str/strh/strb instructions,
30086 to permit unaligned copies on processors which support unaligned
30087 semantics for those instructions. We fill the first LENGTH bytes
30088 of the memory area starting from DSTBASE with byte constant VALUE.
30089 ALIGN is the alignment requirement of memory. */
30090 static bool
30091 arm_block_set_aligned_non_vect (rtx dstbase,
30092 unsigned HOST_WIDE_INT length,
30093 unsigned HOST_WIDE_INT value,
30094 unsigned HOST_WIDE_INT align)
30095 {
30096 unsigned int i;
30097 rtx dst, addr, mem;
30098 rtx val_exp, val_reg, reg;
30099 unsigned HOST_WIDE_INT v;
30100 bool use_strd_p;
30101
30102 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30103 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30104
30105 v = (value | (value << 8) | (value << 16) | (value << 24));
30106 if (length < UNITS_PER_WORD)
30107 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30108
30109 if (use_strd_p)
30110 v |= (v << BITS_PER_WORD);
30111 else
30112 v = sext_hwi (v, BITS_PER_WORD);
30113
30114 val_exp = GEN_INT (v);
30115 /* Skip if it isn't profitable. */
30116 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30117 align, false, use_strd_p))
30118 {
30119 if (!use_strd_p)
30120 return false;
30121
30122 /* Try without strd. */
30123 v = (v >> BITS_PER_WORD);
30124 v = sext_hwi (v, BITS_PER_WORD);
30125 val_exp = GEN_INT (v);
30126 use_strd_p = false;
30127 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30128 align, false, use_strd_p))
30129 return false;
30130 }
30131
30132 i = 0;
30133 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30134 /* Handle double words using strd if possible. */
30135 if (use_strd_p)
30136 {
30137 val_reg = force_reg (DImode, val_exp);
30138 reg = val_reg;
30139 for (; (i + 8 <= length); i += 8)
30140 {
30141 addr = plus_constant (Pmode, dst, i);
30142 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30143 emit_move_insn (mem, reg);
30144 }
30145 }
30146 else
30147 val_reg = force_reg (SImode, val_exp);
30148
30149 /* Handle words. */
30150 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30151 for (; (i + 4 <= length); i += 4)
30152 {
30153 addr = plus_constant (Pmode, dst, i);
30154 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30155 if ((align & 3) == 0)
30156 emit_move_insn (mem, reg);
30157 else
30158 emit_insn (gen_unaligned_storesi (mem, reg));
30159 }
30160
30161 /* Merge last pair of STRH and STRB into a STR if possible. */
30162 if (unaligned_access && i > 0 && (i + 3) == length)
30163 {
30164 addr = plus_constant (Pmode, dst, i - 1);
30165 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30166 /* We are shifting one byte back, set the alignment accordingly. */
30167 if ((align & 1) == 0)
30168 set_mem_align (mem, BITS_PER_UNIT);
30169
30170 /* Most likely this is an unaligned access, and we can't tell at
30171 compilation time. */
30172 emit_insn (gen_unaligned_storesi (mem, reg));
30173 return true;
30174 }
30175
30176 /* Handle half word leftover. */
30177 if (i + 2 <= length)
30178 {
30179 reg = gen_lowpart (HImode, val_reg);
30180 addr = plus_constant (Pmode, dst, i);
30181 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30182 if ((align & 1) == 0)
30183 emit_move_insn (mem, reg);
30184 else
30185 emit_insn (gen_unaligned_storehi (mem, reg));
30186
30187 i += 2;
30188 }
30189
30190 /* Handle single byte leftover. */
30191 if (i + 1 == length)
30192 {
30193 reg = gen_lowpart (QImode, val_reg);
30194 addr = plus_constant (Pmode, dst, i);
30195 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30196 emit_move_insn (mem, reg);
30197 }
30198
30199 return true;
30200 }
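/* For example, setting 13 word-aligned bytes to 0xAB replicates the byte
   into 0xABABABAB (0xABABABABABABABAB if the strd path is taken), emits
   word or double-word stores for the first 12 bytes and finishes with a
   single byte store for the last one.  */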
30201
30202 /* Set a block of memory using vectorization instructions for both
30203 aligned and unaligned cases. We fill the first LENGTH bytes of
30204 the memory area starting from DSTBASE with byte constant VALUE.
30205 ALIGN is the alignment requirement of memory. */
30206 static bool
30207 arm_block_set_vect (rtx dstbase,
30208 unsigned HOST_WIDE_INT length,
30209 unsigned HOST_WIDE_INT value,
30210 unsigned HOST_WIDE_INT align)
30211 {
30212 /* Check whether we need to use unaligned store instruction. */
30213 if (((align & 3) != 0 || (length & 3) != 0)
30214 /* Check whether unaligned store instruction is available. */
30215 && (!unaligned_access || BYTES_BIG_ENDIAN))
30216 return false;
30217
30218 if ((align & 3) == 0)
30219 return arm_block_set_aligned_vect (dstbase, length, value, align);
30220 else
30221 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30222 }
30223
30224 /* Expand a string store operation. First we try to do it using
30225 vectorization instructions, then with ARM unaligned access and
30226 double-word stores if profitable. OPERANDS[0] is the destination,
30227 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30228 initialize the memory with, OPERANDS[3] is the known alignment of the
30229 destination. */
30230 bool
30231 arm_gen_setmem (rtx *operands)
30232 {
30233 rtx dstbase = operands[0];
30234 unsigned HOST_WIDE_INT length;
30235 unsigned HOST_WIDE_INT value;
30236 unsigned HOST_WIDE_INT align;
30237
30238 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30239 return false;
30240
30241 length = UINTVAL (operands[1]);
30242 if (length > 64)
30243 return false;
30244
30245 value = (UINTVAL (operands[2]) & 0xFF);
30246 align = UINTVAL (operands[3]);
30247 if (TARGET_NEON && length >= 8
30248 && current_tune->string_ops_prefer_neon
30249 && arm_block_set_vect (dstbase, length, value, align))
30250 return true;
30251
30252 if (!unaligned_access && (align & 3) != 0)
30253 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30254
30255 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30256 }
30257
30258
30259 static bool
30260 arm_macro_fusion_p (void)
30261 {
30262 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30263 }
30264
30265 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30266 for MOVW / MOVT macro fusion. */
30267
30268 static bool
30269 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30270 {
30271 /* We are trying to fuse
30272 movw imm / movt imm
30273 instructions as a group that gets scheduled together. */
30274
30275 rtx set_dest = SET_DEST (curr_set);
30276
30277 if (GET_MODE (set_dest) != SImode)
30278 return false;
30279
30280 /* We are trying to match:
30281 prev (movw) == (set (reg r0) (const_int imm16))
30282 curr (movt) == (set (zero_extract (reg r0)
30283 (const_int 16)
30284 (const_int 16))
30285 (const_int imm16_1))
30286 or
30287 prev (movw) == (set (reg r1)
30288 (high (symbol_ref ("SYM"))))
30289 curr (movt) == (set (reg r0)
30290 (lo_sum (reg r1)
30291 (symbol_ref ("SYM")))) */
30292
30293 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30294 {
30295 if (CONST_INT_P (SET_SRC (curr_set))
30296 && CONST_INT_P (SET_SRC (prev_set))
30297 && REG_P (XEXP (set_dest, 0))
30298 && REG_P (SET_DEST (prev_set))
30299 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30300 return true;
30301
30302 }
30303 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30304 && REG_P (SET_DEST (curr_set))
30305 && REG_P (SET_DEST (prev_set))
30306 && GET_CODE (SET_SRC (prev_set)) == HIGH
30307 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30308 return true;
30309
30310 return false;
30311 }
30312
30313 static bool
30314 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30315 {
30316 rtx prev_set = single_set (prev);
30317 rtx curr_set = single_set (curr);
30318
30319 if (!prev_set
30320 || !curr_set)
30321 return false;
30322
30323 if (any_condjump_p (curr))
30324 return false;
30325
30326 if (!arm_macro_fusion_p ())
30327 return false;
30328
30329 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30330 && aarch_crypto_can_dual_issue (prev, curr))
30331 return true;
30332
30333 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30334 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30335 return true;
30336
30337 return false;
30338 }
30339
30340 /* Return true iff the instruction fusion described by OP is enabled. */
30341 bool
30342 arm_fusion_enabled_p (tune_params::fuse_ops op)
30343 {
30344 return current_tune->fusible_ops & op;
30345 }
30346
30347 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30348 scheduled for speculative execution. Reject the long-running division
30349 and square-root instructions. */
30350
30351 static bool
30352 arm_sched_can_speculate_insn (rtx_insn *insn)
30353 {
30354 switch (get_attr_type (insn))
30355 {
30356 case TYPE_SDIV:
30357 case TYPE_UDIV:
30358 case TYPE_FDIVS:
30359 case TYPE_FDIVD:
30360 case TYPE_FSQRTS:
30361 case TYPE_FSQRTD:
30362 case TYPE_NEON_FP_SQRT_S:
30363 case TYPE_NEON_FP_SQRT_D:
30364 case TYPE_NEON_FP_SQRT_S_Q:
30365 case TYPE_NEON_FP_SQRT_D_Q:
30366 case TYPE_NEON_FP_DIV_S:
30367 case TYPE_NEON_FP_DIV_D:
30368 case TYPE_NEON_FP_DIV_S_Q:
30369 case TYPE_NEON_FP_DIV_D_Q:
30370 return false;
30371 default:
30372 return true;
30373 }
30374 }
30375
30376 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30377
30378 static unsigned HOST_WIDE_INT
30379 arm_asan_shadow_offset (void)
30380 {
30381 return HOST_WIDE_INT_1U << 29;
30382 }
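/* With this offset AddressSanitizer maps the byte at ADDR to the shadow
   byte at (ADDR >> 3) + 0x20000000, i.e. the shadow region starts at
   512MB in the 32-bit address space.  */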
30383
30384
30385 /* This is a temporary fix for PR60655. Ideally we need
30386 to handle most of these cases in the generic part but
30387 currently we reject minus (..) (sym_ref). We try to
30388 ameliorate the case with minus (sym_ref1) (sym_ref2)
30389 where they are in the same section. */
30390
30391 static bool
30392 arm_const_not_ok_for_debug_p (rtx p)
30393 {
30394 tree decl_op0 = NULL;
30395 tree decl_op1 = NULL;
30396
30397 if (GET_CODE (p) == UNSPEC)
30398 return true;
30399 if (GET_CODE (p) == MINUS)
30400 {
30401 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30402 {
30403 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30404 if (decl_op1
30405 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30406 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30407 {
30408 if ((VAR_P (decl_op1)
30409 || TREE_CODE (decl_op1) == CONST_DECL)
30410 && (VAR_P (decl_op0)
30411 || TREE_CODE (decl_op0) == CONST_DECL))
30412 return (get_variable_section (decl_op1, false)
30413 != get_variable_section (decl_op0, false));
30414
30415 if (TREE_CODE (decl_op1) == LABEL_DECL
30416 && TREE_CODE (decl_op0) == LABEL_DECL)
30417 return (DECL_CONTEXT (decl_op1)
30418 != DECL_CONTEXT (decl_op0));
30419 }
30420
30421 return true;
30422 }
30423 }
30424
30425 return false;
30426 }
30427
30428 /* Return TRUE if X is a reference to a value in a constant pool. */
30429 extern bool
30430 arm_is_constant_pool_ref (rtx x)
30431 {
30432 return (MEM_P (x)
30433 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30434 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30435 }
30436
30437 /* Remember the last target of arm_set_current_function. */
30438 static GTY(()) tree arm_previous_fndecl;
30439
30440 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30441
30442 void
30443 save_restore_target_globals (tree new_tree)
30444 {
30445 /* If we have a previous state, use it. */
30446 if (TREE_TARGET_GLOBALS (new_tree))
30447 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30448 else if (new_tree == target_option_default_node)
30449 restore_target_globals (&default_target_globals);
30450 else
30451 {
30452 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30453 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30454 }
30455
30456 arm_option_params_internal ();
30457 }
30458
30459 /* Invalidate arm_previous_fndecl. */
30460
30461 void
30462 arm_reset_previous_fndecl (void)
30463 {
30464 arm_previous_fndecl = NULL_TREE;
30465 }
30466
30467 /* Establish appropriate back-end context for processing the function
30468 FNDECL. The argument might be NULL to indicate processing at top
30469 level, outside of any function scope. */
30470
30471 static void
30472 arm_set_current_function (tree fndecl)
30473 {
30474 if (!fndecl || fndecl == arm_previous_fndecl)
30475 return;
30476
30477 tree old_tree = (arm_previous_fndecl
30478 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30479 : NULL_TREE);
30480
30481 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30482
30483 /* If current function has no attributes but previous one did,
30484 use the default node. */
30485 if (! new_tree && old_tree)
30486 new_tree = target_option_default_node;
30487
30488 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30489 the default have already been handled by save_restore_target_globals from
30490 arm_pragma_target_parse. */
30491 if (old_tree == new_tree)
30492 return;
30493
30494 arm_previous_fndecl = fndecl;
30495
30496 /* First set the target options. */
30497 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30498
30499 save_restore_target_globals (new_tree);
30500 }
30501
30502 /* Implement TARGET_OPTION_PRINT. */
30503
30504 static void
30505 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30506 {
30507 int flags = ptr->x_target_flags;
30508 const char *fpu_name;
30509
30510 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30511 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30512
30513 fprintf (file, "%*sselected isa %s\n", indent, "",
30514 TARGET_THUMB2_P (flags) ? "thumb2" :
30515 TARGET_THUMB_P (flags) ? "thumb1" :
30516 "arm");
30517
30518 if (ptr->x_arm_arch_string)
30519 fprintf (file, "%*sselected architecture %s\n", indent, "",
30520 ptr->x_arm_arch_string);
30521
30522 if (ptr->x_arm_cpu_string)
30523 fprintf (file, "%*sselected CPU %s\n", indent, "",
30524 ptr->x_arm_cpu_string);
30525
30526 if (ptr->x_arm_tune_string)
30527 fprintf (file, "%*sselected tune %s\n", indent, "",
30528 ptr->x_arm_tune_string);
30529
30530 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30531 }
30532
30533 /* Hook to determine if one function can safely inline another. */
30534
30535 static bool
30536 arm_can_inline_p (tree caller, tree callee)
30537 {
30538 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30539 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30540 bool can_inline = true;
30541
30542 struct cl_target_option *caller_opts
30543 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30544 : target_option_default_node);
30545
30546 struct cl_target_option *callee_opts
30547 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30548 : target_option_default_node);
30549
30550 if (callee_opts == caller_opts)
30551 return true;
30552
30553 /* Callee's ISA features should be a subset of the caller's. */
30554 struct arm_build_target caller_target;
30555 struct arm_build_target callee_target;
30556 caller_target.isa = sbitmap_alloc (isa_num_bits);
30557 callee_target.isa = sbitmap_alloc (isa_num_bits);
30558
30559 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30560 false);
30561 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30562 false);
30563 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30564 can_inline = false;
30565
30566 sbitmap_free (caller_target.isa);
30567 sbitmap_free (callee_target.isa);
30568
30569 /* OK to inline between different modes.
30570 Functions with mode-specific instructions, e.g. using asm,
30571 must be explicitly protected with noinline. */
30572 return can_inline;
30573 }
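
/* For instance, with a hypothetical pair of declarations such as

       void __attribute__ ((target ("fpu=vfpv4"))) callee (void);
       void __attribute__ ((target ("fpu=neon-vfpv4"))) caller (void);

   the callee's ISA bits form a subset of the caller's, so the subset
   test above permits inlining; inlining in the opposite direction
   would be rejected.  */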
30574
30575 /* Hook to fix function's alignment affected by target attribute. */
30576
30577 static void
30578 arm_relayout_function (tree fndecl)
30579 {
30580 if (DECL_USER_ALIGN (fndecl))
30581 return;
30582
30583 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30584
30585 if (!callee_tree)
30586 callee_tree = target_option_default_node;
30587
30588 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30589 SET_DECL_ALIGN
30590 (fndecl,
30591 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30592 }
30593
30594 /* Inner function to process the attribute((target(...))): take an
30595 argument and set the current options from it. If we have a list,
30596 recursively go over the list. */
30597
30598 static bool
30599 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30600 {
30601 if (TREE_CODE (args) == TREE_LIST)
30602 {
30603 bool ret = true;
30604
30605 for (; args; args = TREE_CHAIN (args))
30606 if (TREE_VALUE (args)
30607 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30608 ret = false;
30609 return ret;
30610 }
30611
30612 else if (TREE_CODE (args) != STRING_CST)
30613 {
30614 error ("attribute %<target%> argument not a string");
30615 return false;
30616 }
30617
30618 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30619 char *q;
30620
30621 while ((q = strtok (argstr, ",")) != NULL)
30622 {
30623 while (ISSPACE (*q)) ++q;
30624
30625 argstr = NULL;
30626 if (!strncmp (q, "thumb", 5))
30627 opts->x_target_flags |= MASK_THUMB;
30628
30629 else if (!strncmp (q, "arm", 3))
30630 opts->x_target_flags &= ~MASK_THUMB;
30631
30632 else if (!strncmp (q, "fpu=", 4))
30633 {
30634 int fpu_index;
30635 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30636 &fpu_index, CL_TARGET))
30637 {
30638 error ("invalid fpu for attribute(target(\"%s\"))", q);
30639 return false;
30640 }
30641 if (fpu_index == TARGET_FPU_auto)
30642 {
30643 /* This doesn't really make sense until we support
30644 general dynamic selection of the architecture and all
30645 sub-features. */
30646 sorry ("auto fpu selection not currently permitted here");
30647 return false;
30648 }
30649 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30650 }
30651 else
30652 {
30653 error ("attribute(target(\"%s\")) is unknown", q);
30654 return false;
30655 }
30656 }
30657
30658 return true;
30659 }
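
/* By way of example, an attribute such as

       void __attribute__ ((target ("thumb,fpu=vfpv3-d16"))) f (void);

   is split on commas above: "thumb" sets MASK_THUMB in the options,
   while "fpu=vfpv3-d16" is looked up through opt_enum_arg_to_value and
   recorded in x_arm_fpu_index.  */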
30660
30661 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30662
30663 tree
30664 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30665 struct gcc_options *opts_set)
30666 {
30667 struct cl_target_option cl_opts;
30668
30669 if (!arm_valid_target_attribute_rec (args, opts))
30670 return NULL_TREE;
30671
30672 cl_target_option_save (&cl_opts, opts);
30673 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30674 arm_option_check_internal (opts);
30675 /* Do any overrides, such as global options arch=xxx. */
30676 arm_option_override_internal (opts, opts_set);
30677
30678 return build_target_option_node (opts);
30679 }
30680
30681 static void
30682 add_attribute (const char * mode, tree *attributes)
30683 {
30684 size_t len = strlen (mode);
30685 tree value = build_string (len, mode);
30686
30687 TREE_TYPE (value) = build_array_type (char_type_node,
30688 build_index_type (size_int (len)));
30689
30690 *attributes = tree_cons (get_identifier ("target"),
30691 build_tree_list (NULL_TREE, value),
30692 *attributes);
30693 }
30694
30695 /* For testing. Insert thumb or arm modes alternately on functions. */
30696
30697 static void
30698 arm_insert_attributes (tree fndecl, tree * attributes)
30699 {
30700 const char *mode;
30701
30702 if (! TARGET_FLIP_THUMB)
30703 return;
30704
30705 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
30706 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30707 return;
30708
30709 /* Nested definitions must inherit mode. */
30710 if (current_function_decl)
30711 {
30712 mode = TARGET_THUMB ? "thumb" : "arm";
30713 add_attribute (mode, attributes);
30714 return;
30715 }
30716
30717 /* If there is already a setting, don't change it. */
30718 if (lookup_attribute ("target", *attributes) != NULL)
30719 return;
30720
30721 mode = thumb_flipper ? "thumb" : "arm";
30722 add_attribute (mode, attributes);
30723
30724 thumb_flipper = !thumb_flipper;
30725 }
30726
30727 /* Hook to validate attribute((target("string"))). */
30728
30729 static bool
30730 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30731 tree args, int ARG_UNUSED (flags))
30732 {
30733 bool ret = true;
30734 struct gcc_options func_options;
30735 tree cur_tree, new_optimize;
30736 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30737
30738 /* Get the optimization options of the current function. */
30739 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30740
30741 /* If the function changed the optimization levels as well as setting target
30742 options, start with the optimizations specified. */
30743 if (!func_optimize)
30744 func_optimize = optimization_default_node;
30745
30746 /* Init func_options. */
30747 memset (&func_options, 0, sizeof (func_options));
30748 init_options_struct (&func_options, NULL);
30749 lang_hooks.init_options_struct (&func_options);
30750
30751 /* Initialize func_options to the defaults. */
30752 cl_optimization_restore (&func_options,
30753 TREE_OPTIMIZATION (func_optimize));
30754
30755 cl_target_option_restore (&func_options,
30756 TREE_TARGET_OPTION (target_option_default_node));
30757
30758 /* Set func_options flags with new target mode. */
30759 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30760 &global_options_set);
30761
30762 if (cur_tree == NULL_TREE)
30763 ret = false;
30764
30765 new_optimize = build_optimization_node (&func_options);
30766
30767 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30768
30769 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30770
30771 finalize_options_struct (&func_options);
30772
30773 return ret;
30774 }
30775
30776 /* Match an ISA feature bitmap to a named FPU. We always use the
30777 first entry that exactly matches the feature set, so that we
30778 effectively canonicalize the FPU name for the assembler. */
30779 static const char*
30780 arm_identify_fpu_from_isa (sbitmap isa)
30781 {
30782 auto_sbitmap fpubits (isa_num_bits);
30783 auto_sbitmap cand_fpubits (isa_num_bits);
30784
30785 bitmap_and (fpubits, isa, isa_all_fpubits);
30786
30787 /* If there are no ISA feature bits relating to the FPU, we must be
30788 doing soft-float. */
30789 if (bitmap_empty_p (fpubits))
30790 return "softvfp";
30791
30792 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30793 {
30794 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30795 if (bitmap_equal_p (fpubits, cand_fpubits))
30796 return all_fpus[i].name;
30797 }
30798 /* We must find an entry, or things have gone wrong. */
30799 gcc_unreachable ();
30800 }
30801
30802 void
30803 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30804 {
30805
30806 fprintf (stream, "\t.syntax unified\n");
30807
30808 if (TARGET_THUMB)
30809 {
30810 if (is_called_in_ARM_mode (decl)
30811 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30812 && cfun->is_thunk))
30813 fprintf (stream, "\t.code 32\n");
30814 else if (TARGET_THUMB1)
30815 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30816 else
30817 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30818 }
30819 else
30820 fprintf (stream, "\t.arm\n");
30821
30822 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30823 (TARGET_SOFT_FLOAT
30824 ? "softvfp"
30825 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30826
30827 if (TARGET_POKE_FUNCTION_NAME)
30828 arm_poke_function_name (stream, (const char *) name);
30829 }
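
/* For a Thumb-2 function compiled with, say, -mfpu=vfpv3-d16, the
   directives emitted by the code above are roughly

	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16

   with ".code 32", ".code 16" or ".arm" variants used in the other
   cases handled above.  */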
30830
30831 /* If MEM's address is in the form of [base+offset], extract the two
30832 parts of the address and store them in BASE and OFFSET; otherwise
30833 return false after clearing BASE and OFFSET. */
30834
30835 static bool
30836 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30837 {
30838 rtx addr;
30839
30840 gcc_assert (MEM_P (mem));
30841
30842 addr = XEXP (mem, 0);
30843
30844 /* Strip off const from addresses like (const (addr)). */
30845 if (GET_CODE (addr) == CONST)
30846 addr = XEXP (addr, 0);
30847
30848 if (GET_CODE (addr) == REG)
30849 {
30850 *base = addr;
30851 *offset = const0_rtx;
30852 return true;
30853 }
30854
30855 if (GET_CODE (addr) == PLUS
30856 && GET_CODE (XEXP (addr, 0)) == REG
30857 && CONST_INT_P (XEXP (addr, 1)))
30858 {
30859 *base = XEXP (addr, 0);
30860 *offset = XEXP (addr, 1);
30861 return true;
30862 }
30863
30864 *base = NULL_RTX;
30865 *offset = NULL_RTX;
30866
30867 return false;
30868 }
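
/* For example:

     (mem (reg r1))                       -> BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))  -> BASE = r1, OFFSET = 8
     (mem (plus (reg r1) (reg r2)))       -> false; BASE and OFFSET cleared  */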
30869
30870 /* If INSN is a load or store whose address is in the form [base+offset],
30871 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
30872 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30873 otherwise return FALSE. */
30874
30875 static bool
30876 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30877 {
30878 rtx x, dest, src;
30879
30880 gcc_assert (INSN_P (insn));
30881 x = PATTERN (insn);
30882 if (GET_CODE (x) != SET)
30883 return false;
30884
30885 src = SET_SRC (x);
30886 dest = SET_DEST (x);
30887 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30888 {
30889 *is_load = false;
30890 extract_base_offset_in_addr (dest, base, offset);
30891 }
30892 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30893 {
30894 *is_load = true;
30895 extract_base_offset_in_addr (src, base, offset);
30896 }
30897 else
30898 return false;
30899
30900 return (*base != NULL_RTX && *offset != NULL_RTX);
30901 }
30902
30903 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30904
30905 Currently we only support fusing ldr or str instructions, so FUSION_PRI
30906 and PRI are only calculated for these instructions. For other
30907 instructions, FUSION_PRI and PRI are simply set to MAX_PRI. In the
30908 future, other kinds of instruction fusion can be supported by returning
30909 different priorities.
30909
30910 It's important that irrelevant instructions get the largest FUSION_PRI. */
30911
30912 static void
30913 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30914 int *fusion_pri, int *pri)
30915 {
30916 int tmp, off_val;
30917 bool is_load;
30918 rtx base, offset;
30919
30920 gcc_assert (INSN_P (insn));
30921
30922 tmp = max_pri - 1;
30923 if (!fusion_load_store (insn, &base, &offset, &is_load))
30924 {
30925 *pri = tmp;
30926 *fusion_pri = tmp;
30927 return;
30928 }
30929
30930 /* Load goes first. */
30931 if (is_load)
30932 *fusion_pri = tmp - 1;
30933 else
30934 *fusion_pri = tmp - 2;
30935
30936 tmp /= 2;
30937
30938 /* INSN with smaller base register goes first. */
30939 tmp -= ((REGNO (base) & 0xff) << 20);
30940
30941 /* INSN with smaller offset goes first. */
30942 off_val = (int)(INTVAL (offset));
30943 if (off_val >= 0)
30944 tmp -= (off_val & 0xfffff);
30945 else
30946 tmp += ((- off_val) & 0xfffff);
30947
30948 *pri = tmp;
30949 return;
30950 }
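
/* To illustrate the priorities computed above: two loads such as
   ldr rA, [r1, #4] and ldr rB, [r1, #8] both receive
   FUSION_PRI == MAX_PRI - 2 (stores would get MAX_PRI - 3), while the
   smaller offset gives the first load the larger PRI, so it is placed
   ahead of the second and the two become a candidate fusion pair.  */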
30951
30952
30953 /* Construct and return a PARALLEL RTX vector with elements numbering the
30954 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30955 the vector - from the perspective of the architecture. This does not
30956 line up with GCC's perspective on lane numbers, so we end up with
30957 different masks depending on our target endian-ness. The diagram
30958 below may help. We must draw the distinction when building masks
30959 which select one half of the vector. An instruction selecting
30960 architectural low-lanes for a big-endian target, must be described using
30961 a mask selecting GCC high-lanes.
30962
30963 Big-Endian Little-Endian
30964
30965 GCC 0 1 2 3 3 2 1 0
30966 | x | x | x | x | | x | x | x | x |
30967 Architecture 3 2 1 0 3 2 1 0
30968
30969 Low Mask: { 2, 3 } { 0, 1 }
30970 High Mask: { 0, 1 } { 2, 3 }
30971 */
30972
30973 rtx
30974 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30975 {
30976 int nunits = GET_MODE_NUNITS (mode);
30977 rtvec v = rtvec_alloc (nunits / 2);
30978 int high_base = nunits / 2;
30979 int low_base = 0;
30980 int base;
30981 rtx t1;
30982 int i;
30983
30984 if (BYTES_BIG_ENDIAN)
30985 base = high ? low_base : high_base;
30986 else
30987 base = high ? high_base : low_base;
30988
30989 for (i = 0; i < nunits / 2; i++)
30990 RTVEC_ELT (v, i) = GEN_INT (base + i);
30991
30992 t1 = gen_rtx_PARALLEL (mode, v);
30993 return t1;
30994 }
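
/* For example, for V4SImode arm_simd_vect_par_cnst_half (V4SImode, true)
   yields (parallel [(const_int 2) (const_int 3)]) on a little-endian
   target and (parallel [(const_int 0) (const_int 1)]) on a big-endian
   one, matching the "High Mask" row of the diagram above.  */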
30995
30996 /* Check OP for validity as a PARALLEL RTX vector with elements
30997 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30998 from the perspective of the architecture. See the diagram above
30999 arm_simd_vect_par_cnst_half for more details. */
31000
31001 bool
31002 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31003 bool high)
31004 {
31005 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31006 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31007 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31008 int i = 0;
31009
31010 if (!VECTOR_MODE_P (mode))
31011 return false;
31012
31013 if (count_op != count_ideal)
31014 return false;
31015
31016 for (i = 0; i < count_ideal; i++)
31017 {
31018 rtx elt_op = XVECEXP (op, 0, i);
31019 rtx elt_ideal = XVECEXP (ideal, 0, i);
31020
31021 if (!CONST_INT_P (elt_op)
31022 || INTVAL (elt_ideal) != INTVAL (elt_op))
31023 return false;
31024 }
31025 return true;
31026 }
31027
31028 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31029 in Thumb1. */
31030 static bool
31031 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31032 const_tree)
31033 {
31034 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31035 if (vcall_offset && TARGET_THUMB1)
31036 return false;
31037
31038 /* Otherwise ok. */
31039 return true;
31040 }
31041
31042 /* Generate RTL for a conditional branch with rtx comparison CODE in
31043 mode CC_MODE. The destination of the unlikely conditional branch
31044 is LABEL_REF. */
31045
31046 void
31047 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31048 rtx label_ref)
31049 {
31050 rtx x;
31051 x = gen_rtx_fmt_ee (code, VOIDmode,
31052 gen_rtx_REG (cc_mode, CC_REGNUM),
31053 const0_rtx);
31054
31055 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31056 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31057 pc_rtx);
31058 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31059 }
31060
31061 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31062
31063 For pure-code sections there is no letter code for this attribute, so
31064 output all the section flags numerically when this is needed. */
31065
31066 static bool
31067 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31068 {
31069
31070 if (flags & SECTION_ARM_PURECODE)
31071 {
31072 *num = 0x20000000;
31073
31074 if (!(flags & SECTION_DEBUG))
31075 *num |= 0x2;
31076 if (flags & SECTION_EXCLUDE)
31077 *num |= 0x80000000;
31078 if (flags & SECTION_WRITE)
31079 *num |= 0x1;
31080 if (flags & SECTION_CODE)
31081 *num |= 0x4;
31082 if (flags & SECTION_MERGE)
31083 *num |= 0x10;
31084 if (flags & SECTION_STRINGS)
31085 *num |= 0x20;
31086 if (flags & SECTION_TLS)
31087 *num |= 0x400;
31088 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31089 *num |= 0x200;
31090
31091 return true;
31092 }
31093
31094 return false;
31095 }
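
/* As a worked example, an executable pure-code section that is neither
   a debug nor an excluded section ends up with
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together
   with the numeric values of SHF_ALLOC and SHF_EXECINSTR.  */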
31096
31097 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31098
31099 If pure-code is passed as an option, make sure all functions are in
31100 sections that have the SHF_ARM_PURECODE attribute. */
31101
31102 static section *
31103 arm_function_section (tree decl, enum node_frequency freq,
31104 bool startup, bool exit)
31105 {
31106 const char * section_name;
31107 section * sec;
31108
31109 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31110 return default_function_section (decl, freq, startup, exit);
31111
31112 if (!target_pure_code)
31113 return default_function_section (decl, freq, startup, exit);
31114
31115
31116 section_name = DECL_SECTION_NAME (decl);
31117
31118 /* If a function is not in a named section then it falls under the 'default'
31119 text section, also known as '.text'. We can preserve previous behavior as
31120 the default text section already has the SHF_ARM_PURECODE section
31121 attribute. */
31122 if (!section_name)
31123 {
31124 section *default_sec = default_function_section (decl, freq, startup,
31125 exit);
31126
31127 /* If default_sec is not null, then it must be a special section like for
31128 example .text.startup. We set the pure-code attribute and return the
31129 same section to preserve existing behavior. */
31130 if (default_sec)
31131 default_sec->common.flags |= SECTION_ARM_PURECODE;
31132 return default_sec;
31133 }
31134
31135 /* Otherwise check whether a section has already been created with
31136 'section_name'. */
31137 sec = get_named_section (decl, section_name, 0);
31138 if (!sec)
31139 /* If that is not the case, passing NULL as the section's name to
31140 'get_named_section' will create a section with the declaration's
31141 section name. */
31142 sec = get_named_section (decl, NULL, 0);
31143
31144 /* Set the SHF_ARM_PURECODE attribute. */
31145 sec->common.flags |= SECTION_ARM_PURECODE;
31146
31147 return sec;
31148 }
31149
31150 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31151
31152 If DECL is a function declaration and pure-code is passed as an option
31153 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31154 section's name and RELOC indicates whether the declaration's initializer may
31155 contain runtime relocations. */
31156
31157 static unsigned int
31158 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31159 {
31160 unsigned int flags = default_section_type_flags (decl, name, reloc);
31161
31162 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31163 flags |= SECTION_ARM_PURECODE;
31164
31165 return flags;
31166 }
31167
31168 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31169
31170 static void
31171 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31172 rtx op0, rtx op1,
31173 rtx *quot_p, rtx *rem_p)
31174 {
31175 if (mode == SImode)
31176 gcc_assert (!TARGET_IDIV);
31177
31178 scalar_int_mode libval_mode
31179 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31180
31181 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31182 libval_mode,
31183 op0, GET_MODE (op0),
31184 op1, GET_MODE (op1));
31185
31186 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31187 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31188 GET_MODE_SIZE (mode));
31189
31190 gcc_assert (quotient);
31191 gcc_assert (remainder);
31192
31193 *quot_p = quotient;
31194 *rem_p = remainder;
31195 }
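
/* For SImode operands, for instance, LIBVAL_MODE is DImode and the
   AEABI routine (__aeabi_idivmod in the signed case) returns the
   quotient and remainder packed into that double-word value; the two
   subregs above, at byte offsets 0 and GET_MODE_SIZE (SImode), pull
   the halves back apart.  */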
31196
31197 /* This function checks for the availability of the coprocessor builtin passed
31198 in BUILTIN for the current target. Returns true if it is available and
31199 false otherwise. If a BUILTIN is passed for which this function has not
31200 been implemented, an internal compiler error is raised (gcc_unreachable). */
31201
31202 bool
31203 arm_coproc_builtin_available (enum unspecv builtin)
31204 {
31205 /* None of these builtins are available in Thumb mode if the target only
31206 supports Thumb-1. */
31207 if (TARGET_THUMB1)
31208 return false;
31209
31210 switch (builtin)
31211 {
31212 case VUNSPEC_CDP:
31213 case VUNSPEC_LDC:
31214 case VUNSPEC_LDCL:
31215 case VUNSPEC_STC:
31216 case VUNSPEC_STCL:
31217 case VUNSPEC_MCR:
31218 case VUNSPEC_MRC:
31219 if (arm_arch4)
31220 return true;
31221 break;
31222 case VUNSPEC_CDP2:
31223 case VUNSPEC_LDC2:
31224 case VUNSPEC_LDC2L:
31225 case VUNSPEC_STC2:
31226 case VUNSPEC_STC2L:
31227 case VUNSPEC_MCR2:
31228 case VUNSPEC_MRC2:
31229 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31230 ARMv8-{A,M}. */
31231 if (arm_arch5)
31232 return true;
31233 break;
31234 case VUNSPEC_MCRR:
31235 case VUNSPEC_MRRC:
31236 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31237 ARMv8-{A,M}. */
31238 if (arm_arch6 || arm_arch5te)
31239 return true;
31240 break;
31241 case VUNSPEC_MCRR2:
31242 case VUNSPEC_MRRC2:
31243 if (arm_arch6)
31244 return true;
31245 break;
31246 default:
31247 gcc_unreachable ();
31248 }
31249 return false;
31250 }
31251
31252 /* This function returns true if OP is a valid memory operand for the ldc and
31253 stc coprocessor instructions and false otherwise. */
31254
31255 bool
31256 arm_coproc_ldc_stc_legitimate_address (rtx op)
31257 {
31258 HOST_WIDE_INT range;
31259 /* Has to be a memory operand. */
31260 if (!MEM_P (op))
31261 return false;
31262
31263 op = XEXP (op, 0);
31264
31265 /* We accept registers. */
31266 if (REG_P (op))
31267 return true;
31268
31269 switch (GET_CODE (op))
31270 {
31271 case PLUS:
31272 {
31273 /* Or registers with an offset. */
31274 if (!REG_P (XEXP (op, 0)))
31275 return false;
31276
31277 op = XEXP (op, 1);
31278
31279 /* The offset must be an immediate though. */
31280 if (!CONST_INT_P (op))
31281 return false;
31282
31283 range = INTVAL (op);
31284
31285 /* Within the range of [-1020,1020]. */
31286 if (!IN_RANGE (range, -1020, 1020))
31287 return false;
31288
31289 /* And a multiple of 4. */
31290 return (range % 4) == 0;
31291 }
31292 case PRE_INC:
31293 case POST_INC:
31294 case PRE_DEC:
31295 case POST_DEC:
31296 return REG_P (XEXP (op, 0));
31297 default:
31298 gcc_unreachable ();
31299 }
31300 return false;
31301 }
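
/* Examples of addresses accepted or rejected by the checks above:

     [r0]           plain register                   accepted
     [r1, #-1020]   in range and a multiple of 4     accepted
     [r2, #1024]    outside [-1020, 1020]            rejected
     [r3, #6]       not a multiple of 4              rejected  */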
31302
31303 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31304
31305 In VFPv1, VFP registers could only be accessed in the mode they were
31306 set, so subregs would be invalid there. However, we don't support
31307 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31308
31309 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31310 VFP registers in little-endian order. We can't describe that accurately to
31311 GCC, so avoid taking subregs of such values.
31312
31313 The only exception is going from a 128-bit to a 64-bit type. In that
31314 case the data layout happens to be consistent for big-endian, so we
31315 explicitly allow that case. */
31316
31317 static bool
31318 arm_can_change_mode_class (machine_mode from, machine_mode to,
31319 reg_class_t rclass)
31320 {
31321 if (TARGET_BIG_END
31322 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31323 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31324 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31325 && reg_classes_intersect_p (VFP_REGS, rclass))
31326 return false;
31327 return true;
31328 }
31329
31330 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31331 strcpy from constants will be faster. */
31332
31333 static HOST_WIDE_INT
31334 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31335 {
31336 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31337 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31338 return MAX (align, BITS_PER_WORD * factor);
31339 return align;
31340 }
31341
31342 #if CHECKING_P
31343 namespace selftest {
31344
31345 /* Scan the static data tables generated by parsecpu.awk looking for
31346 potential issues with the data. We primarily check for
31347 inconsistencies in the option extensions at present (extensions
31348 that duplicate others but aren't marked as aliases). Furthermore,
31349 for correct canonicalization, later options must never be a subset
31350 of an earlier option. Any extension should also only specify other
31351 feature bits and never an architecture bit. The architecture is inferred
31352 from the declaration of the extension. */
31353 static void
31354 arm_test_cpu_arch_data (void)
31355 {
31356 const arch_option *arch;
31357 const cpu_option *cpu;
31358 auto_sbitmap target_isa (isa_num_bits);
31359 auto_sbitmap isa1 (isa_num_bits);
31360 auto_sbitmap isa2 (isa_num_bits);
31361
31362 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31363 {
31364 const cpu_arch_extension *ext1, *ext2;
31365
31366 if (arch->common.extensions == NULL)
31367 continue;
31368
31369 arm_initialize_isa (target_isa, arch->common.isa_bits);
31370
31371 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31372 {
31373 if (ext1->alias)
31374 continue;
31375
31376 arm_initialize_isa (isa1, ext1->isa_bits);
31377 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31378 {
31379 if (ext2->alias || ext1->remove != ext2->remove)
31380 continue;
31381
31382 arm_initialize_isa (isa2, ext2->isa_bits);
31383 /* If the option is a subset of the parent option, it doesn't
31384 add anything and so isn't useful. */
31385 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31386
31387 /* If the extension specifies any architectural bits then
31388 disallow it. Extensions should only specify feature bits. */
31389 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31390 }
31391 }
31392 }
31393
31394 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31395 {
31396 const cpu_arch_extension *ext1, *ext2;
31397
31398 if (cpu->common.extensions == NULL)
31399 continue;
31400
31401 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31402
31403 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31404 {
31405 if (ext1->alias)
31406 continue;
31407
31408 arm_initialize_isa (isa1, ext1->isa_bits);
31409 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31410 {
31411 if (ext2->alias || ext1->remove != ext2->remove)
31412 continue;
31413
31414 arm_initialize_isa (isa2, ext2->isa_bits);
31415 /* If the option is a subset of the parent option, it doesn't
31416 add anything and so isn't useful. */
31417 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31418
31419 /* If the extension specifies any architectural bits then
31420 disallow it. Extensions should only specify feature bits. */
31421 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31422 }
31423 }
31424 }
31425 }
31426
31427 /* Scan the static data tables generated by parsecpu.awk looking for
31428 potential issues with the data. Here we check for consistency between the
31429 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31430 a feature bit that is not defined by any FPU flag. */
31431 static void
31432 arm_test_fpu_data (void)
31433 {
31434 auto_sbitmap isa_all_fpubits (isa_num_bits);
31435 auto_sbitmap fpubits (isa_num_bits);
31436 auto_sbitmap tmpset (isa_num_bits);
31437
31438 static const enum isa_feature fpu_bitlist[]
31439 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31440 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31441
31442 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31443 {
31444 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31445 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31446 bitmap_clear (isa_all_fpubits);
31447 bitmap_copy (isa_all_fpubits, tmpset);
31448 }
31449
31450 if (!bitmap_empty_p (isa_all_fpubits))
31451 {
31452 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31453 " group that are not defined by any FPU.\n"
31454 " Check your arm-cpus.in.\n");
31455 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31456 }
31457 }
31458
31459 static void
31460 arm_run_selftests (void)
31461 {
31462 arm_test_cpu_arch_data ();
31463 arm_test_fpu_data ();
31464 }
31465 } /* Namespace selftest. */
31466
31467 #undef TARGET_RUN_TARGET_SELFTESTS
31468 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31469 #endif /* CHECKING_P */
31470
31471 struct gcc_target targetm = TARGET_INITIALIZER;
31472
31473 #include "gt-arm.h"